From 53535b4c3830c1cecd3def374c3d8d353e9a46f0 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 28 Feb 2023 08:58:14 +0100 Subject: [PATCH 01/81] Add JProfile acknowledgement for free license --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index d40e5f228f7..36fa2de67bf 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ We love contributors! Please see our [Contributing Guide][] for ways you can hel Dataverse is a trademark of President and Fellows of Harvard College and is registered in the United States. +We thank EJ Technologies for granting us a free open source project license for their Java profiler [JProfiler][]. + [![Dataverse Project logo](src/main/webapp/resources/images/dataverseproject_logo.jpg?raw=true "Dataverse Project")](http://dataverse.org) [![API Test Status](https://jenkins.dataverse.org/buildStatus/icon?job=IQSS-dataverse-develop&subject=API%20Test%20Status)](https://jenkins.dataverse.org/job/IQSS-dataverse-develop/) @@ -38,3 +40,4 @@ Dataverse is a trademark of President and Fellows of Harvard College and is regi [chat.dataverse.org]: http://chat.dataverse.org [Dataverse Community Meeting]: https://dataverse.org/events [open source]: LICENSE.md +[JProfiler]: https://www.ej-technologies.com/products/jprofiler/overview.html From 3a337e9405dee42ece3868c75930f695fb0125fe Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 28 Feb 2023 09:04:58 +0100 Subject: [PATCH 02/81] Add JProfiler acknowledgement to dev guide intro --- doc/sphinx-guides/source/developers/intro.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/sphinx-guides/source/developers/intro.rst b/doc/sphinx-guides/source/developers/intro.rst index 7f4e8c1ba34..4eed89a28ef 100755 --- a/doc/sphinx-guides/source/developers/intro.rst +++ b/doc/sphinx-guides/source/developers/intro.rst @@ -32,6 +32,8 @@ We make use of a variety of Jakarta EE technologies such as JPA, JAX-RS, JMS, an In addition, we start to adopt parts of 
Eclipse MicroProfile, namely `MicroProfile Config `_. +We thank EJ Technologies for granting us a free open source project license for their Java profiler `JProfiler `_. + Roadmap ------- From fc4cd59dfc1aecc51a319a9034360aa7d7b05166 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 28 Feb 2023 09:12:18 +0100 Subject: [PATCH 03/81] Add JProfiler and acknowledgement to tools page of dev guide --- doc/sphinx-guides/source/developers/tools.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/sphinx-guides/source/developers/tools.rst b/doc/sphinx-guides/source/developers/tools.rst index cbd27d6e8d2..4ef52f25f43 100755 --- a/doc/sphinx-guides/source/developers/tools.rst +++ b/doc/sphinx-guides/source/developers/tools.rst @@ -147,6 +147,14 @@ For example... would be consistent with a file descriptor leak on the dataset page. +JProfiler ++++++++++ + +Tracking down resource drainage, bottlenecks etc gets easier using a profiler. + +We thank EJ Technologies for granting us a free open source project license for their Java profiler +`JProfiler `_. + jmap and jstat ++++++++++++++ From 305739fb73360118997d0514325e109d10c4db53 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 1 Mar 2023 14:30:14 +0100 Subject: [PATCH 04/81] Revert "Add JProfile acknowledgement for free license" This reverts commit 53535b4c3830c1cecd3def374c3d8d353e9a46f0. --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index 36fa2de67bf..d40e5f228f7 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,6 @@ We love contributors! Please see our [Contributing Guide][] for ways you can hel Dataverse is a trademark of President and Fellows of Harvard College and is registered in the United States. -We thank EJ Technologies for granting us a free open source project license for their Java profiler [JProfiler][]. 
- [![Dataverse Project logo](src/main/webapp/resources/images/dataverseproject_logo.jpg?raw=true "Dataverse Project")](http://dataverse.org) [![API Test Status](https://jenkins.dataverse.org/buildStatus/icon?job=IQSS-dataverse-develop&subject=API%20Test%20Status)](https://jenkins.dataverse.org/job/IQSS-dataverse-develop/) @@ -40,4 +38,3 @@ We thank EJ Technologies for granting us a free open source project license for [chat.dataverse.org]: http://chat.dataverse.org [Dataverse Community Meeting]: https://dataverse.org/events [open source]: LICENSE.md -[JProfiler]: https://www.ej-technologies.com/products/jprofiler/overview.html From e4d12a14ef631beefdc2d7287f8b9ef49227e9df Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 1 Mar 2023 14:30:17 +0100 Subject: [PATCH 05/81] Revert "Add JProfiler acknowledgement to dev guide intro" This reverts commit 3a337e9405dee42ece3868c75930f695fb0125fe. --- doc/sphinx-guides/source/developers/intro.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/sphinx-guides/source/developers/intro.rst b/doc/sphinx-guides/source/developers/intro.rst index 4eed89a28ef..7f4e8c1ba34 100755 --- a/doc/sphinx-guides/source/developers/intro.rst +++ b/doc/sphinx-guides/source/developers/intro.rst @@ -32,8 +32,6 @@ We make use of a variety of Jakarta EE technologies such as JPA, JAX-RS, JMS, an In addition, we start to adopt parts of Eclipse MicroProfile, namely `MicroProfile Config `_. -We thank EJ Technologies for granting us a free open source project license for their Java profiler `JProfiler `_. 
- Roadmap ------- From 15e200295818ca3c136ac3a0c57411191038083d Mon Sep 17 00:00:00 2001 From: Ludovic DANIEL Date: Wed, 26 Jul 2023 16:42:36 +0200 Subject: [PATCH 06/81] #8655 Re-add value 'cell counting' to Life Science metadatablock's Measurement Type field vocabulary --- scripts/api/data/metadatablocks/biomedical.tsv | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/api/data/metadatablocks/biomedical.tsv b/scripts/api/data/metadatablocks/biomedical.tsv index 28d59130c34..f26a25752e6 100644 --- a/scripts/api/data/metadatablocks/biomedical.tsv +++ b/scripts/api/data/metadatablocks/biomedical.tsv @@ -45,6 +45,7 @@ studyFactorType Treatment Compound EFO_0000369 17 studyFactorType Treatment Type EFO_0000727 18 studyFactorType Other OTHER_FACTOR 19 + studyAssayMeasurementType cell counting ERO_0001899 0 studyAssayMeasurementType cell sorting CHMO_0001085 1 studyAssayMeasurementType clinical chemistry analysis OBI_0000520 2 studyAssayMeasurementType copy number variation profiling OBI_0000537 3 From d9bb9a343de06cf0c849ee728e2d0b6ee57ccae9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20ROUCOU?= Date: Wed, 2 Aug 2023 17:45:46 +0200 Subject: [PATCH 07/81] i18n into MyData page --- .../iq/dataverse/mydata/DataRetrieverAPI.java | 29 +++++++-------- .../dataverse/mydata/MyDataFilterParams.java | 24 ++++++------- .../iq/dataverse/mydata/MyDataFinder.java | 35 ++++++++++--------- src/main/java/propertyFiles/Bundle.properties | 15 ++++++++ .../iq/dataverse/api/DataRetrieverApiIT.java | 20 +++++++++-- 5 files changed, 78 insertions(+), 45 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java index d244021d01a..51e1cb70bc7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java @@ -25,13 +25,10 @@ import edu.harvard.iq.dataverse.search.SearchException; 
import edu.harvard.iq.dataverse.search.SearchFields; import edu.harvard.iq.dataverse.search.SortBy; -import java.math.BigDecimal; import java.util.Arrays; import java.util.List; import java.util.Map; -import java.util.Random; import java.util.logging.Logger; -import java.util.Locale; import javax.ejb.EJB; import javax.inject.Inject; import javax.json.Json; @@ -41,7 +38,6 @@ import javax.ws.rs.Path; import javax.ws.rs.Produces; import javax.ws.rs.QueryParam; -import javax.ws.rs.DefaultValue; import javax.ws.rs.container.ContainerRequestContext; import javax.ws.rs.core.Context; @@ -272,9 +268,7 @@ public String retrieveMyDataAsJsonString( @QueryParam("dataset_valid") List datasetValidities) { boolean OTHER_USER = false; - String localeCode = session.getLocaleCode(); - String noMsgResultsFound = BundleUtil.getStringFromPropertyFile("dataretrieverAPI.noMsgResultsFound", - "Bundle", new Locale(localeCode)); + String noMsgResultsFound = BundleUtil.getStringFromBundle("dataretrieverAPI.noMsgResultsFound"); if ((session.getUser() != null) && (session.getUser().isAuthenticated())) { authUser = (AuthenticatedUser) session.getUser(); @@ -282,7 +276,10 @@ public String retrieveMyDataAsJsonString( try { authUser = getRequestAuthenticatedUserOrDie(crc); } catch (WrappedResponse e) { - return this.getJSONErrorString("Requires authentication. Please login.", "retrieveMyDataAsJsonString. User not found! 
Shouldn't be using this anyway"); + return this.getJSONErrorString( + BundleUtil.getStringFromBundle("dataretrieverAPI.authentication.required"), + BundleUtil.getStringFromBundle("dataretrieverAPI.authentication.required.opt") + ); } } @@ -295,7 +292,9 @@ public String retrieveMyDataAsJsonString( authUser = searchUser; OTHER_USER = true; } else { - return this.getJSONErrorString("No user found for: \"" + userIdentifier + "\"", null); + return this.getJSONErrorString( + BundleUtil.getStringFromBundle("dataretrieverAPI.user.not.found", Arrays.asList(userIdentifier)), + null); } } @@ -335,8 +334,7 @@ public String retrieveMyDataAsJsonString( myDataFinder = new MyDataFinder(rolePermissionHelper, roleAssigneeService, dvObjectServiceBean, - groupService, - noMsgResultsFound); + groupService); this.myDataFinder.runFindDataSteps(filterParams); if (myDataFinder.hasError()){ return this.getJSONErrorString(myDataFinder.getErrorMessage(), myDataFinder.getErrorMessage()); @@ -391,11 +389,14 @@ public String retrieveMyDataAsJsonString( } catch (SearchException ex) { solrQueryResponse = null; - this.logger.severe("Solr SearchException: " + ex.getMessage()); + logger.severe("Solr SearchException: " + ex.getMessage()); } - if (solrQueryResponse==null){ - return this.getJSONErrorString("Sorry! There was an error with the search service.", "Sorry! 
There was a SOLR Error"); + if (solrQueryResponse == null) { + return this.getJSONErrorString( + BundleUtil.getStringFromBundle("dataretrieverAPI.solr.error"), + BundleUtil.getStringFromBundle("dataretrieverAPI.solr.error.opt") + ); } // --------------------------------- diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java b/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java index c378034f951..15d83fe4296 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java @@ -12,6 +12,7 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.search.SearchConstants; import edu.harvard.iq.dataverse.search.SearchFields; +import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -178,26 +179,25 @@ public List getRoleIds(){ } - - private void checkParams(){ - - if ((this.userIdentifier == null)||(this.userIdentifier.isEmpty())){ - this.addError("Sorry! No user was found!"); + private void checkParams() { + if ((this.userIdentifier == null) || (this.userIdentifier.isEmpty())) { + this.addError(BundleUtil.getStringFromBundle("myDataFilterParams.error.no.user")); return; } - if ((this.roleIds == null)||(this.roleIds.isEmpty())){ - this.addError("No results. Please select at least one Role."); + if ((this.roleIds == null) || (this.roleIds.isEmpty())) { + this.addError(BundleUtil.getStringFromBundle("myDataFilterParams.error.result.no.role")); return; } - if ((this.dvObjectTypes == null)||(this.dvObjectTypes.isEmpty())){ - this.addError("No results. 
Please select one of Dataverses, Datasets, Files."); + if ((this.dvObjectTypes == null) || (this.dvObjectTypes.isEmpty())) { + this.addError(BundleUtil.getStringFromBundle("myDataFilterParams.error.result.no.dvobject")); return; } - - if ((this.publicationStatuses == null)||(this.publicationStatuses.isEmpty())){ - this.addError("No results. Please select one of " + StringUtils.join(MyDataFilterParams.defaultPublishedStates, ", ") + "."); + + if ((this.publicationStatuses == null) || (this.publicationStatuses.isEmpty())) { + this.addError(BundleUtil.getStringFromBundle("dataretrieverAPI.user.not.found", + Arrays.asList(StringUtils.join(MyDataFilterParams.defaultPublishedStates, ", ")))); return; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFinder.java b/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFinder.java index 6acdfd9cdde..8ed2ef830e5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFinder.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFinder.java @@ -11,7 +11,9 @@ import edu.harvard.iq.dataverse.authorization.DataverseRolePermissionHelper; import edu.harvard.iq.dataverse.authorization.groups.GroupServiceBean; import edu.harvard.iq.dataverse.search.SearchFields; +import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -47,7 +49,6 @@ public class MyDataFinder { private RoleAssigneeServiceBean roleAssigneeService; private DvObjectServiceBean dvObjectServiceBean; private GroupServiceBean groupService; - private String noMsgResultsFound; //private RoleAssigneeServiceBean roleService = new RoleAssigneeServiceBean(); //private MyDataQueryHelperServiceBean myDataQueryHelperService; // -------------------- @@ -86,12 +87,11 @@ public class MyDataFinder { private List fileGrandparentFileIds = new ArrayList<>(); // dataverse has file permissions - public 
MyDataFinder(DataverseRolePermissionHelper rolePermissionHelper, RoleAssigneeServiceBean roleAssigneeService, DvObjectServiceBean dvObjectServiceBean, GroupServiceBean groupService, String _noMsgResultsFound) { + public MyDataFinder(DataverseRolePermissionHelper rolePermissionHelper, RoleAssigneeServiceBean roleAssigneeService, DvObjectServiceBean dvObjectServiceBean, GroupServiceBean groupService) { this.rolePermissionHelper = rolePermissionHelper; this.roleAssigneeService = roleAssigneeService; this.dvObjectServiceBean = dvObjectServiceBean; this.groupService = groupService; - this.noMsgResultsFound = _noMsgResultsFound; this.loadHarvestedDataverseIds(); } @@ -213,7 +213,7 @@ private List getSolrFilterQueries(boolean totalCountsOnly){ // ----------------------------------------------------------------- String dvObjectFQ = this.getSolrDvObjectFilterQuery(); if (dvObjectFQ ==null){ - this.addErrorMessage(noMsgResultsFound); + this.addErrorMessage(BundleUtil.getStringFromBundle("myDataFinder.error.result.empty")); return null; } filterQueries.add(dvObjectFQ); @@ -286,7 +286,7 @@ public String getSolrDvObjectFilterQuery(){ if ((distinctEntityIds.isEmpty()) && (distinctParentIds.isEmpty())) { - this.addErrorMessage(noMsgResultsFound); + this.addErrorMessage(BundleUtil.getStringFromBundle("myDataFinder.error.result.empty")); return null; } @@ -430,24 +430,25 @@ public JsonArrayBuilder getListofSelectedRoles(){ } - private boolean runStep1RoleAssignments(){ + private boolean runStep1RoleAssignments() { List results = this.roleAssigneeService.getAssigneeAndRoleIdListFor(filterParams); //logger.info("runStep1RoleAssignments results: " + results.toString()); - if (results == null){ - this.addErrorMessage("Sorry, the EntityManager isn't working (still)."); + if (results == null) { + this.addErrorMessage(BundleUtil.getStringFromBundle("myDataFinder.error.result.null")); return false; - }else if (results.isEmpty()){ + } else if (results.isEmpty()) { List roleNames = 
this.rolePermissionHelper.getRoleNamesByIdList(this.filterParams.getRoleIds()); - if ((roleNames == null)||(roleNames.isEmpty())){ - this.addErrorMessage("Sorry, you have no assigned roles."); - }else{ - if (roleNames.size()==1){ - this.addErrorMessage("Sorry, nothing was found for this role: " + StringUtils.join(roleNames, ", ")); - }else{ - this.addErrorMessage("Sorry, nothing was found for these roles: " + StringUtils.join(roleNames, ", ")); + if ((roleNames == null) || (roleNames.isEmpty())) { + this.addErrorMessage(BundleUtil.getStringFromBundle("myDataFinder.error.result.no.role")); + } else { + final List args = Arrays.asList(StringUtils.join(roleNames, ", ")); + if (roleNames.size() == 1) { + this.addErrorMessage(BundleUtil.getStringFromBundle("myDataFinder.error.result.role.empty", args)); + } else { + this.addErrorMessage(BundleUtil.getStringFromBundle("myDataFinder.error.result.roles.empty", args)); } } return false; @@ -497,7 +498,7 @@ private boolean runStep2DirectAssignments(){ List results = this.dvObjectServiceBean.getDvObjectInfoForMyData(directDvObjectIds); //List results = this.roleAssigneeService.getAssignmentsFor(this.userIdentifier); if (results.isEmpty()){ - this.addErrorMessage("Sorry, you have no assigned Dataverses, Datasets, or Files."); + this.addErrorMessage(BundleUtil.getStringFromBundle("myDataFinder.error.result.no.dvobject")); return false; } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 64a1c626f13..a01d175d903 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2781,7 +2781,22 @@ passwdVal.passwdReq.lowercase=lowercase passwdVal.passwdReq.letter=letter passwdVal.passwdReq.numeral=numeral passwdVal.passwdReq.special=special +#mydata API (DataRetriverAPI.java and MyDataFinder.java) dataretrieverAPI.noMsgResultsFound=Sorry, no results were found. 
+dataretrieverAPI.authentication.required=Requires authentication. Please login. +dataretrieverAPI.authentication.required.opt=retrieveMyDataAsJsonString. User not found! Shouldn't be using this anyway. +dataretrieverAPI.user.not.found=No user found for: "{0}" +dataretrieverAPI.solr.error=Sorry! There was an error with the search service. +dataretrieverAPI.solr.error.opt=Sorry! There was a SOLR Error. +myDataFilterParams.error.no.user=Sorry! No user was found! +myDataFilterParams.error.result.no.role=No results. Please select at least one Role. +myDataFilterParams.error.result.no.dvobject=No results. Please select one of Dataverses, Datasets, Files. +myDataFilterParams.error.result.no.publicationStatus=No results. Please select one of {0}. +myDataFinder.error.result.null=Sorry, the EntityManager isn't working (still). +myDataFinder.error.result.no.role=Sorry, you have no assigned roles. +myDataFinder.error.result.role.empty=Sorry, nothing was found for this role: {0} +myDataFinder.error.result.roles.empty=Sorry, nothing was found for these roles: {0} +myDataFinder.error.result.no.dvobject=Sorry, you have no assigned Dataverses, Datasets, or Files. #xlsxfilereader.java xlsxfilereader.ioexception.parse=Could not parse Excel/XLSX spreadsheet. 
{0} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java index 69d5db08744..fd0952fd336 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java @@ -3,10 +3,14 @@ import com.jayway.restassured.RestAssured; import com.jayway.restassured.response.Response; import edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism; +import edu.harvard.iq.dataverse.util.BundleUtil; + import org.junit.BeforeClass; import org.junit.Test; import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import static javax.ws.rs.core.Response.Status.OK; import static javax.ws.rs.core.Response.Status.UNAUTHORIZED; @@ -15,6 +19,8 @@ public class DataRetrieverApiIT { + private static final String ERR_MSG_FORMAT = "{\"success\":false,\"error_message\":\"%s\"}"; + @BeforeClass public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); @@ -35,14 +41,24 @@ public void testRetrieveMyDataAsJsonString() { String badUserIdentifier = "bad-identifier"; Response invalidUserIdentifierResponse = UtilIT.retrieveMyDataAsJsonString(superUserApiToken, badUserIdentifier, emptyRoleIdsList); - assertEquals("{\"success\":false,\"error_message\":\"No user found for: \\\"" + badUserIdentifier + "\\\"\"}", invalidUserIdentifierResponse.prettyPrint()); + assertEquals(prettyPrintError("dataretrieverAPI.user.not.found", Arrays.asList(badUserIdentifier)), invalidUserIdentifierResponse.prettyPrint()); assertEquals(OK.getStatusCode(), invalidUserIdentifierResponse.getStatusCode()); // Call as superuser with valid user identifier Response createSecondUserResponse = UtilIT.createRandomUser(); String userIdentifier = UtilIT.getUsernameFromResponse(createSecondUserResponse); Response validUserIdentifierResponse = UtilIT.retrieveMyDataAsJsonString(superUserApiToken, userIdentifier, 
emptyRoleIdsList); - assertEquals("{\"success\":false,\"error_message\":\"Sorry, you have no assigned roles.\"}", validUserIdentifierResponse.prettyPrint()); + assertEquals(prettyPrintError("myDataFinder.error.result.no.role", null), validUserIdentifierResponse.prettyPrint()); assertEquals(OK.getStatusCode(), validUserIdentifierResponse.getStatusCode()); } + + private static String prettyPrintError(String resourceBundleKey, List params) { + final String errorMessage; + if (params == null || params.isEmpty()) { + errorMessage = BundleUtil.getStringFromBundle(resourceBundleKey); + } else { + errorMessage = BundleUtil.getStringFromBundle(resourceBundleKey, params); + } + return String.format(ERR_MSG_FORMAT, errorMessage.replaceAll("\"", "\\\\\"")); + } } From 697f081c7dc896344199818bf0ea09e4cc30558d Mon Sep 17 00:00:00 2001 From: Steven Ferey Date: Wed, 23 Aug 2023 16:39:44 +0200 Subject: [PATCH 08/81] Update src/main/java/propertyFiles/Bundle.properties Co-authored-by: Philip Durbin --- src/main/java/propertyFiles/Bundle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index b9651d0c36e..892dbd361da 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2800,7 +2800,7 @@ dataretrieverAPI.authentication.required=Requires authentication. Please login. dataretrieverAPI.authentication.required.opt=retrieveMyDataAsJsonString. User not found! Shouldn't be using this anyway. dataretrieverAPI.user.not.found=No user found for: "{0}" dataretrieverAPI.solr.error=Sorry! There was an error with the search service. -dataretrieverAPI.solr.error.opt=Sorry! There was a SOLR Error. +dataretrieverAPI.solr.error.opt=Sorry! There was a Solr Error. myDataFilterParams.error.no.user=Sorry! No user was found! myDataFilterParams.error.result.no.role=No results. Please select at least one Role. 
myDataFilterParams.error.result.no.dvobject=No results. Please select one of Dataverses, Datasets, Files. From f146f08d5cbea067572eb25350b10dcca4c0ee8d Mon Sep 17 00:00:00 2001 From: sferey Date: Wed, 23 Aug 2023 17:01:03 +0200 Subject: [PATCH 09/81] changed translation myDataFinder.error.result.null --- src/main/java/propertyFiles/Bundle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 892dbd361da..bdebc4a20f0 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2805,7 +2805,7 @@ myDataFilterParams.error.no.user=Sorry! No user was found! myDataFilterParams.error.result.no.role=No results. Please select at least one Role. myDataFilterParams.error.result.no.dvobject=No results. Please select one of Dataverses, Datasets, Files. myDataFilterParams.error.result.no.publicationStatus=No results. Please select one of {0}. -myDataFinder.error.result.null=Sorry, the EntityManager isn't working (still). +myDataFinder.error.result.null=Sorry, the authenticated user ID could not be retrieved. myDataFinder.error.result.no.role=Sorry, you have no assigned roles. 
myDataFinder.error.result.role.empty=Sorry, nothing was found for this role: {0} myDataFinder.error.result.roles.empty=Sorry, nothing was found for these roles: {0} From fbf1f9cbd05e18031170f206b3352c9a424cab16 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 16 Oct 2023 15:36:27 +0200 Subject: [PATCH 10/81] RO-Crate metadata mime type detection --- .../edu/harvard/iq/dataverse/util/FileUtil.java | 15 ++++++++++++++- .../MimeTypeDetectionByFileName.properties | 2 ++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 1ad389fb0e2..a7c3488e57f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -86,6 +86,7 @@ import java.util.HashMap; import java.util.List; import java.util.Optional; +import java.util.ResourceBundle; import java.util.UUID; import java.util.logging.Level; import java.util.logging.Logger; @@ -182,6 +183,7 @@ public class FileUtil implements java.io.Serializable { public static final String MIME_TYPE_NETCDF = "application/netcdf"; public static final String MIME_TYPE_XNETCDF = "application/x-netcdf"; public static final String MIME_TYPE_HDF5 = "application/x-hdf5"; + public static final String MIME_TYPE_RO_CRATE = "application/ld+json; profile=\"http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate\""; // File type "thumbnail classes" tags: @@ -421,6 +423,11 @@ public static String retestIngestableFileType(File file, String fileType) { } public static String determineFileType(File f, String fileName) throws IOException{ + final ResourceBundle bundle = BundleUtil.getResourceBundle("MimeTypeDetectionByFileName"); + if (bundle.keySet().contains(fileName)) { + return bundle.getString(fileName); + } + String fileType = null; String fileExtension = getFileExtension(fileName); @@ -545,6 
+552,11 @@ public static String determineFileType(File f, String fileName) throws IOExcepti } public static String determineFileTypeByNameAndExtension(String fileName) { + final ResourceBundle bundle = BundleUtil.getResourceBundle("MimeTypeDetectionByFileName"); + if (bundle.keySet().contains(fileName)) { + return bundle.getString(fileName); + } + String mimetypesFileTypeMapResult = MIME_TYPE_MAP.getContentType(fileName); logger.fine("MimetypesFileTypeMap type by extension, for " + fileName + ": " + mimetypesFileTypeMapResult); if (mimetypesFileTypeMapResult != null) { @@ -825,7 +837,8 @@ public static boolean useRecognizedType(String suppliedContentType, String recog || canIngestAsTabular(recognizedType) || recognizedType.equals("application/fits-gzipped") || recognizedType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE) || recognizedType.equalsIgnoreCase(BagItFileHandler.FILE_TYPE) - || recognizedType.equals(MIME_TYPE_ZIP)) { + || recognizedType.equals(MIME_TYPE_ZIP) + || recognizedType.equals(MIME_TYPE_RO_CRATE)) { return true; } return false; diff --git a/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties b/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties index 70b0c4e371e..5c1a22bfd5f 100644 --- a/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties +++ b/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties @@ -2,3 +2,5 @@ Makefile=text/x-makefile Snakemake=text/x-snakemake Dockerfile=application/x-docker-file Vagrantfile=application/x-vagrant-file +ro-crate-metadata.json=application/ld+json; profile="http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate" +ro-crate-metadata.jsonld=application/ld+json; profile="http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate" From fbc6a54ba067e24f135d11cc7f66a950838c45a0 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 16 Oct 2023 19:34:46 +0200 
Subject: [PATCH 11/81] fixed display and facet string for mime types with profile + test + reslease notes --- .../10015-RO-Crate-metadata-file.md | 10 +++++++ .../harvard/iq/dataverse/util/FileUtil.java | 10 +++++++ .../propertyFiles/MimeTypeDisplay.properties | 1 + .../propertyFiles/MimeTypeFacets.properties | 1 + .../iq/dataverse/util/FileUtilTest.java | 29 +++++++++++++++++++ .../resources/fileutil/ro-crate-metadata.json | 1 + 6 files changed, 52 insertions(+) create mode 100644 doc/release-notes/10015-RO-Crate-metadata-file.md create mode 100644 src/test/resources/fileutil/ro-crate-metadata.json diff --git a/doc/release-notes/10015-RO-Crate-metadata-file.md b/doc/release-notes/10015-RO-Crate-metadata-file.md new file mode 100644 index 00000000000..4b018a634f7 --- /dev/null +++ b/doc/release-notes/10015-RO-Crate-metadata-file.md @@ -0,0 +1,10 @@ +Detection of mime-types based on a filename with extension and detection of the RO-Crate metadata files. + +From now on, filenames with extensions can be added into `MimeTypeDetectionByFileName.properties` file. Filenames added there will take precedence over simply recognizing files by extensions. For example, two new filenames are added into that file: +``` +ro-crate-metadata.json=application/ld+json; profile="http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate" +ro-crate-metadata.jsonld=application/ld+json; profile="http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate" +``` + +Therefore, files named `ro-crate-metadata.json` will be then detected as RO-Crated metadata files from now on, instead as generic `JSON` files. 
+For more information on the RO-Crate specifications, see https://www.researchobject.org/ro-crate diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index a7c3488e57f..baff17af601 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -280,6 +280,11 @@ public static String getUserFriendlyFileType(DataFile dataFile) { if (fileType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)){ return ShapefileHandler.SHAPEFILE_FILE_TYPE_FRIENDLY_NAME; } + try { + return BundleUtil.getStringFromPropertyFile(fileType,"MimeTypeDisplay" ); + } catch (MissingResourceException e) { + //NOOP: we will try again after trimming ";" + } if (fileType.contains(";")) { fileType = fileType.substring(0, fileType.indexOf(";")); } @@ -294,6 +299,11 @@ public static String getUserFriendlyFileType(DataFile dataFile) { } public static String getIndexableFacetFileType(DataFile dataFile) { + try { + return BundleUtil.getStringFromDefaultPropertyFile(dataFile.getContentType(),"MimeTypeFacets" ); + } catch (MissingResourceException e) { + //NOOP: we will try again after trimming ";" + } String fileType = getFileType(dataFile); try { return BundleUtil.getStringFromDefaultPropertyFile(fileType,"MimeTypeFacets" ); diff --git a/src/main/java/propertyFiles/MimeTypeDisplay.properties b/src/main/java/propertyFiles/MimeTypeDisplay.properties index 295ac226fa1..8486a113116 100644 --- a/src/main/java/propertyFiles/MimeTypeDisplay.properties +++ b/src/main/java/propertyFiles/MimeTypeDisplay.properties @@ -222,5 +222,6 @@ text/xml-graphml=GraphML Network Data application/octet-stream=Unknown application/x-docker-file=Docker Image File application/x-vagrant-file=Vagrant Image File 
+application/ld+json;\u0020profile\u003d\u0022http\u003a//www.w3.org/ns/json-ld#flattened\u0020http\u003a//www.w3.org/ns/json-ld#compacted\u0020https\u003a//w3id.org/ro/crate\u0022=RO-Crate metadata # Dataverse-specific application/vnd.dataverse.file-package=Dataverse Package diff --git a/src/main/java/propertyFiles/MimeTypeFacets.properties b/src/main/java/propertyFiles/MimeTypeFacets.properties index aaab66f20ae..831c509b860 100644 --- a/src/main/java/propertyFiles/MimeTypeFacets.properties +++ b/src/main/java/propertyFiles/MimeTypeFacets.properties @@ -224,5 +224,6 @@ video/webm=Video text/xml-graphml=Network Data # Other application/octet-stream=Unknown +application/ld+json;\u0020profile\u003d\u0022http\u003a//www.w3.org/ns/json-ld#flattened\u0020http\u003a//www.w3.org/ns/json-ld#compacted\u0020https\u003a//w3id.org/ro/crate\u0022=Metadata # Dataverse-specific application/vnd.dataverse.file-package=Data diff --git a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java index 2cfe9f25d7e..396d613e768 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java @@ -371,4 +371,33 @@ public void testHdf4File() throws IOException { assertEquals("application/octet-stream", contentType); } + @Test + public void testDetermineFileTypeROCrate() { + final String roCrateContentType = "application/ld+json; profile=\"http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate\""; + final DataFile rocrate = new DataFile(roCrateContentType); + + assertEquals(roCrateContentType, rocrate.getContentType()); + assertEquals("RO-Crate metadata", FileUtil.getUserFriendlyFileType(rocrate)); + assertEquals("Metadata", FileUtil.getIndexableFacetFileType(rocrate)); + + final File roCrateFile = new File("src/test/resources/fileutil/ro-crate-metadata.json"); + if (roCrateFile.exists()) { + try { 
+ assertEquals(roCrateContentType, FileUtil.determineFileType(roCrateFile, "ro-crate-metadata.json")); + } catch (IOException ex) { + Logger.getLogger(FileUtilTest.class.getName()).log(Level.SEVERE, null, ex); + } + } else { + fail("File does not exist: " + roCrateFile.toPath().toString()); + } + + // test ";" removal + final String dockerFileWithProfile = "application/x-docker-file; profile=\"http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate\""; + final DataFile dockerDataFile = new DataFile(dockerFileWithProfile); + + assertEquals(dockerFileWithProfile, dockerDataFile.getContentType()); + assertEquals("Docker Image File", FileUtil.getUserFriendlyFileType(dockerDataFile)); + assertEquals("Code", FileUtil.getIndexableFacetFileType(dockerDataFile)); + } + } diff --git a/src/test/resources/fileutil/ro-crate-metadata.json b/src/test/resources/fileutil/ro-crate-metadata.json new file mode 100644 index 00000000000..5ac4b24ad2b --- /dev/null +++ b/src/test/resources/fileutil/ro-crate-metadata.json @@ -0,0 +1 @@ 
+{"@context":["https://w3id.org/ro/crate/1.1/context",{"metadataContext":"https://language-archives.services/metadata-definitions#metadataContext","orthographicNotes":"https://language-archives.services/metadata-definitions#orthographicNotes","media":"https://language-archives.services/metadata-definitions#media","comments":"https://language-archives.services/metadata-definitions#comments","private":"https://language-archives.services/metadata-definitions#private","depositFormReceived":"https://language-archives.services/metadata-definitions#depositFormReceived","subjectLanguages":"https://language-archives.services/metadata-definitions#subjectLanguages","originatedOn":"https://language-archives.services/metadata-definitions#originatedOn","languageAsGiven":"https://language-archives.services/metadata-definitions#languageAsGiven","contentLanguages":"https://language-archives.services/metadata-definitions#contentLanguages","dialect":"https://language-archives.services/metadata-definitions#dialect","discourseType":"https://language-archives.services/metadata-definitions#discourseType","bornDigital":"https://language-archives.services/metadata-definitions#bornDigital","accessNarrative":"https://language-archives.services/metadata-definitions#accessNarrative","receivedOn":"https://language-archives.services/metadata-definitions#receivedOn","digitisedOn":"https://language-archives.services/metadata-definitions#digitisedOn","ingestNotes":"https://language-archives.services/metadata-definitions#ingestNotes","adminComment":"https://language-archives.services/metadata-definitions#adminComment","external":"https://language-archives.services/metadata-definitions#external","dataType":"https://language-archives.services/metadata-definitions#dataType","citeAs":"https://language-archives.services/metadata-definitions#citeAs","originatedOnNarrative":"https://language-archives.services/metadata-definitions#originatedOnNarrative","countries":"https://language-archives.services/metadat
a-definitions#countries","tapesReturned":"https://language-archives.services/metadata-definitions#tapesReturned","originalMedia":"https://language-archives.services/metadata-definitions#originalMedia","metadataExportable":"https://language-archives.services/metadata-definitions#metadataExportable","metadataImportedOn":"https://language-archives.services/metadata-definitions#metadataImportedOn","metadataExportedOn":"https://language-archives.services/metadata-definitions#metadataExportedOn","tracking":"https://language-archives.services/metadata-definitions#tracking","fieldsOfResearch":"https://language-archives.services/metadata-definitions#fieldsOfResearch","role":"https://language-archives.services/metadata-definitions#role","doi":"https://language-archives.services/metadata-definitions#doi","sampleRate":"https://language-archives.services/metadata-definitions#sampleRate","channels":"https://language-archives.services/metadata-definitions#channels","fps":"https://language-archives.services/metadata-definitions#fps","essenceId":"https://language-archives.services/metadata-definitions#essenceId"}],"@graph":[{"@id":"#Erakor village","@type":"Place","name":"Erakor village"},{"@id":"#country_Australia","@type":"Country","name":"Australia"},{"@id":"#country_Vanuatu","@type":"Country","code":"VU","name":"Vanuatu"},{"@id":"#country_null","@type":"Country"},{"@id":"#geo_166.427,-22.283,166.467,-22.241","@type":"GeoShape","box":"166.427,-22.283 166.467,-22.241"},{"@id":"#geo_168.159,-17.83,168.594,-17.585","@type":"GeoShape","box":"168.159,-17.83 168.594,-17.585"},{"@id":"#geo_168.217,-17.8235,168.317,-17.7235","@type":"GeoShape","box":"168.217,-17.8235 
168.317,-17.7235"},{"@id":"#identifier_collectionId","@type":"PropertyValue","name":"collectionIdentifier","value":"NT1"},{"@id":"#identifier_doi","@type":"PropertyValue","name":"doi","value":"10.4225/72/56F94A61DA9EC"},{"@id":"#identifier_domain","@type":"PropertyValue","name":"domain","value":"paradisec.org.au"},{"@id":"#identifier_hashid","@type":"PropertyValue","name":"hashId","value":"72b3dc1401c8ff06aacba0990a128fc113cf9ad5275f494b05c1142177356561bd7f4c0e8800bade2cbbbed75f6d9d019894735ad7e40762684d243a442d658a"},{"@id":"#identifier_id","@type":"PropertyValue","name":"id","value":"/paradisec.org.au/NT1/98007"},{"@id":"#identifier_itemId","@type":"PropertyValue","name":"itemIdentifier","value":"98007"},{"@id":"#language_bis","@type":"Language","code":"bis","location":{"@id":"#geo_166.427,-22.283,166.467,-22.241"},"name":"Bislama"},{"@id":"#language_erk","@type":"Language","code":"erk","location":{"@id":"#geo_168.159,-17.83,168.594,-17.585"},"name":"Efate, South"},{"@id":"#place_geo_168.217,-17.8235,168.317,-17.7235","@type":"Place","geo":{"@id":"#geo_168.217,-17.8235,168.317,-17.7235"}},{"@id":"./","@type":["Dataset","RepositoryObject"],"additionalType":"item","contentLocation":[{"@id":"#Erakor village"},{"@id":"#place_geo_168.217,-17.8235,168.317,-17.7235"}],"contributor":[{"@id":"http://nla.gov.au/nla.party-479603"},{"@id":"Kalsarap Namaf"},{"@id":"Iokopeth null"},{"@id":"John Maklen"},{"@id":"Waia Tenene"}],"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2018-05-17T04:13:04.000Z","description":"NT1-98007. Text #047 (speaker is John Maklen. Text title: History of villages before Erakor); Text #048 (speaker is John Maklen. Text title: Mantu the flying fox and Erromango); Text #049. Text title: Asaraf (speaker is John Maklen);Text #050. Text title: Mumu and Kotkot (speaker is John Maklen); Text #051. Text title: Natopu ni Erakor—the spirit who lives at Erakor (speaker is John Maklen);Text #038. 
Text title: The need for respect (speaker is Iokopeth) Stories can be seen at NT8-TEXT. There are time-aligned transcripts of this item and handwritten transcripts by Manuel Wayane scanned as jpg files.","hasPart":[{"@id":"NT1-98007-001.jpg"},{"@id":"NT1-98007-002.jpg"},{"@id":"NT1-98007-003.jpg"},{"@id":"NT1-98007-004.jpg"},{"@id":"NT1-98007-005.jpg"},{"@id":"NT1-98007-006.jpg"},{"@id":"NT1-98007-007.jpg"},{"@id":"NT1-98007-008.jpg"},{"@id":"NT1-98007-009.jpg"},{"@id":"NT1-98007-010.jpg"},{"@id":"NT1-98007-011.jpg"},{"@id":"NT1-98007-012.jpg"},{"@id":"NT1-98007-013.jpg"},{"@id":"NT1-98007-014.jpg"},{"@id":"NT1-98007-015.jpg"},{"@id":"NT1-98007-016.jpg"},{"@id":"NT1-98007-017.jpg"},{"@id":"NT1-98007-018.jpg"},{"@id":"NT1-98007-019.jpg"},{"@id":"NT1-98007-020.jpg"},{"@id":"NT1-98007-021.jpg"},{"@id":"NT1-98007-022.jpg"},{"@id":"NT1-98007-023.jpg"},{"@id":"NT1-98007-024.jpg"},{"@id":"NT1-98007-025.jpg"},{"@id":"NT1-98007-026.jpg"},{"@id":"NT1-98007-027.jpg"},{"@id":"NT1-98007-028.jpg"},{"@id":"NT1-98007-029.jpg"},{"@id":"NT1-98007-030.jpg"},{"@id":"NT1-98007-031.jpg"},{"@id":"NT1-98007-98007A.mp3"},{"@id":"NT1-98007-98007A.wav"},{"@id":"NT1-98007-98007B.mp3"},{"@id":"NT1-98007-98007B.wav"},{"@id":"NT1-98007-98007az.xml"},{"@id":"NT1-98007-A.tab"},{"@id":"NT1-98007-A.xml"},{"@id":"NT1-98007-B.tab"},{"@id":"NT1-98007-B.xml"},{"@id":"NT1-98007-98007A.ixt"},{"@id":"NT1-98007-98007A.trs"},{"@id":"NT1-98007-98007A.flextext"},{"@id":"NT1-98007-98007A.eaf"},{"@id":"NT1-98007-98007B.eaf"}],"identifier":[{"@id":"#identifier_domain"},{"@id":"#identifier_id"},{"@id":"#identifier_hashid"},{"@id":"#identifier_itemId"},{"@id":"#identifier_collectionId"},{"@id":"#identifier_doi"}],"license":{"@id":"_:b0"},"memberOf":{"@id":"/paradisec.org.au/NT1"},"name":"Recordings in South Efate","publisher":{"@id":"http://nla.gov.au/nla.party-593909"},"bornDigital":0,"contentLanguages":[{"@id":"#language_bis"},{"@id":"#language_erk"}],"countries":{"@id":"#country_Vanuatu"},"digitisedOn":"Sun Dec 
31 2000 13:00:00 GMT+0000 (Coordinated Universal Time)","external":0,"languageAsGiven":"Nafsan","metadataExportable":1,"originalMedia":"audiocassette","originatedOn":"1998-10-03","private":0,"subjectLanguages":{"@id":"#language_erk"},"tapesReturned":0},{"@id":"Iokopeth null","@type":"Person","givenName":"Iokopeth","homeLocation":{"@id":"#country_null"},"name":"Iokopeth","role":{"@id":"role_speaker"}},{"@id":"John Maklen","@type":"Person","familyName":"Maklen","givenName":"John","homeLocation":{"@id":"#country_null"},"name":"John Maklen","role":{"@id":"role_speaker"}},{"@id":"Kalsarap Namaf","@type":"Person","familyName":"Namaf","givenName":"Kalsarap","homeLocation":{"@id":"#country_null"},"name":"Kalsarap Namaf","role":{"@id":"role_speaker"}},{"@id":"NT1-98007-001.jpg","@type":"File","contentSize":1658368,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:31.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-001.jpg","doi":"10.4225/72/575C8A369D680","essenceId":1010709},{"@id":"NT1-98007-002.jpg","@type":"File","contentSize":1816576,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:37.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-002.jpg","doi":"10.4225/72/575C8A3C15C98","essenceId":1010710},{"@id":"NT1-98007-003.jpg","@type":"File","contentSize":1811968,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:43.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-003.jpg","doi":"10.4225/72/575C8A41DD973","essenceId":1010711},{"@id":"NT1-98007-004.jpg","@type":"File","contentSize":1827840,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:48.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-004.jpg","doi":"10.4225/72/575C8A4767685","essenceId":1010712},{"@id":"NT1-98007-005.jpg","@type":"File","contentSize":1853440,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:54.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-005.jpg","doi":"10.4
225/72/575C8A4CD8971","essenceId":1010713},{"@id":"NT1-98007-006.jpg","@type":"File","contentSize":1796608,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:59.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-006.jpg","doi":"10.4225/72/575C8A525C618","essenceId":1010714},{"@id":"NT1-98007-007.jpg","@type":"File","contentSize":1780224,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:05.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-007.jpg","doi":"10.4225/72/575C8A5810189","essenceId":1010715},{"@id":"NT1-98007-008.jpg","@type":"File","contentSize":1737728,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:11.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-008.jpg","doi":"10.4225/72/575C8A5DB1113","essenceId":1010716},{"@id":"NT1-98007-009.jpg","@type":"File","contentSize":1781760,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:16.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-009.jpg","doi":"10.4225/72/575C8A63479C1","essenceId":1010717},{"@id":"NT1-98007-010.jpg","@type":"File","contentSize":1797632,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:22.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-010.jpg","doi":"10.4225/72/575C8A68B23D2","essenceId":1010718},{"@id":"NT1-98007-011.jpg","@type":"File","contentSize":1800704,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:28.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-011.jpg","doi":"10.4225/72/575C8A6E73D01","essenceId":1010719},{"@id":"NT1-98007-012.jpg","@type":"File","contentSize":1822720,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:33.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-012.jpg","doi":"10.4225/72/575C8A742DE00","essenceId":1010720},{"@id":"NT1-98007-013.jpg","@type":"File","contentSize":1809920,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:39.000Z","enc
odingFormat":"image/jpeg","name":"NT1-98007-013.jpg","doi":"10.4225/72/575C8A79B1B0F","essenceId":1010721},{"@id":"NT1-98007-014.jpg","@type":"File","contentSize":1821696,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:44.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-014.jpg","doi":"10.4225/72/575C8A7F3F253","essenceId":1010722},{"@id":"NT1-98007-015.jpg","@type":"File","contentSize":1626624,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:50.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-015.jpg","doi":"10.4225/72/575C8A84C0022","essenceId":1010723},{"@id":"NT1-98007-016.jpg","@type":"File","contentSize":1633792,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:56.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-016.jpg","doi":"10.4225/72/575C8A8A9A944","essenceId":1010724},{"@id":"NT1-98007-017.jpg","@type":"File","contentSize":1870336,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:01.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-017.jpg","doi":"10.4225/72/575C8A90506E6","essenceId":1010725},{"@id":"NT1-98007-018.jpg","@type":"File","contentSize":1858560,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:07.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-018.jpg","doi":"10.4225/72/575C8A95DF462","essenceId":1010726},{"@id":"NT1-98007-019.jpg","@type":"File","contentSize":1852416,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:12.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-019.jpg","doi":"10.4225/72/575C8A9B56F2F","essenceId":1010727},{"@id":"NT1-98007-020.jpg","@type":"File","contentSize":1838080,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:18.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-020.jpg","doi":"10.4225/72/575C8AA0F09B8","essenceId":1010728},{"@id":"NT1-98007-021.jpg","@type":"File","contentSize":1861120,"dateCreated":"2012-09-
27T10:08:01.000Z","dateModified":"2016-06-11T22:03:24.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-021.jpg","doi":"10.4225/72/575C8AA6B23AF","essenceId":1010729},{"@id":"NT1-98007-022.jpg","@type":"File","contentSize":1835008,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:29.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-022.jpg","doi":"10.4225/72/575C8AAC3A545","essenceId":1010730},{"@id":"NT1-98007-023.jpg","@type":"File","contentSize":1827328,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:35.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-023.jpg","doi":"10.4225/72/575C8AB1C991E","essenceId":1010731},{"@id":"NT1-98007-024.jpg","@type":"File","contentSize":1805312,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:40.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-024.jpg","doi":"10.4225/72/575C8AB74847D","essenceId":1010732},{"@id":"NT1-98007-025.jpg","@type":"File","contentSize":1912832,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:46.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-025.jpg","doi":"10.4225/72/575C8ABCD2B44","essenceId":1010733},{"@id":"NT1-98007-026.jpg","@type":"File","contentSize":1889792,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:51.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-026.jpg","doi":"10.4225/72/575C8AC26D69E","essenceId":1010734},{"@id":"NT1-98007-027.jpg","@type":"File","contentSize":1878528,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:57.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-027.jpg","doi":"10.4225/72/575C8AC7F3886","essenceId":1010735},{"@id":"NT1-98007-028.jpg","@type":"File","contentSize":1868288,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:02.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-028.jpg","doi":"10.4225/72/575C8ACD72196","essenceId":1010736},{"@id":"NT1-98007-029.j
pg","@type":"File","contentSize":1859584,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:08.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-029.jpg","doi":"10.4225/72/575C8AD2E8E82","essenceId":1010737},{"@id":"NT1-98007-030.jpg","@type":"File","contentSize":1859072,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:13.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-030.jpg","doi":"10.4225/72/575C8AD8775E6","essenceId":1010738},{"@id":"NT1-98007-031.jpg","@type":"File","contentSize":1708544,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:19.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-031.jpg","doi":"10.4225/72/575C8ADDE64B8","essenceId":1010739},{"@id":"NT1-98007-98007A.eaf","@type":"File","contentSize":165674,"dateCreated":"2016-08-01T05:00:06.000Z","dateModified":"2016-08-01T16:01:41.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007A.eaf","doi":"10.4225/72/579F725FDD059","essenceId":1100487},{"@id":"NT1-98007-98007A.flextext","@type":"File","contentSize":141244,"dateCreated":"2016-05-20T04:00:06.000Z","dateModified":"2016-06-24T12:41:36.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007A.flextext","doi":"10.4225/72/576D2A7B75614","essenceId":1086277},{"@id":"NT1-98007-98007A.ixt","@type":"File","contentSize":40299,"dateCreated":"2016-04-18T07:00:07.000Z","dateModified":"2016-06-24T08:25:06.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007A.ixt","doi":"10.4225/72/576CEE5CED1FB","essenceId":1085095},{"@id":"NT1-98007-98007A.mp3","@type":"File","bitrate":128009,"contentSize":43667584,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2019-11-29T09:17:34.000Z","duration":2729.02,"encodingFormat":"audio/mpeg","name":"NT1-98007-98007A.mp3","channels":2,"doi":"10.4225/72/575C8AE370B93","essenceId":1010740,"sampleRate":44100},{"@id":"NT1-98007-98007A.trs","@type":"File","contentSize":28292,"dateCreated":"2016-04-26T10:00:06.000
Z","dateModified":"2016-06-24T08:45:27.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007A.trs","doi":"10.4225/72/576CF32101764","essenceId":1085293},{"@id":"NT1-98007-98007A.wav","@type":"File","bitrate":4608000,"contentSize":1571894006,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2019-11-29T09:16:52.000Z","duration":2728.98,"encodingFormat":"audio/x-wav","name":"NT1-98007-98007A.wav","channels":2,"doi":"10.4225/72/575C8AE8E6E6B","essenceId":1010741,"sampleRate":96000},{"@id":"NT1-98007-98007B.eaf","@type":"File","contentSize":118748,"dateCreated":"2016-08-01T05:00:07.000Z","dateModified":"2016-08-01T16:01:47.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007B.eaf","doi":"10.4225/72/579F7265746C0","essenceId":1100493},{"@id":"NT1-98007-98007B.mp3","@type":"File","bitrate":128007,"contentSize":35305600,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2019-11-29T09:12:58.000Z","duration":2206.47,"encodingFormat":"audio/mpeg","name":"NT1-98007-98007B.mp3","channels":2,"doi":"10.4225/72/575C8AEE64BCA","essenceId":1010742,"sampleRate":44100},{"@id":"NT1-98007-98007B.wav","@type":"File","bitrate":4608000,"contentSize":1270917002,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2019-11-29T09:15:22.000Z","duration":2206.45,"encodingFormat":"audio/x-wav","name":"NT1-98007-98007B.wav","channels":2,"doi":"10.4225/72/575C8AF3D2DA0","essenceId":1010743,"sampleRate":96000},{"@id":"NT1-98007-98007az.xml","@type":"File","contentSize":48755,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:46.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007az.xml","doi":"10.4225/72/575C8AF94896E","essenceId":1010744},{"@id":"NT1-98007-A.tab","@type":"File","contentSize":27810,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:57.000Z","encodingFormat":"text/plain","name":"NT1-98007-A.tab","doi":"10.4225/72/575C8B043764B","essenceId":1010746},{"@id":"NT1-98007-A.xml","@type":"File",
"contentSize":48788,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:05:03.000Z","encodingFormat":"application/xml","name":"NT1-98007-A.xml","doi":"10.4225/72/575C8B09BDC88","essenceId":1010747},{"@id":"NT1-98007-B.tab","@type":"File","contentSize":20239,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:05:08.000Z","encodingFormat":"text/plain","name":"NT1-98007-B.tab","doi":"10.4225/72/575C8B0F4F8F1","essenceId":1010748},{"@id":"NT1-98007-B.xml","@type":"File","contentSize":35289,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:05:14.000Z","encodingFormat":"application/xml","name":"NT1-98007-B.xml","doi":"10.4225/72/575C8B14C6702","essenceId":1010749},{"@id":"Waia Tenene","@type":"Person","familyName":"Tenene","givenName":"Waia","homeLocation":{"@id":"#country_null"},"name":"Waia Tenene","role":{"@id":"role_speaker"}},{"@id":"_:b0","@type":"CreativeWork","name":"Open (subject to agreeing to PDSC access conditions)"},{"@id":"http://nla.gov.au/nla.party-479603","@type":"Person","email":"thien@unimelb.edu.au","familyName":"Thieberger","givenName":"Nick","homeLocation":{"@id":"#country_Australia"},"name":"Nick Thieberger","role":[{"@id":"role_collector"},{"@id":"role_depositor"},{"@id":"role_recorder"}]},{"@id":"http://nla.gov.au/nla.party-593909","@type":"Organization","name":"University of Melbourne"},{"@id":"ro-crate-metadata.json","@type":"CreativeWork","conformsTo":{"@id":"https://w3id.org/ro/crate/1.1/context"},"about":{"@id":"./"}},{"@id":"role_collector","@type":"Role","name":"collector"},{"@id":"role_depositor","@type":"Role","name":"depositor"},{"@id":"role_recorder","@type":"Role","name":"recorder"},{"@id":"role_speaker","@type":"Role","name":"speaker"}]} From 0ead2c24ff7855396f83f07e724af7ce4b18f7be Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 1 Dec 2023 17:24:40 +0100 Subject: [PATCH 12/81] incmoplete metadata label visibility setting --- 
.../edu/harvard/iq/dataverse/DatasetPage.java | 10 ++++----- .../edu/harvard/iq/dataverse/FilePage.java | 18 ++++++++++----- .../iq/dataverse/mydata/DataRetrieverAPI.java | 17 ++++++++++++-- .../search/SearchIncludeFragment.java | 11 +++++++--- .../iq/dataverse/search/SolrSearchResult.java | 22 +++++++++++++++---- .../iq/dataverse/settings/JvmSettings.java | 1 + src/main/webapp/file.xhtml | 2 +- 7 files changed, 61 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index fc18257196d..7ab24e69152 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -2210,13 +2210,13 @@ private void displayPublishMessage(){ public boolean isValid() { if (valid == null) { - DatasetVersion version = dataset.getLatestVersion(); - if (!version.isDraft()) { + if (workingVersion.isDraft() || (canUpdateDataset() && JvmSettings.UI_SHOW_VALIDITY_LABEL.lookupOptional(Boolean.class).orElse(false))) { + final DatasetVersion newVersion = workingVersion.cloneDatasetVersion(); + newVersion.setDatasetFields(newVersion.initDatasetFields()); + valid = newVersion.isValid(); + } else { valid = true; } - DatasetVersion newVersion = version.cloneDatasetVersion(); - newVersion.setDatasetFields(newVersion.initDatasetFields()); - valid = newVersion.isValid(); } return valid; } diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index bfae80ade27..d9a0fe2d9b0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -31,6 +31,7 @@ import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; +import 
edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; @@ -283,13 +284,20 @@ private void displayPublishMessage(){ } } + Boolean valid = null; + public boolean isValid() { - if (!fileMetadata.getDatasetVersion().isDraft()) { - return true; + if (valid == null) { + final DatasetVersion workingVersion = fileMetadata.getDatasetVersion(); + if (workingVersion.isDraft() || (canUpdateDataset() && JvmSettings.UI_SHOW_VALIDITY_LABEL.lookupOptional(Boolean.class).orElse(false))) { + final DatasetVersion newVersion = workingVersion.cloneDatasetVersion(); + newVersion.setDatasetFields(newVersion.initDatasetFields()); + valid = newVersion.isValid(); + } else { + valid = true; + } } - DatasetVersion newVersion = fileMetadata.getDatasetVersion().cloneDatasetVersion(); - newVersion.setDatasetFields(newVersion.initDatasetFields()); - return newVersion.isValid(); + return valid; } private boolean canViewUnpublishedDataset() { diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java index e9898031343..be81de9267d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java @@ -3,11 +3,13 @@ */ package edu.harvard.iq.dataverse.mydata; +import edu.harvard.iq.dataverse.DatasetServiceBean; import edu.harvard.iq.dataverse.DataverseRoleServiceBean; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.DataverseSession; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.DvObjectServiceBean; +import edu.harvard.iq.dataverse.PermissionsWrapper; import edu.harvard.iq.dataverse.RoleAssigneeServiceBean; import edu.harvard.iq.dataverse.api.auth.AuthRequired; import 
edu.harvard.iq.dataverse.search.SearchServiceBean; @@ -25,6 +27,7 @@ import edu.harvard.iq.dataverse.search.SearchException; import edu.harvard.iq.dataverse.search.SearchFields; import edu.harvard.iq.dataverse.search.SortBy; +import edu.harvard.iq.dataverse.settings.JvmSettings; import java.util.List; import java.util.Map; @@ -64,7 +67,9 @@ public class DataRetrieverAPI extends AbstractApiBean { private static final String retrieveDataPartialAPIPath = "retrieve"; @Inject - DataverseSession session; + DataverseSession session; + @Inject + PermissionsWrapper permissionsWrapper; @EJB DataverseRoleServiceBean dataverseRoleService; @@ -82,6 +87,8 @@ public class DataRetrieverAPI extends AbstractApiBean { //MyDataQueryHelperServiceBean myDataQueryHelperServiceBean; @EJB GroupServiceBean groupService; + @EJB + DatasetServiceBean datasetService; private List roleList; private DataverseRolePermissionHelper rolePermissionHelper; @@ -482,12 +489,14 @@ private JsonArrayBuilder formatSolrDocs(SolrQueryResponse solrResponse, RoleTagR JsonObjectBuilder myDataCardInfo; JsonArrayBuilder rolesForCard; + DataverseRequest dataverseRequest = createDataverseRequest(authUser); for (SolrSearchResult doc : solrQueryResponse.getSolrSearchResults()){ // ------------------------------------------- // (a) Get core card data from solr // ------------------------------------------- - myDataCardInfo = doc.getJsonForMyData(); + + myDataCardInfo = doc.getJsonForMyData(isValid(doc, dataverseRequest)); if (!doc.getEntity().isInstanceofDataFile()){ String parentAlias = dataverseService.getParentAliasString(doc); @@ -510,4 +519,8 @@ private JsonArrayBuilder formatSolrDocs(SolrQueryResponse solrResponse, RoleTagR return jsonSolrDocsArrayBuilder; } + + private boolean isValid(SolrSearchResult result, DataverseRequest dataverseRequest) { + return result.isValid(x -> permissionsWrapper.canUpdateDataset(dataverseRequest, datasetService.find(x.getEntityId()))); + } } \ No newline at end of file diff --git 
a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 2ce06541afa..d0100bd79a4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -22,6 +22,7 @@ import edu.harvard.iq.dataverse.ThumbnailServiceWrapper; import edu.harvard.iq.dataverse.WidgetWrapper; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.BundleUtil; import java.time.LocalDate; import java.util.ArrayList; @@ -351,8 +352,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused * https://github.com/IQSS/dataverse/issues/84 */ int numRows = 10; - HttpServletRequest httpServletRequest = (HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest(); - DataverseRequest dataverseRequest = new DataverseRequest(session.getUser(), httpServletRequest); + DataverseRequest dataverseRequest = getDataverseRequest(); List dataverses = new ArrayList<>(); dataverses.add(dataverse); solrQueryResponse = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinal, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); @@ -1395,8 +1395,13 @@ public boolean isActivelyEmbargoed(SolrSearchResult result) { } } + private DataverseRequest getDataverseRequest() { + final HttpServletRequest httpServletRequest = (HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest(); + return new DataverseRequest(session.getUser(), httpServletRequest); + } + public boolean isValid(SolrSearchResult result) { - return result.isValid(); + return result.isValid(x -> permissionsWrapper.canUpdateDataset(getDataverseRequest(), datasetService.find(x.getEntityId()))); } public 
enum SortOrder { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java index 6ad7f9dbbf6..72af8b158dc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java @@ -7,6 +7,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.Predicate; import java.util.logging.Logger; import jakarta.json.Json; @@ -26,6 +27,7 @@ import edu.harvard.iq.dataverse.MetadataBlock; import edu.harvard.iq.dataverse.api.Util; import edu.harvard.iq.dataverse.dataset.DatasetThumbnail; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.DateUtil; import edu.harvard.iq.dataverse.util.json.JsonPrinter; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; @@ -417,7 +419,7 @@ public JsonObject toJsonObject(boolean showRelevance, boolean showEntityIds, boo * * @return */ - public JsonObjectBuilder getJsonForMyData() { + public JsonObjectBuilder getJsonForMyData(boolean isValid) { JsonObjectBuilder myDataJson = json(true, true, true);// boolean showRelevance, boolean showEntityIds, boolean showApiUrls) @@ -425,7 +427,7 @@ public JsonObjectBuilder getJsonForMyData() { .add("is_draft_state", this.isDraftState()).add("is_in_review_state", this.isInReviewState()) .add("is_unpublished_state", this.isUnpublishedState()).add("is_published", this.isPublishedState()) .add("is_deaccesioned", this.isDeaccessionedState()) - .add("is_valid", this.isValid()) + .add("is_valid", isValid) .add("date_to_display_on_card", getDateToDisplayOnCard()); // Add is_deaccessioned attribute, even though MyData currently screens any deaccessioned info out @@ -1265,7 +1267,19 @@ public void setDatasetValid(Boolean datasetValid) { this.datasetValid = datasetValid == null || Boolean.valueOf(datasetValid); } - public boolean isValid() { - return 
datasetValid; + public boolean isValid(Predicate canUpdateDataset) { + if (this.datasetValid) { + return true; + } + if (!this.getType().equals("datasets")) { + return true; + } + if (this.isDraftState()) { + return false; + } + if (!JvmSettings.UI_SHOW_VALIDITY_LABEL.lookupOptional(Boolean.class).orElse(false)) { + return true; + } + return !canUpdateDataset.test(this); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index cc3272413c7..9f0371178c0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -145,6 +145,7 @@ public enum JvmSettings { SCOPE_UI(PREFIX, "ui"), UI_ALLOW_REVIEW_INCOMPLETE(SCOPE_UI, "allow-review-for-incomplete"), UI_SHOW_VALIDITY_FILTER(SCOPE_UI, "show-validity-filter"), + UI_SHOW_VALIDITY_LABEL(SCOPE_UI, "show-label-for-incomplete-when-published"), // NetCDF SETTINGS SCOPE_NETCDF(PREFIX, "netcdf"), diff --git a/src/main/webapp/file.xhtml b/src/main/webapp/file.xhtml index f69b5c35afd..a5218aedaa6 100644 --- a/src/main/webapp/file.xhtml +++ b/src/main/webapp/file.xhtml @@ -76,7 +76,7 @@ - + From 0cd23fbffb0d3c4a75cb3948333d4c71e7e5110e Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 5 Dec 2023 18:28:26 +0100 Subject: [PATCH 13/81] added documentation --- ...10116-incomplete-metadata-label-setting.md | 1 + .../source/installation/config.rst | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 doc/release-notes/10116-incomplete-metadata-label-setting.md diff --git a/doc/release-notes/10116-incomplete-metadata-label-setting.md b/doc/release-notes/10116-incomplete-metadata-label-setting.md new file mode 100644 index 00000000000..5fd2de2f287 --- /dev/null +++ b/doc/release-notes/10116-incomplete-metadata-label-setting.md @@ -0,0 +1 @@ +Bug fixed for the ``incomplete metadata`` label being shown for published dataset 
with incomplete metadata in certain scenarios. This label will now only be shown for draft versions of such datasets. This label can also be made visible for published datasets with the new option ``dataverse.api.show-label-for-incomplete-when-published``, and will be only shown on datasets with incomplete metadata that the logged-in user can edit. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index c707daaad07..bc1f01576c9 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2425,6 +2425,25 @@ Defaults to ``false``. Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_API_ALLOW_INCOMPLETE_METADATA``. Will accept ``[tT][rR][uU][eE]|1|[oO][nN]`` as "true" expressions. +.. _dataverse.api.show-label-for-incomplete-when-published: + +dataverse.api.show-label-for-incomplete-when-published +++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +Even when you do not allow incomplete metadata to be saved in dataverse, some metadata may end up being incomplete, e.g., after making a metadata field mandatory. Datasets where that field is +not filled out, become incomplete, and therefore can be labeled with the ``incomplete metadat`` label. By default, this label is only shown for draft datasets. However, in some situations, you +may want to show the label also for published datasets. For example, when an admin needs to have an overview of the datasets that are no longer valid after a metadata fields update, +enabling this option will show the ``incomplete metadata`` labels on published datasets, but only to the users that can edit the specific datasets. Note that you need to reindex the datasets +after changing the metadata definitions. Reindexing will update the labels and other dataset information according to the new situation. 
+ +When enabled, published datasets with incomplete metadata will have an ``incomplete metadata`` label attached to them, but only for the datasets that the user can edit. You can list these datasets, +for example, with the validity of metadata filter shown in "My Data" page that can be turned on by enabling the :ref:`dataverse.ui.show-validity-filter` option. + +Defaults to ``false``. + +Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable +``DATAVERSE_API_SHOW_LABEL_FOR_INCOMPLETE_WHEN_PUBLISHED``. Will accept ``[tT][rR][uU][eE]|1|[oO][nN]`` as "true" expressions. + .. _dataverse.signposting.level1-author-limit: dataverse.signposting.level1-author-limit @@ -2475,6 +2494,8 @@ Defaults to ``false``. Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_UI_ALLOW_REVIEW_FOR_INCOMPLETE``. Will accept ``[tT][rR][uU][eE]|1|[oO][nN]`` as "true" expressions. +.. _dataverse.ui.show-validity-filter: + dataverse.ui.show-validity-filter +++++++++++++++++++++++++++++++++ From 542a19824aaec5a24d4fd000c61c01c9629db08d Mon Sep 17 00:00:00 2001 From: Ludovic DANIEL Date: Wed, 6 Dec 2023 17:43:21 +0100 Subject: [PATCH 14/81] Fixed forgotten code to make localeStrValue of CVV depends on :MetadataLanguages of the dataset --- src/main/webapp/metadataFragment.xhtml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/webapp/metadataFragment.xhtml b/src/main/webapp/metadataFragment.xhtml index 200d2917b9a..442ee5bc287 100755 --- a/src/main/webapp/metadataFragment.xhtml +++ b/src/main/webapp/metadataFragment.xhtml @@ -292,7 +292,7 @@ @@ -368,12 +368,12 @@ - + @@ -387,7 +387,7 @@ rendered="#{subdsf.datasetFieldType.allowMultiples}" label="#{bundle.select}" multiple="true" filter="#{(subdsf.datasetFieldType.controlledVocabularyValues.size() lt 10) ? 
'false':'true'}" filterMatchMode="contains" showHeader="#{(subdsf.datasetFieldType.controlledVocabularyValues.size() lt 10) ? 'false':'true'}"> - +
From 02f2edc2fddd3cf0c39ac6bbdb4381ff0b7ae744 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 7 Dec 2023 15:48:24 +0100 Subject: [PATCH 15/81] typo fix --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index bc1f01576c9..46a83610871 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2431,7 +2431,7 @@ dataverse.api.show-label-for-incomplete-when-published ++++++++++++++++++++++++++++++++++++++++++++++++++++++ Even when you do not allow incomplete metadata to be saved in dataverse, some metadata may end up being incomplete, e.g., after making a metadata field mandatory. Datasets where that field is -not filled out, become incomplete, and therefore can be labeled with the ``incomplete metadat`` label. By default, this label is only shown for draft datasets. However, in some situations, you +not filled out, become incomplete, and therefore can be labeled with the ``incomplete metadata`` label. By default, this label is only shown for draft datasets. However, in some situations, you may want to show the label also for published datasets. For example, when an admin needs to have an overview of the datasets that are no longer valid after a metadata fields update, enabling this option will show the ``incomplete metadata`` labels on published datasets, but only to the users that can edit the specific datasets. Note that you need to reindex the datasets after changing the metadata definitions. Reindexing will update the labels and other dataset information according to the new situation. 
From f6e5db29f0977187128d99f42b32c08c53af5e74 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 19 Dec 2023 16:42:44 +0100 Subject: [PATCH 16/81] option renamed: show-label-for-incomplete-when-published -> show-validity-label-when-published --- .../10116-incomplete-metadata-label-setting.md | 2 +- doc/sphinx-guides/source/installation/config.rst | 6 +++--- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 2 +- src/main/java/edu/harvard/iq/dataverse/FilePage.java | 2 +- .../edu/harvard/iq/dataverse/search/SolrSearchResult.java | 2 +- .../java/edu/harvard/iq/dataverse/settings/JvmSettings.java | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/release-notes/10116-incomplete-metadata-label-setting.md b/doc/release-notes/10116-incomplete-metadata-label-setting.md index 5fd2de2f287..970f2c922ae 100644 --- a/doc/release-notes/10116-incomplete-metadata-label-setting.md +++ b/doc/release-notes/10116-incomplete-metadata-label-setting.md @@ -1 +1 @@ -Bug fixed for the ``incomplete metadata`` label being shown for published dataset with incomplete metadata in certain scenarios. This label will now only be shown for draft versions of such datasets. This label can also be made visible for published datasets with the new option ``dataverse.api.show-label-for-incomplete-when-published``, and will be only shown on datasets with incomplete metadata that the logged-in user can edit. +Bug fixed for the ``incomplete metadata`` label being shown for published dataset with incomplete metadata in certain scenarios. This label will now only be shown for draft versions of such datasets. This label can also be made visible for published datasets with the new option ``dataverse.ui.show-validity-label-when-published``, and will be only shown on datasets with incomplete metadata that the logged-in user can edit. 
diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 29e53e80d68..70f35cc8797 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2444,10 +2444,10 @@ Defaults to ``false``. Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_API_ALLOW_INCOMPLETE_METADATA``. Will accept ``[tT][rR][uU][eE]|1|[oO][nN]`` as "true" expressions. -.. _dataverse.api.show-label-for-incomplete-when-published: +.. _dataverse.ui.show-validity-label-when-published: -dataverse.api.show-label-for-incomplete-when-published -++++++++++++++++++++++++++++++++++++++++++++++++++++++ +dataverse.ui.show-validity-label-when-published ++++++++++++++++++++++++++++++++++++++++++++++++ Even when you do not allow incomplete metadata to be saved in dataverse, some metadata may end up being incomplete, e.g., after making a metadata field mandatory. Datasets where that field is not filled out, become incomplete, and therefore can be labeled with the ``incomplete metadata`` label. By default, this label is only shown for draft datasets. 
However, in some situations, you diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 5402ad01cbc..66ade0c621c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -2243,7 +2243,7 @@ private void displayPublishMessage(){ public boolean isValid() { if (valid == null) { - if (workingVersion.isDraft() || (canUpdateDataset() && JvmSettings.UI_SHOW_VALIDITY_LABEL.lookupOptional(Boolean.class).orElse(false))) { + if (workingVersion.isDraft() || (canUpdateDataset() && JvmSettings.UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED.lookupOptional(Boolean.class).orElse(false))) { final DatasetVersion newVersion = workingVersion.cloneDatasetVersion(); newVersion.setDatasetFields(newVersion.initDatasetFields()); valid = newVersion.isValid(); diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index b7afa83aea0..34624154b12 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -289,7 +289,7 @@ private void displayPublishMessage(){ public boolean isValid() { if (valid == null) { final DatasetVersion workingVersion = fileMetadata.getDatasetVersion(); - if (workingVersion.isDraft() || (canUpdateDataset() && JvmSettings.UI_SHOW_VALIDITY_LABEL.lookupOptional(Boolean.class).orElse(false))) { + if (workingVersion.isDraft() || (canUpdateDataset() && JvmSettings.UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED.lookupOptional(Boolean.class).orElse(false))) { final DatasetVersion newVersion = workingVersion.cloneDatasetVersion(); newVersion.setDatasetFields(newVersion.initDatasetFields()); valid = newVersion.isValid(); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java index 72af8b158dc..496ccf33ee3 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java @@ -1277,7 +1277,7 @@ public boolean isValid(Predicate canUpdateDataset) { if (this.isDraftState()) { return false; } - if (!JvmSettings.UI_SHOW_VALIDITY_LABEL.lookupOptional(Boolean.class).orElse(false)) { + if (!JvmSettings.UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED.lookupOptional(Boolean.class).orElse(false)) { return true; } return !canUpdateDataset.test(this); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 3c1ae689263..7ef2030a616 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -145,7 +145,7 @@ public enum JvmSettings { SCOPE_UI(PREFIX, "ui"), UI_ALLOW_REVIEW_INCOMPLETE(SCOPE_UI, "allow-review-for-incomplete"), UI_SHOW_VALIDITY_FILTER(SCOPE_UI, "show-validity-filter"), - UI_SHOW_VALIDITY_LABEL(SCOPE_UI, "show-label-for-incomplete-when-published"), + UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED(SCOPE_UI, "show-validity-label-when-published"), // NetCDF SETTINGS SCOPE_NETCDF(PREFIX, "netcdf"), From 9110adee7c739b21803da912913b52442a7e1e38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20ROUCOU?= Date: Wed, 31 Jan 2024 18:22:03 +0100 Subject: [PATCH 17/81] Sitemap more than 50000 entries IQSS#8936 --- .../iq/dataverse/sitemap/SiteMapUtil.java | 234 +++++++----------- .../iq/dataverse/sitemap/SiteMapUtilTest.java | 58 ++++- 2 files changed, 143 insertions(+), 149 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java b/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java index 86ae697f771..3077c41fa14 100644 --- a/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java @@ -1,194 +1,135 @@ package 
edu.harvard.iq.dataverse.sitemap; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DvObjectContainer; -import edu.harvard.iq.dataverse.settings.ConfigCheckService; -import edu.harvard.iq.dataverse.settings.JvmSettings; -import edu.harvard.iq.dataverse.util.SystemConfig; -import edu.harvard.iq.dataverse.util.xml.XmlValidator; import java.io.File; import java.io.IOException; import java.net.MalformedURLException; -import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.nio.file.StandardCopyOption; -import java.text.SimpleDateFormat; +import java.text.ParseException; +import java.time.format.DateTimeFormatter; import java.util.List; import java.util.logging.Logger; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.OutputKeys; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerConfigurationException; -import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.stream.StreamResult; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.xml.sax.SAXException; + +import com.redfin.sitemapgenerator.W3CDateFormat; +import com.redfin.sitemapgenerator.W3CDateFormat.Pattern; +import com.redfin.sitemapgenerator.WebSitemapGenerator; +import com.redfin.sitemapgenerator.WebSitemapUrl; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DvObjectContainer; +import edu.harvard.iq.dataverse.settings.ConfigCheckService; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.SystemConfig; public class SiteMapUtil { private static final Logger logger = 
Logger.getLogger(SiteMapUtil.class.getCanonicalName()); + private static DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); - static final String SITEMAP_FILENAME_FINAL = "sitemap.xml"; static final String SITEMAP_FILENAME_STAGED = "sitemap.xml.staged"; + /** @see https://www.sitemaps.org/protocol.html#index */ + static final int SITEMAP_LIMIT = 50000; + - /** - * TODO: Handle more than 50,000 entries in the sitemap. - * - * (As of this writing Harvard Dataverse only has ~3000 dataverses and - * ~30,000 datasets.) - * - * "each Sitemap file that you provide must have no more than 50,000 URLs" - * https://www.sitemaps.org/protocol.html - * - * Consider using a third party library: "One sitemap can contain a maximum - * of 50,000 URLs. (Some sitemaps, like Google News sitemaps, can contain - * only 1,000 URLs.) If you need to put more URLs than that in a sitemap, - * you'll have to use a sitemap index file. Fortunately, WebSitemapGenerator - * can manage the whole thing for you." - * https://github.com/dfabulich/sitemapgen4j - */ public static void updateSiteMap(List dataverses, List datasets) { logger.info("BEGIN updateSiteMap"); - String sitemapPathString = getSitemapPathString(); - String stagedSitemapPathAndFileString = sitemapPathString + File.separator + SITEMAP_FILENAME_STAGED; - String finalSitemapPathAndFileString = sitemapPathString + File.separator + SITEMAP_FILENAME_FINAL; - - Path stagedPath = Paths.get(stagedSitemapPathAndFileString); - if (Files.exists(stagedPath)) { - logger.warning("Unable to update sitemap! The staged file from a previous run already existed. Delete " + stagedSitemapPathAndFileString + " and try again."); + final String dataverseSiteUrl = SystemConfig.getDataverseSiteUrlStatic(); + final String msgErrorFormat = "Problem with %s : %s. The exception is %s"; + final String msgErrorW3CFormat = "%s isn't a valid W3C date time for %s. 
The exception is %s"; + final String sitemapPathString = getSitemapPathString(); + final String stagedSitemapPathAndFileString = sitemapPathString + File.separator + SITEMAP_FILENAME_STAGED; + final Path stagedSitemapPath = Paths.get(stagedSitemapPathAndFileString); + + if (Files.exists(stagedSitemapPath)) { + logger.warning(String.format( + "Unable to update sitemap! The staged file from a previous run already existed. Delete %s and try again.", + stagedSitemapPathAndFileString)); return; } - DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); - DocumentBuilder documentBuilder = null; + final File directory = new File(sitemapPathString); + if (!directory.exists()) { + directory.mkdir(); + } + + // Use DAY pattern (2024-01-24), local machine timezone + final W3CDateFormat dateFormat = new W3CDateFormat(Pattern.DAY); + WebSitemapGenerator wsg = null; try { - documentBuilder = documentBuilderFactory.newDocumentBuilder(); - } catch (ParserConfigurationException ex) { - logger.warning("Unable to update sitemap! 
ParserConfigurationException: " + ex.getLocalizedMessage()); + wsg = WebSitemapGenerator.builder(dataverseSiteUrl, directory).autoValidate(true).dateFormat(dateFormat) + .build(); + } catch (MalformedURLException e) { + logger.warning(String.format(msgErrorFormat, "Dataverse site URL", dataverseSiteUrl, e.getLocalizedMessage())); return; } - Document document = documentBuilder.newDocument(); - - Element urlSet = document.createElement("urlset"); - urlSet.setAttribute("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9"); - urlSet.setAttribute("xmlns:xhtml", "http://www.w3.org/1999/xhtml"); - document.appendChild(urlSet); for (Dataverse dataverse : dataverses) { if (!dataverse.isReleased()) { continue; } - Element url = document.createElement("url"); - urlSet.appendChild(url); - - Element loc = document.createElement("loc"); - String dataverseAlias = dataverse.getAlias(); - loc.appendChild(document.createTextNode(SystemConfig.getDataverseSiteUrlStatic() + "/dataverse/" + dataverseAlias)); - url.appendChild(loc); - - Element lastmod = document.createElement("lastmod"); - lastmod.appendChild(document.createTextNode(getLastModDate(dataverse))); - url.appendChild(lastmod); + final String dvAlias = dataverse.getAlias(); + final String dataverseUrl = dataverseSiteUrl + "/dataverse/" + dvAlias; + final String lastModDate = getLastModDate(dataverse); + try { + final WebSitemapUrl url = new WebSitemapUrl.Options(dataverseUrl).lastMod(lastModDate).build(); + wsg.addUrl(url); + } catch (MalformedURLException e) { + logger.fine(String.format(msgErrorFormat, "dataverse URL", dataverseUrl, e.getLocalizedMessage())); + } catch (ParseException e) { + logger.fine(String.format(msgErrorW3CFormat, lastModDate, "dataverse alias " + dvAlias, e.getLocalizedMessage())); + } } for (Dataset dataset : datasets) { - if (!dataset.isReleased()) { - continue; - } - if (dataset.isHarvested()) { - continue; - } // The deaccessioned check is last because it has to iterate through dataset 
versions. - if (dataset.isDeaccessioned()) { + if (!dataset.isReleased() || dataset.isHarvested() || dataset.isDeaccessioned()) { continue; } - Element url = document.createElement("url"); - urlSet.appendChild(url); - - Element loc = document.createElement("loc"); - String datasetPid = dataset.getGlobalId().asString(); - loc.appendChild(document.createTextNode(SystemConfig.getDataverseSiteUrlStatic() + "/dataset.xhtml?persistentId=" + datasetPid)); - url.appendChild(loc); - - Element lastmod = document.createElement("lastmod"); - lastmod.appendChild(document.createTextNode(getLastModDate(dataset))); - url.appendChild(lastmod); - } - - TransformerFactory transformerFactory = TransformerFactory.newInstance(); - Transformer transformer = null; - try { - transformer = transformerFactory.newTransformer(); - } catch (TransformerConfigurationException ex) { - logger.warning("Unable to update sitemap! TransformerConfigurationException: " + ex.getLocalizedMessage()); - return; - } - transformer.setOutputProperty(OutputKeys.INDENT, "yes"); - transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); - DOMSource source = new DOMSource(document); - File directory = new File(sitemapPathString); - if (!directory.exists()) { - directory.mkdir(); - } - - boolean debug = false; - if (debug) { - logger.info("Writing sitemap to console/logs"); - StreamResult consoleResult = new StreamResult(System.out); + final String datasetPid = dataset.getGlobalId().asString(); + final String datasetUrl = dataverseSiteUrl + "/dataset.xhtml?persistentId=" + datasetPid; + final String lastModDate = getLastModDate(dataset); try { - transformer.transform(source, consoleResult); - } catch (TransformerException ex) { - logger.warning("Unable to print sitemap to the console: " + ex.getLocalizedMessage()); + final WebSitemapUrl url = new WebSitemapUrl.Options(datasetUrl).lastMod(lastModDate).build(); + wsg.addUrl(url); + } catch (MalformedURLException e) { + 
logger.fine(String.format(msgErrorFormat, "dataset URL", datasetUrl, e.getLocalizedMessage())); + } catch (ParseException e) { + logger.fine(String.format(msgErrorW3CFormat, lastModDate, "dataset " + datasetPid, e.getLocalizedMessage())); } } - logger.info("Writing staged sitemap to " + stagedSitemapPathAndFileString); - StreamResult result = new StreamResult(new File(stagedSitemapPathAndFileString)); + logger.info(String.format("Writing and checking sitemap file into %s", sitemapPathString)); try { - transformer.transform(source, result); - } catch (TransformerException ex) { - logger.warning("Unable to update sitemap! Unable to write staged sitemap to " + stagedSitemapPathAndFileString + ". TransformerException: " + ex.getLocalizedMessage()); - return; - } - - logger.info("Checking staged sitemap for well-formedness. The staged file is " + stagedSitemapPathAndFileString); - try { - XmlValidator.validateXmlWellFormed(stagedSitemapPathAndFileString); + wsg.write(); + if (dataverses.size() + datasets.size() > SITEMAP_LIMIT) { + wsg.writeSitemapsWithIndex(); + } } catch (Exception ex) { - logger.warning("Unable to update sitemap! Staged sitemap file is not well-formed XML! The exception for " + stagedSitemapPathAndFileString + " is " + ex.getLocalizedMessage()); - return; - } - - logger.info("Checking staged sitemap against XML schema. The staged file is " + stagedSitemapPathAndFileString); - URL schemaUrl = null; - try { - schemaUrl = new URL("https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"); - } catch (MalformedURLException ex) { - // This URL is hard coded and it's fine. We should never get MalformedURLException so we just swallow the exception and carry on. - } - try { - XmlValidator.validateXmlSchema(stagedSitemapPathAndFileString, schemaUrl); - } catch (SAXException | IOException ex) { - logger.warning("Unable to update sitemap! 
Exception caught while checking XML staged file (" + stagedSitemapPathAndFileString + " ) against XML schema: " + ex.getLocalizedMessage()); + final StringBuffer errorMsg = new StringBuffer("Unable to write or validate sitemap ! The exception is "); + errorMsg.append(ex.getLocalizedMessage()); + // Add causes messages exception + Throwable cause = ex.getCause(); + // Fix limit to 5 causes + final int causeLimit = 5; + int cpt = 0; + while (cause != null && cpt < causeLimit) { + errorMsg.append(" with cause ").append(cause.getLocalizedMessage()); + cause = ex.getCause(); + cpt = cpt + 1; + } + logger.warning(errorMsg.toString()); return; } - Path finalPath = Paths.get(finalSitemapPathAndFileString); - logger.info("Copying staged sitemap from " + stagedSitemapPathAndFileString + " to " + finalSitemapPathAndFileString); + logger.info(String.format("Remove staged sitemap %s", stagedSitemapPathAndFileString)); try { - Files.move(stagedPath, finalPath, StandardCopyOption.REPLACE_EXISTING); + Files.deleteIfExists(stagedSitemapPath); } catch (IOException ex) { - logger.warning("Unable to update sitemap! Unable to copy staged sitemap from " + stagedSitemapPathAndFileString + " to " + finalSitemapPathAndFileString + ". IOException: " + ex.getLocalizedMessage()); + logger.warning("Unable to delete sitemap staged file! IOException: " + ex.getLocalizedMessage()); return; } @@ -199,12 +140,11 @@ private static String getLastModDate(DvObjectContainer dvObjectContainer) { // TODO: Decide if YYYY-MM-DD is enough. https://www.sitemaps.org/protocol.html // says "The date of last modification of the file. This date should be in W3C Datetime format. // This format allows you to omit the time portion, if desired, and use YYYY-MM-DD." 
- return new SimpleDateFormat("yyyy-MM-dd").format(dvObjectContainer.getModificationTime()); + return dvObjectContainer.getModificationTime().toLocalDateTime().format(formatter); } public static boolean stageFileExists() { - String sitemapPathString = getSitemapPathString(); - String stagedSitemapPathAndFileString = sitemapPathString + File.separator + SITEMAP_FILENAME_STAGED; + String stagedSitemapPathAndFileString = getSitemapPathString() + File.separator + SITEMAP_FILENAME_STAGED; Path stagedPath = Paths.get(stagedSitemapPathAndFileString); if (Files.exists(stagedPath)) { logger.warning("Unable to update sitemap! The staged file from a previous run already existed. Delete " + stagedSitemapPathAndFileString + " and try again."); @@ -212,7 +152,7 @@ public static boolean stageFileExists() { } return false; } - + /** * Lookup the location where to generate the sitemap. * @@ -223,6 +163,6 @@ public static boolean stageFileExists() { */ private static String getSitemapPathString() { return JvmSettings.DOCROOT_DIRECTORY.lookup() + File.separator + "sitemap"; - } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java index 41032ffa811..704859be86f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java @@ -11,18 +11,19 @@ import java.io.File; import java.io.IOException; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.sql.Timestamp; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Date; import java.util.List; import static org.junit.jupiter.api.Assertions.*; -import static org.junit.jupiter.api.Assertions.assertTrue; import org.junit.jupiter.api.AfterEach; 
import org.junit.jupiter.api.BeforeEach; @@ -32,6 +33,10 @@ class SiteMapUtilTest { + // see https://www.sitemaps.org/protocol.html#validating + final String xsdSitemap = "https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"; + final String xsdSitemapIndex = "https://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd"; + @TempDir Path tempDir; Path tempDocroot; @@ -105,7 +110,7 @@ void testUpdateSiteMap() throws IOException, ParseException, SAXException { // then String pathToSiteMap = tempDocroot.resolve("sitemap").resolve("sitemap.xml").toString(); assertDoesNotThrow(() -> XmlValidator.validateXmlWellFormed(pathToSiteMap)); - assertTrue(XmlValidator.validateXmlSchema(pathToSiteMap, new URL("https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"))); + assertTrue(XmlValidator.validateXmlSchema(pathToSiteMap, new URL(xsdSitemap))); File sitemapFile = new File(pathToSiteMap); String sitemapString = XmlPrinter.prettyPrintXml(new String(Files.readAllBytes(Paths.get(sitemapFile.getAbsolutePath())))); @@ -116,7 +121,56 @@ void testUpdateSiteMap() throws IOException, ParseException, SAXException { assertFalse(sitemapString.contains(unpublishedPid)); assertFalse(sitemapString.contains(harvestedPid)); assertFalse(sitemapString.contains(deaccessionedPid)); + } + + @Test + void testHugeSiteMap() throws IOException, ParseException, SAXException { + // given + final int nbDataverse = 50; + final int nbDataset = 50000; + + final List dataverses = new ArrayList<>(nbDataverse); + for (int i = 1; i <= nbDataverse; i++) { + final Dataverse publishedDataverse = new Dataverse(); + publishedDataverse.setAlias(String.format("publishedDv%s", i)); + publishedDataverse.setModificationTime(new Timestamp(new Date().getTime())); + publishedDataverse.setPublicationDate(new Timestamp(new Date().getTime())); + dataverses.add(publishedDataverse); + } + + final List datasets = new ArrayList<>(nbDataset); + for (int i = 1; i <= nbDataset; i++) { + final Dataset published = new Dataset(); + 
published.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, "10.666", String.format("FAKE/published%s", i), null, DOIServiceBean.DOI_RESOLVER_URL, null)); + published.setPublicationDate(new Timestamp(new Date().getTime())); + published.setModificationTime(new Timestamp(new Date().getTime())); + datasets.add(published); + } + // when + SiteMapUtil.updateSiteMap(dataverses, datasets); + + // then + final Path siteMapDir = tempDocroot.resolve("sitemap"); + final String pathToSiteMapIndexFile = siteMapDir.resolve("sitemap_index.xml").toString(); + assertDoesNotThrow(() -> XmlValidator.validateXmlWellFormed(pathToSiteMapIndexFile)); + assertTrue(XmlValidator.validateXmlSchema(pathToSiteMapIndexFile, new URL(xsdSitemapIndex))); + + final File sitemapFile = new File(pathToSiteMapIndexFile); + String sitemapString = XmlPrinter.prettyPrintXml(new String(Files.readAllBytes(Paths.get(sitemapFile.getAbsolutePath())), StandardCharsets.UTF_8)); + // System.out.println("sitemap: " + sitemapString); + + assertTrue(sitemapString.contains("sitemap1.xml")); + assertTrue(sitemapString.contains("sitemap2.xml")); + assertTrue(sitemapString.contains("")); + + final String pathToSiteMap1File = siteMapDir.resolve("sitemap1.xml").toString(); + assertDoesNotThrow(() -> XmlValidator.validateXmlWellFormed(pathToSiteMap1File)); + assertTrue(XmlValidator.validateXmlSchema(pathToSiteMap1File, new URL(xsdSitemap))); + + final String pathToSiteMap2File = siteMapDir.resolve("sitemap2.xml").toString(); + assertDoesNotThrow(() -> XmlValidator.validateXmlWellFormed(pathToSiteMap2File)); + assertTrue(XmlValidator.validateXmlSchema(pathToSiteMap2File, new URL(xsdSitemap))); } } From e47962ee5ef236fafcd88f489e97274d680732f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20ROUCOU?= Date: Fri, 9 Feb 2024 17:21:19 +0100 Subject: [PATCH 18/81] Add missing jar + documentation --- doc/sphinx-guides/source/installation/config.rst | 2 ++ pom.xml | 5 +++++ 2 files changed, 7 insertions(+) diff --git 
a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a7d7905ca4a..12e00e6cd66 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1745,6 +1745,8 @@ https://demo.dataverse.org/sitemap.xml is the sitemap URL for the Dataverse Proj Once the sitemap has been generated and placed in the domain docroot directory, it will become available to the outside callers at /sitemap/sitemap.xml; it will also be accessible at /sitemap.xml (via a *pretty-faces* rewrite rule). Some search engines will be able to find it at this default location. Some, **including Google**, need to be **specifically instructed** to retrieve it. +On Dataverse installation with more than 50000 Dataverse collections or datasets, sitemap file name is ``sitemap_index.html``, not ``sitemap.xml``. Be aware in previous steps to use the correct file name corresponding to your installation. + One way to submit your sitemap URL to Google is by using their "Search Console" (https://search.google.com/search-console). In order to use the console, you will need to authenticate yourself as the owner of your Dataverse site. Various authentication methods are provided; but if you are already using Google Analytics, the easiest way is to use that account. Make sure you are logged in on Google with the account that has the edit permission on your Google Analytics property; go to the search console and enter the root URL of your Dataverse installation, then choose Google Analytics as the authentication method. Once logged in, click on "Sitemaps" in the menu on the left. (todo: add a screenshot?) Consult `Google's "submit a sitemap" instructions`_ for more information; and/or similar instructions for other search engines. .. 
_Google's "submit a sitemap" instructions: https://support.google.com/webmasters/answer/183668 diff --git a/pom.xml b/pom.xml index 7c12a45135c..b0a4e509ed4 100644 --- a/pom.xml +++ b/pom.xml @@ -532,6 +532,11 @@ java-json-canonicalization 1.1 + + com.github.dfabulich + sitemapgen4j + 1.1.2 + edu.ucar cdm-core From 8796a1dce7bb3e2000e5e78f7a12a66c4d9ba4e4 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 12 Feb 2024 10:46:25 -0500 Subject: [PATCH 19/81] pushing the language controlled vocab additions suggested in #10197 into a local branch; #8243 --- scripts/api/data/metadatablocks/citation.tsv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index b21b6bcce57..2f39086464d 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -138,7 +138,7 @@ authorIdentifierScheme DAI 5 authorIdentifierScheme ResearcherID 6 authorIdentifierScheme ScopusID 7 - language Abkhaz 0 + language Abkhaz 0 abk ab language Afar 1 aar aa language Afrikaans 2 afr af language Akan 3 aka ak @@ -220,7 +220,7 @@ language Khmer 79 khm km language Kikuyu, Gikuyu 80 kik ki language Kinyarwanda 81 kin rw - language Kyrgyz 82 + language Kirghiz, Kyrgyz 82 kir ky language Komi 83 kom kv language Kongo 84 kon kg language Korean 85 kor ko @@ -249,7 +249,7 @@ language Nauru 108 nau na language Navajo, Navaho 109 nav nv language Northern Ndebele 110 nde nd - language Nepali 111 nep ne + language Nepali (macrolanguage) 111 nep ne language Ndonga 112 ndo ng language Norwegian Bokmål 113 nob nb language Norwegian Nynorsk 114 nno nn @@ -284,12 +284,12 @@ language Shona 143 sna sn language Sinhala, Sinhalese 144 sin si language Slovak 145 slk slo sk - language Slovene 146 slv sl + language Slovenian 146 slv sl language Somali 147 som so language Southern Sotho 148 sot st language Spanish, Castilian 149 spa es language Sundanese 
150 sun su - language Swahili 151 swa sw + language Swahili (macrolanguage) 151 swa sw language Swati 152 ssw ss language Swedish 153 swe sv language Tamil 154 tam ta From 4f8a74c2416c20d3a50fa6b5698a24e3d27804be Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 14 Feb 2024 19:20:01 -0500 Subject: [PATCH 20/81] Alternative long spellings for the Slovene and Kyrgyz languages. #8243 --- scripts/api/data/metadatablocks/citation.tsv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index bcc7ed4866d..70c56b303cd 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -220,7 +220,7 @@ language Khmer 79 khm km language Kikuyu, Gikuyu 80 kik ki language Kinyarwanda 81 kin rw - language Kirghiz, Kyrgyz 82 kir ky + language Kyrgyz 82 kir ky Kirghiz language Komi 83 kom kv language Kongo 84 kon kg language Korean 85 kor ko @@ -284,7 +284,7 @@ language Shona 143 sna sn language Sinhala, Sinhalese 144 sin si language Slovak 145 slk slo sk - language Slovenian 146 slv sl + language Slovene 146 slv sl Slovenian language Somali 147 som so language Southern Sotho 148 sot st language Spanish, Castilian 149 spa es From bde1e75e81f520a205cf4f6779feae3277982221 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20ROUCOU?= Date: Mon, 4 Mar 2024 10:37:03 +0100 Subject: [PATCH 21/81] Update library to GDDC fork --- pom.xml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index b0a4e509ed4..17cff19bc08 100644 --- a/pom.xml +++ b/pom.xml @@ -532,10 +532,11 @@ java-json-canonicalization 1.1 + - com.github.dfabulich + io.gdcc sitemapgen4j - 1.1.2 + 2.0.0 edu.ucar From ad042d2ba365435deb738ee195dac91292f88e77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20ROUCOU?= Date: Wed, 20 Mar 2024 16:03:57 +0100 Subject: [PATCH 22/81] Documentations --- 
doc/release-notes/8983-more-than-50000-entries-in-sitemap.md | 3 +++ doc/sphinx-guides/source/installation/config.rst | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 doc/release-notes/8983-more-than-50000-entries-in-sitemap.md diff --git a/doc/release-notes/8983-more-than-50000-entries-in-sitemap.md b/doc/release-notes/8983-more-than-50000-entries-in-sitemap.md new file mode 100644 index 00000000000..799aca87709 --- /dev/null +++ b/doc/release-notes/8983-more-than-50000-entries-in-sitemap.md @@ -0,0 +1,3 @@ +The sitemap file generation can handle more than 50,000 entries if needeed with the [sitemapgen4j](https://github.com/gdcc/sitemapgen4j) library, maintained by the Global Dataverse Community Consortium. + +In this case, the Dataverse Admin API `api/admin/sitemap` create a sitemap index file, called `sitemap_index.xml`, in place of the `sitemap.xml` file. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 12e00e6cd66..882df60c5eb 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1745,7 +1745,7 @@ https://demo.dataverse.org/sitemap.xml is the sitemap URL for the Dataverse Proj Once the sitemap has been generated and placed in the domain docroot directory, it will become available to the outside callers at /sitemap/sitemap.xml; it will also be accessible at /sitemap.xml (via a *pretty-faces* rewrite rule). Some search engines will be able to find it at this default location. Some, **including Google**, need to be **specifically instructed** to retrieve it. -On Dataverse installation with more than 50000 Dataverse collections or datasets, sitemap file name is ``sitemap_index.html``, not ``sitemap.xml``. Be aware in previous steps to use the correct file name corresponding to your installation. 
+According to `Sitemaps.org protocol`_, a sitemap file must have no more than 50,000 URLs and must be no larger than 50MiB. In this case, you have to create sitemap index file witch references multiples sitemap files called ``sitemap_index.xml``, instead of ``sitemap.xml``. Be aware in previous steps to use the correct file name corresponding to your installation. One way to submit your sitemap URL to Google is by using their "Search Console" (https://search.google.com/search-console). In order to use the console, you will need to authenticate yourself as the owner of your Dataverse site. Various authentication methods are provided; but if you are already using Google Analytics, the easiest way is to use that account. Make sure you are logged in on Google with the account that has the edit permission on your Google Analytics property; go to the search console and enter the root URL of your Dataverse installation, then choose Google Analytics as the authentication method. Once logged in, click on "Sitemaps" in the menu on the left. (todo: add a screenshot?) Consult `Google's "submit a sitemap" instructions`_ for more information; and/or similar instructions for other search engines. 
From 7a4a96d27bae39e4fcce764d43464e1ead009039 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20ROUCOU?= Date: Wed, 20 Mar 2024 18:23:28 +0100 Subject: [PATCH 23/81] Fix broken documentation --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 2b5bd95dd62..74c47b098ca 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2068,7 +2068,7 @@ https://demo.dataverse.org/sitemap.xml is the sitemap URL for the Dataverse Proj Once the sitemap has been generated and placed in the domain docroot directory, it will become available to the outside callers at /sitemap/sitemap.xml; it will also be accessible at /sitemap.xml (via a *pretty-faces* rewrite rule). Some search engines will be able to find it at this default location. Some, **including Google**, need to be **specifically instructed** to retrieve it. -According to `Sitemaps.org protocol`_, a sitemap file must have no more than 50,000 URLs and must be no larger than 50MiB. In this case, you have to create sitemap index file witch references multiples sitemap files called ``sitemap_index.xml``, instead of ``sitemap.xml``. Be aware in previous steps to use the correct file name corresponding to your installation. +According to `Sitemaps.org protocol `_, a sitemap file must have no more than 50,000 URLs and must be no larger than 50MiB. In this case, you have to create sitemap index file witch references multiples sitemap files called ``sitemap_index.xml``, instead of ``sitemap.xml``. Be aware in previous steps to use the correct file name corresponding to your installation. One way to submit your sitemap URL to Google is by using their "Search Console" (https://search.google.com/search-console). 
In order to use the console, you will need to authenticate yourself as the owner of your Dataverse site. Various authentication methods are provided; but if you are already using Google Analytics, the easiest way is to use that account. Make sure you are logged in on Google with the account that has the edit permission on your Google Analytics property; go to the search console and enter the root URL of your Dataverse installation, then choose Google Analytics as the authentication method. Once logged in, click on "Sitemaps" in the menu on the left. (todo: add a screenshot?) Consult `Google's "submit a sitemap" instructions`_ for more information; and/or similar instructions for other search engines. From 87adc6201e128d2d9d584dc5b05652361b48433c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20ROUCOU?= Date: Thu, 21 Mar 2024 09:14:44 +0100 Subject: [PATCH 24/81] Fix broken UT after develop merge --- .../java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java index a894ea9e0ee..19f985cc984 100644 --- a/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java @@ -141,7 +141,7 @@ void testHugeSiteMap() throws IOException, ParseException, SAXException { final List datasets = new ArrayList<>(nbDataset); for (int i = 1; i <= nbDataset; i++) { final Dataset published = new Dataset(); - published.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, "10.666", String.format("FAKE/published%s", i), null, DOIServiceBean.DOI_RESOLVER_URL, null)); + published.setGlobalId(new GlobalId(AbstractDOIProvider.DOI_PROTOCOL, "10.666", String.format("FAKE/published%s", i), null, AbstractDOIProvider.DOI_RESOLVER_URL, null)); published.setPublicationDate(new Timestamp(new Date().getTime())); 
published.setModificationTime(new Timestamp(new Date().getTime())); datasets.add(published); From 627fd9ec7d7c6675b7ba1634c1cd2e3984aca030 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20ROUCOU?= Date: Thu, 21 Mar 2024 10:55:22 +0100 Subject: [PATCH 25/81] Uptaded sitemapgen4j dependency to fix validation --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 4aab7f06f20..7c872229099 100644 --- a/pom.xml +++ b/pom.xml @@ -536,7 +536,7 @@ io.gdcc sitemapgen4j - 2.0.0 + 2.1.2 edu.ucar From c1ca59cce8571cf3b0529bbaf5ce835f7ba88119 Mon Sep 17 00:00:00 2001 From: jeromeroucou Date: Mon, 25 Mar 2024 11:50:03 +0100 Subject: [PATCH 26/81] Update doc/release-notes/8983-more-than-50000-entries-in-sitemap.md Co-authored-by: Philip Durbin --- doc/release-notes/8983-more-than-50000-entries-in-sitemap.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/8983-more-than-50000-entries-in-sitemap.md b/doc/release-notes/8983-more-than-50000-entries-in-sitemap.md index 799aca87709..3b2e312fc6c 100644 --- a/doc/release-notes/8983-more-than-50000-entries-in-sitemap.md +++ b/doc/release-notes/8983-more-than-50000-entries-in-sitemap.md @@ -1,3 +1,3 @@ -The sitemap file generation can handle more than 50,000 entries if needeed with the [sitemapgen4j](https://github.com/gdcc/sitemapgen4j) library, maintained by the Global Dataverse Community Consortium. +The sitemap file generation can handle more than 50,000 entries if needed with the [sitemapgen4j](https://github.com/gdcc/sitemapgen4j) library, maintained by the Global Dataverse Community Consortium. In this case, the Dataverse Admin API `api/admin/sitemap` create a sitemap index file, called `sitemap_index.xml`, in place of the `sitemap.xml` file. 
From 581883aa097fdfe0c9c84b1f587df37624de22a8 Mon Sep 17 00:00:00 2001 From: jeromeroucou Date: Mon, 25 Mar 2024 11:50:27 +0100 Subject: [PATCH 27/81] Update doc/release-notes/8983-more-than-50000-entries-in-sitemap.md Co-authored-by: Philip Durbin --- doc/release-notes/8983-more-than-50000-entries-in-sitemap.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/8983-more-than-50000-entries-in-sitemap.md b/doc/release-notes/8983-more-than-50000-entries-in-sitemap.md index 3b2e312fc6c..d5c161bd292 100644 --- a/doc/release-notes/8983-more-than-50000-entries-in-sitemap.md +++ b/doc/release-notes/8983-more-than-50000-entries-in-sitemap.md @@ -1,3 +1,3 @@ The sitemap file generation can handle more than 50,000 entries if needed with the [sitemapgen4j](https://github.com/gdcc/sitemapgen4j) library, maintained by the Global Dataverse Community Consortium. -In this case, the Dataverse Admin API `api/admin/sitemap` create a sitemap index file, called `sitemap_index.xml`, in place of the `sitemap.xml` file. +In this case, the Dataverse Admin API `api/admin/sitemap` create a sitemap index file, called `sitemap_index.xml`, in place of the `sitemap.xml` file. See the config section of the Installation Guide for details. 
From 2a3f90df3ac794c73c8c9346f71d9683a8174d5e Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Mon, 25 Mar 2024 14:34:57 -0400 Subject: [PATCH 28/81] Add MIT Lincense --- scripts/api/data/licenses/licenseMIT.json | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 scripts/api/data/licenses/licenseMIT.json diff --git a/scripts/api/data/licenses/licenseMIT.json b/scripts/api/data/licenses/licenseMIT.json new file mode 100644 index 00000000000..ac00b9aac0e --- /dev/null +++ b/scripts/api/data/licenses/licenseMIT.json @@ -0,0 +1,8 @@ +{ + "name": "MIT License", + "uri": "https://spdx.org/licenses/MIT.html", + "shortDescription": "MIT License (MIT).", + "iconUrl": "https://licensebuttons.net/l/by/4.0/88x31.png", + "active": true, + "sortOrder": 8 +} From b9ca45445e1df416a9d272792953a586b69d4787 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Mon, 25 Mar 2024 16:31:15 -0400 Subject: [PATCH 29/81] Release notes --- doc/release-notes/10425-add-MIT-License.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/10425-add-MIT-License.md diff --git a/doc/release-notes/10425-add-MIT-License.md b/doc/release-notes/10425-add-MIT-License.md new file mode 100644 index 00000000000..2e468fefbe0 --- /dev/null +++ b/doc/release-notes/10425-add-MIT-License.md @@ -0,0 +1 @@ +A new file has been added to import the MIT License to Dataverse on licenseMIT.json \ No newline at end of file From a0b29394fd532a2e99fd4cbb5768f3eee1b1d45f Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Mon, 25 Mar 2024 16:36:05 -0400 Subject: [PATCH 30/81] Icon removed --- scripts/api/data/licenses/licenseMIT.json | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/api/data/licenses/licenseMIT.json b/scripts/api/data/licenses/licenseMIT.json index ac00b9aac0e..7f2aa086df2 100644 --- a/scripts/api/data/licenses/licenseMIT.json +++ b/scripts/api/data/licenses/licenseMIT.json @@ -2,7 +2,6 @@ "name": "MIT License", "uri": 
"https://spdx.org/licenses/MIT.html", "shortDescription": "MIT License (MIT).", - "iconUrl": "https://licensebuttons.net/l/by/4.0/88x31.png", "active": true, "sortOrder": 8 } From 8ef7c6e680e84ee7b766622a1146b55a43f1a7b6 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Mon, 25 Mar 2024 16:52:15 -0400 Subject: [PATCH 31/81] Description change and guide section --- doc/sphinx-guides/source/installation/config.rst | 7 +++++++ scripts/api/data/licenses/licenseMIT.json | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 41388f7aa33..9c70c91916c 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1786,6 +1786,13 @@ JSON files for `Creative Commons licenses ` + Adding Custom Licenses ^^^^^^^^^^^^^^^^^^^^^^ diff --git a/scripts/api/data/licenses/licenseMIT.json b/scripts/api/data/licenses/licenseMIT.json index 7f2aa086df2..b02d7d39994 100644 --- a/scripts/api/data/licenses/licenseMIT.json +++ b/scripts/api/data/licenses/licenseMIT.json @@ -1,7 +1,7 @@ { "name": "MIT License", "uri": "https://spdx.org/licenses/MIT.html", - "shortDescription": "MIT License (MIT).", + "shortDescription": "Massachusetts Institute of Technology License (MIT).", "active": true, "sortOrder": 8 } From 9d75f1ace66b79862d894347899599a1af4edd16 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Mon, 25 Mar 2024 16:56:05 -0400 Subject: [PATCH 32/81] Fix license name --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 9c70c91916c..b5bce6b870d 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1791,7 +1791,7 @@ Adding Software Licenses JSON files 
for Software Licenses are provided below. -- :download:`licenseCC0-1.0.json <../../../../scripts/api/data/licenses/licenseMIT.json>` +- :download:`licenseMIT.json <../../../../scripts/api/data/licenses/licenseMIT.json>` Adding Custom Licenses ^^^^^^^^^^^^^^^^^^^^^^ From 16c0ffaa642cf9a3abfd7463c1528dc89fa070a9 Mon Sep 17 00:00:00 2001 From: jeromeroucou Date: Wed, 3 Apr 2024 10:55:33 +0200 Subject: [PATCH 33/81] Update doc/sphinx-guides/source/installation/config.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 74c47b098ca..b1d70dafc40 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2068,7 +2068,7 @@ https://demo.dataverse.org/sitemap.xml is the sitemap URL for the Dataverse Proj Once the sitemap has been generated and placed in the domain docroot directory, it will become available to the outside callers at /sitemap/sitemap.xml; it will also be accessible at /sitemap.xml (via a *pretty-faces* rewrite rule). Some search engines will be able to find it at this default location. Some, **including Google**, need to be **specifically instructed** to retrieve it. -According to `Sitemaps.org protocol `_, a sitemap file must have no more than 50,000 URLs and must be no larger than 50MiB. In this case, you have to create sitemap index file witch references multiples sitemap files called ``sitemap_index.xml``, instead of ``sitemap.xml``. Be aware in previous steps to use the correct file name corresponding to your installation. +According to `Sitemaps.org protocol `_, a sitemap file must have no more than 50,000 URLs and must be no larger than 50MiB. 
In this case, the protocol instructs you to create a sitemap index file called ``sitemap_index.xml`` (instead of ``sitemap.xml``), which references multiples sitemap files. Be aware in previous steps to use the correct file name corresponding to your situation. One way to submit your sitemap URL to Google is by using their "Search Console" (https://search.google.com/search-console). In order to use the console, you will need to authenticate yourself as the owner of your Dataverse site. Various authentication methods are provided; but if you are already using Google Analytics, the easiest way is to use that account. Make sure you are logged in on Google with the account that has the edit permission on your Google Analytics property; go to the search console and enter the root URL of your Dataverse installation, then choose Google Analytics as the authentication method. Once logged in, click on "Sitemaps" in the menu on the left. (todo: add a screenshot?) Consult `Google's "submit a sitemap" instructions`_ for more information; and/or similar instructions for other search engines. 
From e6ff44e9c8a86da0e9eff013bfca4c5cc5ef8934 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20ROUCOU?= Date: Wed, 3 Apr 2024 14:48:04 +0200 Subject: [PATCH 34/81] Docs : update with reviews feedback --- .../8983-more-than-50000-entries-in-sitemap.md | 6 +++++- doc/sphinx-guides/source/installation/config.rst | 15 +++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/doc/release-notes/8983-more-than-50000-entries-in-sitemap.md b/doc/release-notes/8983-more-than-50000-entries-in-sitemap.md index d5c161bd292..6fcf3180283 100644 --- a/doc/release-notes/8983-more-than-50000-entries-in-sitemap.md +++ b/doc/release-notes/8983-more-than-50000-entries-in-sitemap.md @@ -1,3 +1,7 @@ The sitemap file generation can handle more than 50,000 entries if needed with the [sitemapgen4j](https://github.com/gdcc/sitemapgen4j) library, maintained by the Global Dataverse Community Consortium. -In this case, the Dataverse Admin API `api/admin/sitemap` create a sitemap index file, called `sitemap_index.xml`, in place of the `sitemap.xml` file. See the config section of the Installation Guide for details. +In this case, the Dataverse Admin API `api/admin/sitemap` create a sitemap index file, called `sitemap_index.xml`, in place of the single `sitemap.xml` file. This created file reference multiples simple sitemap file, named ``sitemap1.xml``, ``sitemap2.xml``, etc. This referenced files will be as many files as necessary to contain the URLs of dataverses and datasets presents your installation, while respecting the limit of 50,000 URLs per file. See the [config section of the Installation Guide](https://guides.dataverse.org/en/latest/installation/config.html#creating-a-sitemap-and-submitting-it-to-search-engines) for details. + +A HTML preview can be found [here](https://dataverse-guide--10321.org.readthedocs.build/en/10321/installation/config.html#creating-a-sitemap-and-submitting-it-to-search-engines). 
+ +For more information, see [#8936](https://github.com/IQSS/dataverse/issues/8936). diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index b1d70dafc40..bd185e2d008 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2052,6 +2052,9 @@ If you are not fronting Payara with Apache you'll need to prevent Payara from se Creating a Sitemap and Submitting it to Search Engines ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Sitemap file +############ + Search engines have an easier time indexing content when you provide them a sitemap. The Dataverse Software sitemap includes URLs to all published Dataverse collections and all published datasets that are not harvested or deaccessioned. Create or update your sitemap by adding the following curl command to cron to run nightly or as you see fit: @@ -2068,12 +2071,20 @@ https://demo.dataverse.org/sitemap.xml is the sitemap URL for the Dataverse Proj Once the sitemap has been generated and placed in the domain docroot directory, it will become available to the outside callers at /sitemap/sitemap.xml; it will also be accessible at /sitemap.xml (via a *pretty-faces* rewrite rule). Some search engines will be able to find it at this default location. Some, **including Google**, need to be **specifically instructed** to retrieve it. -According to `Sitemaps.org protocol `_, a sitemap file must have no more than 50,000 URLs and must be no larger than 50MiB. In this case, the protocol instructs you to create a sitemap index file called ``sitemap_index.xml`` (instead of ``sitemap.xml``), which references multiples sitemap files. Be aware in previous steps to use the correct file name corresponding to your situation. - One way to submit your sitemap URL to Google is by using their "Search Console" (https://search.google.com/search-console). 
In order to use the console, you will need to authenticate yourself as the owner of your Dataverse site. Various authentication methods are provided; but if you are already using Google Analytics, the easiest way is to use that account. Make sure you are logged in on Google with the account that has the edit permission on your Google Analytics property; go to the search console and enter the root URL of your Dataverse installation, then choose Google Analytics as the authentication method. Once logged in, click on "Sitemaps" in the menu on the left. (todo: add a screenshot?) Consult `Google's "submit a sitemap" instructions`_ for more information; and/or similar instructions for other search engines. .. _Google's "submit a sitemap" instructions: https://support.google.com/webmasters/answer/183668 +Sitemap index file +################## + +According to `Sitemaps.org protocol `_, a sitemap file must have no more than 50,000 URLs and must be no larger than 50MiB. In this case, the protocol instructs you to create a sitemap index file called ``sitemap_index.xml`` (instead of ``sitemap.xml``), which references multiples sitemap files. In this case, the created files containing the URLs will be named ``sitemap1.xml``, ``sitemap2.xml``, etc. This referenced files are also generated in the same place as other sitemap files and there will be as many files as necessary to contain the URLs of dataverses and datasets presents your installation, while respecting the limit of 50,000 URLs per file. Dataverse will automatically detect whether you need to create a single ``sitemap.xml`` file, or several files. However, you must be careful to use the correct file name corresponding on your situation. 
+ +If there are more than 50,000 dataverses and datasets, the sitemap file created or updated will default to the location: + +``/usr/local/payara6/glassfish/domains/domain1/docroot/sitemap/sitemap_index.xml`` + +Moreover, it can also be accessed at ``/sitemap/sitemap_index.xml`` or ``/sitemap_index.xml``. In case of "Google Search Console" is used to submit the sitemap file, one of the previous URLs have to be used with the ``sitemap_index.xml`` file name. Putting Your Dataverse Installation on the Map at dataverse.org +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ From f2f1ce11a709b24047dbfdb1e670d9f7c3c95d1b Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 10 Apr 2024 14:46:37 -0400 Subject: [PATCH 35/81] Dropping the "macrolanguage" from the names of 2 language families, per conversation with requestor. #8243 --- scripts/api/data/metadatablocks/citation.tsv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 6fa2bed6482..ead82199e63 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -249,7 +249,7 @@ language Nauru 108 nau na language Navajo, Navaho 109 nav nv language Northern Ndebele 110 nde nd - language Nepali (macrolanguage) 111 nep ne + language Nepali 111 nep ne language Ndonga 112 ndo ng language Norwegian Bokmål 113 nob nb language Norwegian Nynorsk 114 nno nn @@ -289,7 +289,7 @@ language Southern Sotho 148 sot st language Spanish, Castilian 149 spa es language Sundanese 150 sun su - language Swahili (macrolanguage) 151 swa sw + language Swahili 151 swa sw language Swati 152 ssw ss language Swedish 153 swe sv language Tamil 154 tam ta From 5acb13a4d7554686fc1ba6e53e6adb7b89f5af23 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 10 Apr 2024 14:59:44 -0400 Subject: [PATCH 36/81] Addresses the cases of languages with the names like "Navajo, Navaho" - 
I'm leaving the main name intact (so that the block update will still works), but adding both versions as extra alternative names, so that either is importable. #8243 --- scripts/api/data/metadatablocks/citation.tsv | 50 ++++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index ead82199e63..613854b677c 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -156,17 +156,17 @@ language Bashkir 15 bak ba language Basque 16 eus baq eu language Belarusian 17 bel be - language Bengali, Bangla 18 ben bn + language Bengali, Bangla 18 ben bn Bengali Bangla language Bihari 19 bih bh language Bislama 20 bis bi language Bosnian 21 bos bs language Breton 22 bre br language Bulgarian 23 bul bg language Burmese 24 mya bur my - language Catalan,Valencian 25 cat ca + language Catalan,Valencian 25 cat ca Catalan Valencian language Chamorro 26 cha ch language Chechen 27 che ce - language Chichewa, Chewa, Nyanja 28 nya ny + language Chichewa, Chewa, Nyanja 28 nya ny Chichewa Chewa Nyanja language Chinese 29 zho chi zh language Chuvash 30 chv cv language Cornish 31 cor kw @@ -175,7 +175,7 @@ language Croatian 34 hrv src hr language Czech 35 ces cze cs language Danish 36 dan da - language Divehi, Dhivehi, Maldivian 37 div dv + language Divehi, Dhivehi, Maldivian 37 div dv Divehi Dhivehi Maldivian language Dutch 38 nld dut nl language Dzongkha 39 dzo dz language English 40 eng en @@ -186,14 +186,14 @@ language Fijian 45 fij fj language Finnish 46 fin fi language French 47 fra fre fr - language Fula, Fulah, Pulaar, Pular 48 ful ff + language Fula, Fulah, Pulaar, Pular 48 ful ff Fula Fulah Pulaar Pular language Galician 49 glg gl language Georgian 50 kat geo ka language German 51 deu ger de - language Greek (modern) 52 gre ell el + language Greek (modern) 52 gre ell el Greek language Guaraní 53 grn gn language 
Gujarati 54 guj gu - language Haitian, Haitian Creole 55 hat ht + language Haitian, Haitian Creole 55 hat ht Haitian Haitian Creole language Hausa 56 hau ha language Hebrew (modern) 57 heb he language Herero 58 her hz @@ -212,24 +212,24 @@ language Inuktitut 71 iku iu language Japanese 72 jpn ja language Javanese 73 jav jv - language Kalaallisut, Greenlandic 74 kal kl + language Kalaallisut, Greenlandic 74 kal kl Kalaallisut Greenlandic language Kannada 75 kan kn language Kanuri 76 kau kr language Kashmiri 77 kas ks language Kazakh 78 kaz kk language Khmer 79 khm km - language Kikuyu, Gikuyu 80 kik ki + language Kikuyu, Gikuyu 80 kik ki Kikuyu Gikuyu language Kinyarwanda 81 kin rw language Kyrgyz 82 kir ky Kirghiz language Komi 83 kom kv language Kongo 84 kon kg language Korean 85 kor ko language Kurdish 86 kur ku - language Kwanyama, Kuanyama 87 kua kj + language Kwanyama, Kuanyama 87 kua kj Kwanyama Kuanyama language Latin 88 lat la - language Luxembourgish, Letzeburgesch 89 ltz lb + language Luxembourgish, Letzeburgesch 89 ltz lb Luxembourgish Letzeburgesch language Ganda 90 lug lg - language Limburgish, Limburgan, Limburger 91 lim li + language Limburgish, Limburgan, Limburger 91 lim li Limburgish Limburgan Limburger language Lingala 92 lin ln language Lao 93 lao lo language Lithuanian 94 lit lt @@ -247,7 +247,7 @@ language Mixtepec Mixtec 106 mix language Mongolian 107 mon mn language Nauru 108 nau na - language Navajo, Navaho 109 nav nv + language Navajo, Navaho 109 nav nv Navajo Navaho language Northern Ndebele 110 nde nd language Nepali 111 nep ne language Ndonga 112 ndo ng @@ -257,16 +257,16 @@ language Nuosu 116 language Southern Ndebele 117 nbl nr language Occitan 118 oci oc - language Ojibwe, Ojibwa 119 oji oj + language Ojibwe, Ojibwa 119 oji oj Ojibwe Ojibwa language Old Church Slavonic,Church Slavonic,Old Bulgarian 120 chu cu language Oromo 121 orm om language Oriya 122 ori or - language Ossetian, Ossetic 123 oss os - language Panjabi, Punjabi 124 
pan pa + language Ossetian, Ossetic 123 oss os Ossetian Ossetic + language Panjabi, Punjabi 124 pan pa Panjabi Punjabi language Pāli 125 pli pi language Persian (Farsi) 126 per fas fa language Polish 127 pol pl - language Pashto, Pushto 128 pus ps + language Pashto, Pushto 128 pus ps Pashto Pushto language Portuguese 129 por pt language Quechua 130 que qu language Romansh 131 roh rm @@ -280,14 +280,14 @@ language Samoan 139 smo sm language Sango 140 sag sg language Serbian 141 srp scc sr - language Scottish Gaelic, Gaelic 142 gla gd + language Scottish Gaelic, Gaelic 142 gla gd Scottish Gaelic Gaelic language Shona 143 sna sn - language Sinhala, Sinhalese 144 sin si + language Sinhala, Sinhalese 144 sin si Sinhala Sinhalese language Slovak 145 slk slo sk language Slovene 146 slv sl Slovenian language Somali 147 som so language Southern Sotho 148 sot st - language Spanish, Castilian 149 spa es + language Spanish, Castilian 149 spa es Spanish Castilian language Sundanese 150 sun su language Swahili 151 swa sw language Swati 152 ssw ss @@ -297,17 +297,17 @@ language Tajik 156 tgk tg language Thai 157 tha th language Tigrinya 158 tir ti - language Tibetan Standard, Tibetan, Central 159 tib bod bo + language Tibetan Standard, Tibetan, Central 159 tib bod bo Tibetan Standard Tibetan Central language Turkmen 160 tuk tk language Tagalog 161 tgl tl language Tswana 162 tsn tn - language Tonga (Tonga Islands) 163 ton to + language Tonga (Tonga Islands) 163 ton to Tonga language Turkish 164 tur tr language Tsonga 165 tso ts language Tatar 166 tat tt language Twi 167 twi tw language Tahitian 168 tah ty - language Uyghur, Uighur 169 uig ug + language Uyghur, Uighur 169 uig ug Uyghur Uighur language Ukrainian 170 ukr uk language Urdu 171 urd ur language Uzbek 172 uzb uz @@ -321,6 +321,6 @@ language Xhosa 180 xho xh language Yiddish 181 yid yi language Yoruba 182 yor yo - language Zhuang, Chuang 183 zha za - language Zulu 184 zul zu + language Zhuang, Chuang 183 zha za Zhuang 
Chuang + language Zulu 184 zul zu language Not applicable 185 From 63244776c5e25487eadd166aec19b81cf35323c4 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 10 Apr 2024 17:58:22 -0400 Subject: [PATCH 37/81] Fixed the tabs in the entries for Catalan and Haitian Creole; added the ISO 639-1 and -2 codes for Nuosu. #8243 --- scripts/api/data/metadatablocks/citation.tsv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 613854b677c..a86c566bf71 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -163,7 +163,7 @@ language Breton 22 bre br language Bulgarian 23 bul bg language Burmese 24 mya bur my - language Catalan,Valencian 25 cat ca Catalan Valencian + language Catalan,Valencian 25 cat ca Catalan Valencian language Chamorro 26 cha ch language Chechen 27 che ce language Chichewa, Chewa, Nyanja 28 nya ny Chichewa Chewa Nyanja @@ -193,7 +193,7 @@ language Greek (modern) 52 gre ell el Greek language Guaraní 53 grn gn language Gujarati 54 guj gu - language Haitian, Haitian Creole 55 hat ht Haitian Haitian Creole + language Haitian, Haitian Creole 55 hat ht Haitian Haitian Creole language Hausa 56 hau ha language Hebrew (modern) 57 heb he language Herero 58 her hz @@ -254,7 +254,7 @@ language Norwegian Bokmål 113 nob nb language Norwegian Nynorsk 114 nno nn language Norwegian 115 nor no - language Nuosu 116 + language Nuosu 116 iii ii language Southern Ndebele 117 nbl nr language Occitan 118 oci oc language Ojibwe, Ojibwa 119 oji oj Ojibwe Ojibwa From 091ad2187bdd3f562151073bef706c1b753758c6 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 10 Apr 2024 18:01:51 -0400 Subject: [PATCH 38/81] One extra alternative name for Nuosu (from the ISO 893-2 articles on Wikipedia) #8243 --- scripts/api/data/metadatablocks/citation.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index a86c566bf71..a7befb8c773 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -254,7 +254,7 @@ language Norwegian Bokmål 113 nob nb language Norwegian Nynorsk 114 nno nn language Norwegian 115 nor no - language Nuosu 116 iii ii + language Nuosu 116 iii ii Sichuan Yi language Southern Ndebele 117 nbl nr language Occitan 118 oci oc language Ojibwe, Ojibwa 119 oji oj Ojibwe Ojibwa From d7a727fa977394b9f79794852ea216b5f74423d9 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 10 Apr 2024 19:08:39 -0400 Subject: [PATCH 39/81] a few minor cosmetic improvements (rearranging the 3 letter codes in the order in which they are listed in the current ISO 639-3 table) #8243 --- scripts/api/data/metadatablocks/citation.tsv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index a7befb8c773..35cac820f7f 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -190,7 +190,7 @@ language Galician 49 glg gl language Georgian 50 kat geo ka language German 51 deu ger de - language Greek (modern) 52 gre ell el Greek + language Greek (modern) 52 ell gre el Greek language Guaraní 53 grn gn language Gujarati 54 guj gu language Haitian, Haitian Creole 55 hat ht Haitian Haitian Creole @@ -238,10 +238,10 @@ language Manx 97 glv gv language Macedonian 98 mkd mac mk language Malagasy 99 mlg mg - language Malay 100 may msa ms + language Malay 100 msa may ms language Malayalam 101 mal ml language Maltese 102 mlt mt - language Māori 103 mao mri mi + language Māori 103 mri mao mi Maori language Marathi (Marāṭhī) 104 mar mr language Marshallese 105 mah mh language Mixtepec Mixtec 106 mix @@ -264,7 +264,7 @@ language Ossetian, Ossetic 123 oss os Ossetian Ossetic 
language Panjabi, Punjabi 124 pan pa Panjabi Punjabi language Pāli 125 pli pi - language Persian (Farsi) 126 per fas fa + language Persian (Farsi) 126 fas per fa language Polish 127 pol pl language Pashto, Pushto 128 pus ps Pashto Pushto language Portuguese 129 por pt @@ -297,7 +297,7 @@ language Tajik 156 tgk tg language Thai 157 tha th language Tigrinya 158 tir ti - language Tibetan Standard, Tibetan, Central 159 tib bod bo Tibetan Standard Tibetan Central + language Tibetan Standard, Tibetan, Central 159 bod tib bo Tibetan Standard Tibetan Central language Turkmen 160 tuk tk language Tagalog 161 tgl tl language Tswana 162 tsn tn From 5e0c73fd5d620ebc67315d7625a1f5b30f2452c3 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Apr 2024 12:42:21 -0400 Subject: [PATCH 40/81] update MIT license and add guidance #10425 #8512 --- doc/release-notes/10425-add-MIT-License.md | 4 +++- .../source/installation/config.rst | 19 ++++++++++++++++++- scripts/api/data/licenses/licenseMIT.json | 6 +++--- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/doc/release-notes/10425-add-MIT-License.md b/doc/release-notes/10425-add-MIT-License.md index 2e468fefbe0..95d6fb38ded 100644 --- a/doc/release-notes/10425-add-MIT-License.md +++ b/doc/release-notes/10425-add-MIT-License.md @@ -1 +1,3 @@ -A new file has been added to import the MIT License to Dataverse on licenseMIT.json \ No newline at end of file +A new file has been added to import the MIT License to Dataverse: licenseMIT.json. + +Documentation has been added to explain the procedure for adding new licenses to the guides. 
diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index b5bce6b870d..4e08ee6c845 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1789,10 +1789,27 @@ JSON files for `Creative Commons licenses ` +Contributing to the Collection of Standard Licenses Above +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you do not find the license JSON you need above, you are encouraged to contribute it to this documentation. Following the Dataverse 6.2 release, we have standardized on the following procedure: + +- Look for the license at https://spdx.org/licenses/ +- ``cd scripts/api/data/licenses`` +- Copy an existing license as a starting point. +- Name your file using the SPDX identifier. For example, if the identifier is ``MIT``, you should name your file ``licenseMIT.json``. +- For the ``name`` field, use the "short identifier" from the SPDX landing page (e.g. ``MIT``). +- For the ``description`` field, use the "full name" from the SPDX landing page (e.g. ``MIT License``). +- For the ``uri`` field, go to the SPDX landing page for the license and click on the link under "other web pages for this license". Let any redirection happen and then copy the URL (e.g. ``https://opensource.org/license/mit``) into the ``uri`` field. +- For the ``active`` field, put ``true``. +- For the ``sortOrder`` field, put the next sequential number after checking previous files with ``grep sortOrder scripts/api/data/licenses/*``. + +Note that prior to Dataverse 6.2, various licenses above have been added that do not adhere perfectly to this procedure. For example, the ``name`` for the CC0 license is ``CC0 1.0`` (no dash) rather than ``CC0-1.0`` (with a dash). We are keeping the existing names for backward compatibility.
For more on standardizing license configuration, see https://github.com/IQSS/dataverse/issues/8512 + Adding Custom Licenses ^^^^^^^^^^^^^^^^^^^^^^ diff --git a/scripts/api/data/licenses/licenseMIT.json b/scripts/api/data/licenses/licenseMIT.json index b02d7d39994..8a5ea485201 100644 --- a/scripts/api/data/licenses/licenseMIT.json +++ b/scripts/api/data/licenses/licenseMIT.json @@ -1,7 +1,7 @@ { - "name": "MIT License", - "uri": "https://spdx.org/licenses/MIT.html", - "shortDescription": "Massachusetts Institute of Technology License (MIT).", + "name": "MIT", + "uri": "https://opensource.org/license/mit", + "shortDescription": "MIT License", "active": true, "sortOrder": 8 } From 8072d91fb115fc916833e5a570ed72d7122d1a29 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 12 Apr 2024 13:12:57 -0400 Subject: [PATCH 41/81] a release note. #8243 --- .../8243-improve-language-controlled-vocab.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 doc/release-notes/8243-improve-language-controlled-vocab.md diff --git a/doc/release-notes/8243-improve-language-controlled-vocab.md b/doc/release-notes/8243-improve-language-controlled-vocab.md new file mode 100644 index 00000000000..15b2b46c02d --- /dev/null +++ b/doc/release-notes/8243-improve-language-controlled-vocab.md @@ -0,0 +1,11 @@ +The Controlled Vocabulary Values list for the metadata field Language in the Citation block has been improved, with some missing two- and three-letter ISO 639 codes added, as well as more alternative names for some of the languages, making all these extra language identifiers importable.
+ +To be added to the 6.3 release instructions: + +Update the Citation block, to incorporate the improved controlled vocabulary for language [plus whatever other improvements may be made to the block in other PRs]: + +``` +wget https://raw.githubusercontent.com/IQSS/dataverse/v6.3/scripts/api/data/metadatablocks/citation.tsv +curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file citation.tsv +``` + From 7cd778983d87356a4d2698470ba5c7b422744194 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 18 Apr 2024 14:53:20 -0400 Subject: [PATCH 42/81] rename release note snippet with "8936" #8936 --- ...s-in-sitemap.md => 8936-more-than-50000-entries-in-sitemap.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename doc/release-notes/{8983-more-than-50000-entries-in-sitemap.md => 8936-more-than-50000-entries-in-sitemap.md} (100%) diff --git a/doc/release-notes/8983-more-than-50000-entries-in-sitemap.md b/doc/release-notes/8936-more-than-50000-entries-in-sitemap.md similarity index 100% rename from doc/release-notes/8983-more-than-50000-entries-in-sitemap.md rename to doc/release-notes/8936-more-than-50000-entries-in-sitemap.md From ceb8c0f89009a6d6da147ae9553f37f4f47be081 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 18 Apr 2024 15:01:12 -0400 Subject: [PATCH 43/81] simplify release note, add upgrade section #8936 --- .../8936-more-than-50000-entries-in-sitemap.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/release-notes/8936-more-than-50000-entries-in-sitemap.md b/doc/release-notes/8936-more-than-50000-entries-in-sitemap.md index 6fcf3180283..7b367e328c1 100644 --- a/doc/release-notes/8936-more-than-50000-entries-in-sitemap.md +++ b/doc/release-notes/8936-more-than-50000-entries-in-sitemap.md @@ -1,7 +1,11 @@ -The sitemap file generation can handle more than 50,000 entries if needed with the [sitemapgen4j](https://github.com/gdcc/sitemapgen4j) 
library, maintained by the Global Dataverse Community Consortium. +Dataverse can now handle more than 50,000 items when generating sitemap files, splitting the content across multiple files to comply with the Sitemap protocol. -In this case, the Dataverse Admin API `api/admin/sitemap` create a sitemap index file, called `sitemap_index.xml`, in place of the single `sitemap.xml` file. This created file reference multiples simple sitemap file, named ``sitemap1.xml``, ``sitemap2.xml``, etc. This referenced files will be as many files as necessary to contain the URLs of dataverses and datasets presents your installation, while respecting the limit of 50,000 URLs per file. See the [config section of the Installation Guide](https://guides.dataverse.org/en/latest/installation/config.html#creating-a-sitemap-and-submitting-it-to-search-engines) for details. +For details see https://dataverse-guide--10321.org.readthedocs.build/en/10321/installation/config.html#creating-a-sitemap-and-submitting-it-to-search-engines #8936 and #10321. -A HTML preview can be found [here](https://dataverse-guide--10321.org.readthedocs.build/en/10321/installation/config.html#creating-a-sitemap-and-submitting-it-to-search-engines). +## Upgrade instructions -For more information, see [#8936](https://github.com/IQSS/dataverse/issues/8936). +If your installation has more than 50,000 entries, you should re-submit your sitemap URL to Google or other search engines. The file in the URL will change from ``sitemap.xml`` to ``sitemap_index.xml``. 
+ +As explained at https://dataverse-guide--10321.org.readthedocs.build/en/10321/installation/config.html#creating-a-sitemap-and-submitting-it-to-search-engines this is the command for regenerating your sitemap: + +`curl -X POST http://localhost:8080/api/admin/sitemap` From b228fe7ee5572fca6ada2c646b2f81a696f62170 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 18 Apr 2024 15:01:36 -0400 Subject: [PATCH 44/81] rewrite sitemap docs (50,000 items now supported) #8936 --- .../source/installation/config.rst | 38 +++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index bd185e2d008..e4ff65f059e 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2052,39 +2052,47 @@ If you are not fronting Payara with Apache you'll need to prevent Payara from se Creating a Sitemap and Submitting it to Search Engines ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Sitemap file -############ +Creating a Sitemap +################## -Search engines have an easier time indexing content when you provide them a sitemap. The Dataverse Software sitemap includes URLs to all published Dataverse collections and all published datasets that are not harvested or deaccessioned. +Search engines have an easier time indexing content when you provide them a sitemap. Dataverse can generate a sitemap that includes URLs to all published collections and all published datasets that are not harvested or deaccessioned. 
Create or update your sitemap by adding the following curl command to cron to run nightly or as you see fit: ``curl -X POST http://localhost:8080/api/admin/sitemap`` -This will create or update a file in the following location unless you have customized your installation directory for Payara: +On a Dataverse installation with many datasets, the creation or updating of the sitemap can take a while. You can check Payara's server.log file for "BEGIN updateSiteMap" and "END updateSiteMap" lines to know when the process started and stopped and any errors in between. + +For compliance with the `Sitemap protocol `_, the generated sitemap will be a single file with 50,000 items or fewer or it will be split into multiple files. + +Single Sitemap File +################### + +If you have 50,000 items or fewer, a single sitemap will be generated in the following location (unless you have customized your installation directory for Payara): ``/usr/local/payara6/glassfish/domains/domain1/docroot/sitemap/sitemap.xml`` -On Dataverse installation with many datasets, the creation or updating of the sitemap can take a while. You can check Payara's server.log file for "BEGIN updateSiteMap" and "END updateSiteMap" lines to know when the process started and stopped and any errors in between. +Once the sitemap has been generated in the location above, it will be served at ``/sitemap.xml`` like this: https://demo.dataverse.org/sitemap.xml -https://demo.dataverse.org/sitemap.xml is the sitemap URL for the Dataverse Project Demo site and yours should be similar. +Multiple Sitemap Files (Sitemap Index File) +########################################### -Once the sitemap has been generated and placed in the domain docroot directory, it will become available to the outside callers at /sitemap/sitemap.xml; it will also be accessible at /sitemap.xml (via a *pretty-faces* rewrite rule). Some search engines will be able to find it at this default location. 
Some, **including Google**, need to be **specifically instructed** to retrieve it. +According to the `Sitemaps.org protocol `_, a sitemap file must have no more than 50,000 URLs and must be no larger than 50MiB. In this case, the protocol instructs you to create a sitemap index file called ``sitemap_index.xml`` (instead of ``sitemap.xml``), which references multiple sitemap files. In this case, the created files containing the URLs will be named ``sitemap1.xml``, ``sitemap2.xml``, etc. The referenced files are also generated in the same place as other sitemap files and there will be as many files as necessary to contain the URLs of collections and datasets present in your installation, while respecting the limit of 50,000 URLs per file. Dataverse will automatically detect whether you need to create a single ``sitemap.xml`` file or several files and generate them for you. However, when submitting your sitemap file to Google or other search engines as described below, you must be careful to use the correct file name corresponding to your situation. -One way to submit your sitemap URL to Google is by using their "Search Console" (https://search.google.com/search-console). In order to use the console, you will need to authenticate yourself as the owner of your Dataverse site. Various authentication methods are provided; but if you are already using Google Analytics, the easiest way is to use that account. Make sure you are logged in on Google with the account that has the edit permission on your Google Analytics property; go to the search console and enter the root URL of your Dataverse installation, then choose Google Analytics as the authentication method. Once logged in, click on "Sitemaps" in the menu on the left. (todo: add a screenshot?) Consult `Google's "submit a sitemap" instructions`_ for more information; and/or similar instructions for other search engines. 
+If you have over 50,000 items, a sitemap index file will be generated in the following location (unless you have customized your installation directory for Payara): -.. _Google's "submit a sitemap" instructions: https://support.google.com/webmasters/answer/183668 +``/usr/local/payara6/glassfish/domains/domain1/docroot/sitemap/sitemap_index.xml`` -Sitemap index file -################## +Once the sitemap has been generated in the location above, it will be served at ``/sitemap_index.xml`` like this: https://demo.dataverse.org/sitemap_index.xml -According to `Sitemaps.org protocol `_, a sitemap file must have no more than 50,000 URLs and must be no larger than 50MiB. In this case, the protocol instructs you to create a sitemap index file called ``sitemap_index.xml`` (instead of ``sitemap.xml``), which references multiples sitemap files. In this case, the created files containing the URLs will be named ``sitemap1.xml``, ``sitemap2.xml``, etc. This referenced files are also generated in the same place as other sitemap files and there will be as many files as necessary to contain the URLs of dataverses and datasets presents your installation, while respecting the limit of 50,000 URLs per file. Dataverse will automatically detect whether you need to create a single ``sitemap.xml`` file, or several files. However, you must be careful to use the correct file name corresponding on your situation. +Submitting Your Sitemap to Search Engines +######################################### -If there are more than 50,000 dataverses and datasets, the sitemap file created or updated will default to the location: +Some search engines will be able to find your sitemap file at ``/sitemap.xml`` or ``sitemap_index.xml``, but others, **including Google**, need to be **specifically instructed** to retrieve it. 
-``/usr/local/payara6/glassfish/domains/domain1/docroot/sitemap/sitemap_index.xml`` +One way to submit your sitemap URL to Google is by using their "Search Console" (https://search.google.com/search-console). In order to use the console, you will need to authenticate yourself as the owner of your Dataverse site. Various authentication methods are provided; but if you are already using Google Analytics, the easiest way is to use that account. Make sure you are logged in on Google with the account that has the edit permission on your Google Analytics property; go to the Search Console and enter the root URL of your Dataverse installation, then choose Google Analytics as the authentication method. Once logged in, click on "Sitemaps" in the menu on the left. Consult `Google's "submit a sitemap" instructions`_ for more information. -Moreover, it can also be accessed at ``/sitemap/sitemap_index.xml`` or ``/sitemap_index.xml``. In case of "Google Search Console" is used to submit the sitemap file, one of the previous URLs have to be used with the ``sitemap_index.xml`` file name. +.. 
_Google's "submit a sitemap" instructions: https://support.google.com/webmasters/answer/183668 Putting Your Dataverse Installation on the Map at dataverse.org +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ From 527e6e06198e92acc22181df0602c4dde3f11115 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 19 Apr 2024 10:37:06 -0400 Subject: [PATCH 45/81] URI change --- scripts/api/data/licenses/license-Apache-2.0.json | 8 ++++++++ scripts/api/data/licenses/licenseMIT.json | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 scripts/api/data/licenses/license-Apache-2.0.json diff --git a/scripts/api/data/licenses/license-Apache-2.0.json b/scripts/api/data/licenses/license-Apache-2.0.json new file mode 100644 index 00000000000..0fdd071ea6e --- /dev/null +++ b/scripts/api/data/licenses/license-Apache-2.0.json @@ -0,0 +1,8 @@ +{ + "name": "Apache-2.0", + "uri": "https://spdx.org/licenses/Apache-2.0", + "shortDescription": "Apache License 2.0", + "active": true, + "sortOrder": 9 + } + \ No newline at end of file diff --git a/scripts/api/data/licenses/licenseMIT.json b/scripts/api/data/licenses/licenseMIT.json index 8a5ea485201..d5315d42a28 100644 --- a/scripts/api/data/licenses/licenseMIT.json +++ b/scripts/api/data/licenses/licenseMIT.json @@ -1,6 +1,6 @@ { "name": "MIT", - "uri": "https://opensource.org/license/mit", + "uri": "https://spdx.org/licenses/MIT", "shortDescription": "MIT License", "active": true, "sortOrder": 8 From 5734dd198c452ab356de574aced3409354f71ac2 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 19 Apr 2024 14:20:24 -0400 Subject: [PATCH 46/81] Update to docs --- doc/sphinx-guides/source/installation/config.rst | 9 +++++---- .../{license-Apache-2.0.json => licenseApache-2.0.json} | 0 2 files changed, 5 insertions(+), 4 deletions(-) rename scripts/api/data/licenses/{license-Apache-2.0.json => licenseApache-2.0.json} (100%) diff --git 
a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 4e08ee6c845..fac23d49225 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1792,6 +1792,7 @@ Adding Software Licenses JSON files for software licenses are provided below. - :download:`licenseMIT.json <../../../../scripts/api/data/licenses/licenseMIT.json>` +- :download:`licenseApache-2.0.json <../../../../scripts/api/data/licenses/licenseApache-2.0.json>` Contributing to the Collection of Standard Licenses Above ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1801,10 +1802,10 @@ If you do not find the license JSON you need above, you are encouraged to contri - Look for the license at https://spdx.org/licenses/ - ``cd scripts/api/data/licenses`` - Copy an existing license as a starting point. -- Name your file using the SPDX identifier. For example, if the identifier is ``MIT``, you should name your file ``licenseMIT.json``. -- For the ``name`` field, use the "short identifier" from the SPDX landing page (e.g. ``MIT``). -- For the ``description`` field, use the "full name" from the SPDX landing page (e.g. ``MIT License``). -- For the ``uri`` field, go to the SPDX landing page for the license and click on the link under "other web pages for this license". Let any redirection happen and then copy the URL (e.g. ``https://opensource.org/license/mit``) into the ``uri`` field. +- Name your file using the SPDX identifier. For example, if the identifier is ``Apache-2.0``, you should name your file ``licenseApache-2.0.json``. +- For the ``name`` field, use the "short identifier" from the SPDX landing page (e.g. ``Apache-2.0``). +- For the ``description`` field, use the "full name" from the SPDX landing page (e.g. ``Apache License 2.0``). +- For the ``uri`` field, go to the SPDX landing page for the license and remove the extension ``.html`` from your browser and then copy the URL (e.g. 
``https://spdx.org/licenses/Apache-2.0``) into the ``uri`` field. - For the ``active`` field, put ``true``. - For the ``sortOrder`` field, put the next sequention number after checking previous files with ``grep sortOrder scripts/api/data/licenses/*``. diff --git a/scripts/api/data/licenses/license-Apache-2.0.json b/scripts/api/data/licenses/licenseApache-2.0.json similarity index 100% rename from scripts/api/data/licenses/license-Apache-2.0.json rename to scripts/api/data/licenses/licenseApache-2.0.json From f071658df01ba5dc5fcc8820ff4e9987bf85fd4d Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 22 Apr 2024 16:18:52 +0200 Subject: [PATCH 47/81] removed unneeded ro-crate test file --- .../edu/harvard/iq/dataverse/util/FileUtilTest.java | 12 ++++-------- src/test/resources/fileutil/ro-crate-metadata.json | 1 - 2 files changed, 4 insertions(+), 9 deletions(-) delete mode 100644 src/test/resources/fileutil/ro-crate-metadata.json diff --git a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java index bf63b6d39b3..29a7ae9934e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java @@ -390,14 +390,10 @@ public void testDetermineFileTypeROCrate() { assertEquals("Metadata", FileUtil.getIndexableFacetFileType(rocrate)); final File roCrateFile = new File("src/test/resources/fileutil/ro-crate-metadata.json"); - if (roCrateFile.exists()) { - try { - assertEquals(roCrateContentType, FileUtil.determineFileType(roCrateFile, "ro-crate-metadata.json")); - } catch (IOException ex) { - Logger.getLogger(FileUtilTest.class.getName()).log(Level.SEVERE, null, ex); - } - } else { - fail("File does not exist: " + roCrateFile.toPath().toString()); + try { + assertEquals(roCrateContentType, FileUtil.determineFileType(roCrateFile, "ro-crate-metadata.json")); + } catch (IOException ex) { + fail(ex); } // test ";" removal diff 
--git a/src/test/resources/fileutil/ro-crate-metadata.json b/src/test/resources/fileutil/ro-crate-metadata.json deleted file mode 100644 index 5ac4b24ad2b..00000000000 --- a/src/test/resources/fileutil/ro-crate-metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"@context":["https://w3id.org/ro/crate/1.1/context",{"metadataContext":"https://language-archives.services/metadata-definitions#metadataContext","orthographicNotes":"https://language-archives.services/metadata-definitions#orthographicNotes","media":"https://language-archives.services/metadata-definitions#media","comments":"https://language-archives.services/metadata-definitions#comments","private":"https://language-archives.services/metadata-definitions#private","depositFormReceived":"https://language-archives.services/metadata-definitions#depositFormReceived","subjectLanguages":"https://language-archives.services/metadata-definitions#subjectLanguages","originatedOn":"https://language-archives.services/metadata-definitions#originatedOn","languageAsGiven":"https://language-archives.services/metadata-definitions#languageAsGiven","contentLanguages":"https://language-archives.services/metadata-definitions#contentLanguages","dialect":"https://language-archives.services/metadata-definitions#dialect","discourseType":"https://language-archives.services/metadata-definitions#discourseType","bornDigital":"https://language-archives.services/metadata-definitions#bornDigital","accessNarrative":"https://language-archives.services/metadata-definitions#accessNarrative","receivedOn":"https://language-archives.services/metadata-definitions#receivedOn","digitisedOn":"https://language-archives.services/metadata-definitions#digitisedOn","ingestNotes":"https://language-archives.services/metadata-definitions#ingestNotes","adminComment":"https://language-archives.services/metadata-definitions#adminComment","external":"https://language-archives.services/metadata-definitions#external","dataType":"https://language-archives.services/metadata-de
finitions#dataType","citeAs":"https://language-archives.services/metadata-definitions#citeAs","originatedOnNarrative":"https://language-archives.services/metadata-definitions#originatedOnNarrative","countries":"https://language-archives.services/metadata-definitions#countries","tapesReturned":"https://language-archives.services/metadata-definitions#tapesReturned","originalMedia":"https://language-archives.services/metadata-definitions#originalMedia","metadataExportable":"https://language-archives.services/metadata-definitions#metadataExportable","metadataImportedOn":"https://language-archives.services/metadata-definitions#metadataImportedOn","metadataExportedOn":"https://language-archives.services/metadata-definitions#metadataExportedOn","tracking":"https://language-archives.services/metadata-definitions#tracking","fieldsOfResearch":"https://language-archives.services/metadata-definitions#fieldsOfResearch","role":"https://language-archives.services/metadata-definitions#role","doi":"https://language-archives.services/metadata-definitions#doi","sampleRate":"https://language-archives.services/metadata-definitions#sampleRate","channels":"https://language-archives.services/metadata-definitions#channels","fps":"https://language-archives.services/metadata-definitions#fps","essenceId":"https://language-archives.services/metadata-definitions#essenceId"}],"@graph":[{"@id":"#Erakor village","@type":"Place","name":"Erakor village"},{"@id":"#country_Australia","@type":"Country","name":"Australia"},{"@id":"#country_Vanuatu","@type":"Country","code":"VU","name":"Vanuatu"},{"@id":"#country_null","@type":"Country"},{"@id":"#geo_166.427,-22.283,166.467,-22.241","@type":"GeoShape","box":"166.427,-22.283 166.467,-22.241"},{"@id":"#geo_168.159,-17.83,168.594,-17.585","@type":"GeoShape","box":"168.159,-17.83 168.594,-17.585"},{"@id":"#geo_168.217,-17.8235,168.317,-17.7235","@type":"GeoShape","box":"168.217,-17.8235 
168.317,-17.7235"},{"@id":"#identifier_collectionId","@type":"PropertyValue","name":"collectionIdentifier","value":"NT1"},{"@id":"#identifier_doi","@type":"PropertyValue","name":"doi","value":"10.4225/72/56F94A61DA9EC"},{"@id":"#identifier_domain","@type":"PropertyValue","name":"domain","value":"paradisec.org.au"},{"@id":"#identifier_hashid","@type":"PropertyValue","name":"hashId","value":"72b3dc1401c8ff06aacba0990a128fc113cf9ad5275f494b05c1142177356561bd7f4c0e8800bade2cbbbed75f6d9d019894735ad7e40762684d243a442d658a"},{"@id":"#identifier_id","@type":"PropertyValue","name":"id","value":"/paradisec.org.au/NT1/98007"},{"@id":"#identifier_itemId","@type":"PropertyValue","name":"itemIdentifier","value":"98007"},{"@id":"#language_bis","@type":"Language","code":"bis","location":{"@id":"#geo_166.427,-22.283,166.467,-22.241"},"name":"Bislama"},{"@id":"#language_erk","@type":"Language","code":"erk","location":{"@id":"#geo_168.159,-17.83,168.594,-17.585"},"name":"Efate, South"},{"@id":"#place_geo_168.217,-17.8235,168.317,-17.7235","@type":"Place","geo":{"@id":"#geo_168.217,-17.8235,168.317,-17.7235"}},{"@id":"./","@type":["Dataset","RepositoryObject"],"additionalType":"item","contentLocation":[{"@id":"#Erakor village"},{"@id":"#place_geo_168.217,-17.8235,168.317,-17.7235"}],"contributor":[{"@id":"http://nla.gov.au/nla.party-479603"},{"@id":"Kalsarap Namaf"},{"@id":"Iokopeth null"},{"@id":"John Maklen"},{"@id":"Waia Tenene"}],"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2018-05-17T04:13:04.000Z","description":"NT1-98007. Text #047 (speaker is John Maklen. Text title: History of villages before Erakor); Text #048 (speaker is John Maklen. Text title: Mantu the flying fox and Erromango); Text #049. Text title: Asaraf (speaker is John Maklen);Text #050. Text title: Mumu and Kotkot (speaker is John Maklen); Text #051. Text title: Natopu ni Erakor—the spirit who lives at Erakor (speaker is John Maklen);Text #038. 
Text title: The need for respect (speaker is Iokopeth) Stories can be seen at NT8-TEXT. There are time-aligned transcripts of this item and handwritten transcripts by Manuel Wayane scanned as jpg files.","hasPart":[{"@id":"NT1-98007-001.jpg"},{"@id":"NT1-98007-002.jpg"},{"@id":"NT1-98007-003.jpg"},{"@id":"NT1-98007-004.jpg"},{"@id":"NT1-98007-005.jpg"},{"@id":"NT1-98007-006.jpg"},{"@id":"NT1-98007-007.jpg"},{"@id":"NT1-98007-008.jpg"},{"@id":"NT1-98007-009.jpg"},{"@id":"NT1-98007-010.jpg"},{"@id":"NT1-98007-011.jpg"},{"@id":"NT1-98007-012.jpg"},{"@id":"NT1-98007-013.jpg"},{"@id":"NT1-98007-014.jpg"},{"@id":"NT1-98007-015.jpg"},{"@id":"NT1-98007-016.jpg"},{"@id":"NT1-98007-017.jpg"},{"@id":"NT1-98007-018.jpg"},{"@id":"NT1-98007-019.jpg"},{"@id":"NT1-98007-020.jpg"},{"@id":"NT1-98007-021.jpg"},{"@id":"NT1-98007-022.jpg"},{"@id":"NT1-98007-023.jpg"},{"@id":"NT1-98007-024.jpg"},{"@id":"NT1-98007-025.jpg"},{"@id":"NT1-98007-026.jpg"},{"@id":"NT1-98007-027.jpg"},{"@id":"NT1-98007-028.jpg"},{"@id":"NT1-98007-029.jpg"},{"@id":"NT1-98007-030.jpg"},{"@id":"NT1-98007-031.jpg"},{"@id":"NT1-98007-98007A.mp3"},{"@id":"NT1-98007-98007A.wav"},{"@id":"NT1-98007-98007B.mp3"},{"@id":"NT1-98007-98007B.wav"},{"@id":"NT1-98007-98007az.xml"},{"@id":"NT1-98007-A.tab"},{"@id":"NT1-98007-A.xml"},{"@id":"NT1-98007-B.tab"},{"@id":"NT1-98007-B.xml"},{"@id":"NT1-98007-98007A.ixt"},{"@id":"NT1-98007-98007A.trs"},{"@id":"NT1-98007-98007A.flextext"},{"@id":"NT1-98007-98007A.eaf"},{"@id":"NT1-98007-98007B.eaf"}],"identifier":[{"@id":"#identifier_domain"},{"@id":"#identifier_id"},{"@id":"#identifier_hashid"},{"@id":"#identifier_itemId"},{"@id":"#identifier_collectionId"},{"@id":"#identifier_doi"}],"license":{"@id":"_:b0"},"memberOf":{"@id":"/paradisec.org.au/NT1"},"name":"Recordings in South Efate","publisher":{"@id":"http://nla.gov.au/nla.party-593909"},"bornDigital":0,"contentLanguages":[{"@id":"#language_bis"},{"@id":"#language_erk"}],"countries":{"@id":"#country_Vanuatu"},"digitisedOn":"Sun Dec 
31 2000 13:00:00 GMT+0000 (Coordinated Universal Time)","external":0,"languageAsGiven":"Nafsan","metadataExportable":1,"originalMedia":"audiocassette","originatedOn":"1998-10-03","private":0,"subjectLanguages":{"@id":"#language_erk"},"tapesReturned":0},{"@id":"Iokopeth null","@type":"Person","givenName":"Iokopeth","homeLocation":{"@id":"#country_null"},"name":"Iokopeth","role":{"@id":"role_speaker"}},{"@id":"John Maklen","@type":"Person","familyName":"Maklen","givenName":"John","homeLocation":{"@id":"#country_null"},"name":"John Maklen","role":{"@id":"role_speaker"}},{"@id":"Kalsarap Namaf","@type":"Person","familyName":"Namaf","givenName":"Kalsarap","homeLocation":{"@id":"#country_null"},"name":"Kalsarap Namaf","role":{"@id":"role_speaker"}},{"@id":"NT1-98007-001.jpg","@type":"File","contentSize":1658368,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:31.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-001.jpg","doi":"10.4225/72/575C8A369D680","essenceId":1010709},{"@id":"NT1-98007-002.jpg","@type":"File","contentSize":1816576,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:37.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-002.jpg","doi":"10.4225/72/575C8A3C15C98","essenceId":1010710},{"@id":"NT1-98007-003.jpg","@type":"File","contentSize":1811968,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:43.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-003.jpg","doi":"10.4225/72/575C8A41DD973","essenceId":1010711},{"@id":"NT1-98007-004.jpg","@type":"File","contentSize":1827840,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:48.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-004.jpg","doi":"10.4225/72/575C8A4767685","essenceId":1010712},{"@id":"NT1-98007-005.jpg","@type":"File","contentSize":1853440,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:54.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-005.jpg","doi":"10.4
225/72/575C8A4CD8971","essenceId":1010713},{"@id":"NT1-98007-006.jpg","@type":"File","contentSize":1796608,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:59.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-006.jpg","doi":"10.4225/72/575C8A525C618","essenceId":1010714},{"@id":"NT1-98007-007.jpg","@type":"File","contentSize":1780224,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:05.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-007.jpg","doi":"10.4225/72/575C8A5810189","essenceId":1010715},{"@id":"NT1-98007-008.jpg","@type":"File","contentSize":1737728,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:11.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-008.jpg","doi":"10.4225/72/575C8A5DB1113","essenceId":1010716},{"@id":"NT1-98007-009.jpg","@type":"File","contentSize":1781760,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:16.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-009.jpg","doi":"10.4225/72/575C8A63479C1","essenceId":1010717},{"@id":"NT1-98007-010.jpg","@type":"File","contentSize":1797632,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:22.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-010.jpg","doi":"10.4225/72/575C8A68B23D2","essenceId":1010718},{"@id":"NT1-98007-011.jpg","@type":"File","contentSize":1800704,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:28.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-011.jpg","doi":"10.4225/72/575C8A6E73D01","essenceId":1010719},{"@id":"NT1-98007-012.jpg","@type":"File","contentSize":1822720,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:33.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-012.jpg","doi":"10.4225/72/575C8A742DE00","essenceId":1010720},{"@id":"NT1-98007-013.jpg","@type":"File","contentSize":1809920,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:39.000Z","enc
odingFormat":"image/jpeg","name":"NT1-98007-013.jpg","doi":"10.4225/72/575C8A79B1B0F","essenceId":1010721},{"@id":"NT1-98007-014.jpg","@type":"File","contentSize":1821696,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:44.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-014.jpg","doi":"10.4225/72/575C8A7F3F253","essenceId":1010722},{"@id":"NT1-98007-015.jpg","@type":"File","contentSize":1626624,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:50.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-015.jpg","doi":"10.4225/72/575C8A84C0022","essenceId":1010723},{"@id":"NT1-98007-016.jpg","@type":"File","contentSize":1633792,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:56.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-016.jpg","doi":"10.4225/72/575C8A8A9A944","essenceId":1010724},{"@id":"NT1-98007-017.jpg","@type":"File","contentSize":1870336,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:01.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-017.jpg","doi":"10.4225/72/575C8A90506E6","essenceId":1010725},{"@id":"NT1-98007-018.jpg","@type":"File","contentSize":1858560,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:07.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-018.jpg","doi":"10.4225/72/575C8A95DF462","essenceId":1010726},{"@id":"NT1-98007-019.jpg","@type":"File","contentSize":1852416,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:12.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-019.jpg","doi":"10.4225/72/575C8A9B56F2F","essenceId":1010727},{"@id":"NT1-98007-020.jpg","@type":"File","contentSize":1838080,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:18.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-020.jpg","doi":"10.4225/72/575C8AA0F09B8","essenceId":1010728},{"@id":"NT1-98007-021.jpg","@type":"File","contentSize":1861120,"dateCreated":"2012-09-
27T10:08:01.000Z","dateModified":"2016-06-11T22:03:24.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-021.jpg","doi":"10.4225/72/575C8AA6B23AF","essenceId":1010729},{"@id":"NT1-98007-022.jpg","@type":"File","contentSize":1835008,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:29.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-022.jpg","doi":"10.4225/72/575C8AAC3A545","essenceId":1010730},{"@id":"NT1-98007-023.jpg","@type":"File","contentSize":1827328,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:35.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-023.jpg","doi":"10.4225/72/575C8AB1C991E","essenceId":1010731},{"@id":"NT1-98007-024.jpg","@type":"File","contentSize":1805312,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:40.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-024.jpg","doi":"10.4225/72/575C8AB74847D","essenceId":1010732},{"@id":"NT1-98007-025.jpg","@type":"File","contentSize":1912832,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:46.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-025.jpg","doi":"10.4225/72/575C8ABCD2B44","essenceId":1010733},{"@id":"NT1-98007-026.jpg","@type":"File","contentSize":1889792,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:51.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-026.jpg","doi":"10.4225/72/575C8AC26D69E","essenceId":1010734},{"@id":"NT1-98007-027.jpg","@type":"File","contentSize":1878528,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:57.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-027.jpg","doi":"10.4225/72/575C8AC7F3886","essenceId":1010735},{"@id":"NT1-98007-028.jpg","@type":"File","contentSize":1868288,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:02.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-028.jpg","doi":"10.4225/72/575C8ACD72196","essenceId":1010736},{"@id":"NT1-98007-029.j
pg","@type":"File","contentSize":1859584,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:08.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-029.jpg","doi":"10.4225/72/575C8AD2E8E82","essenceId":1010737},{"@id":"NT1-98007-030.jpg","@type":"File","contentSize":1859072,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:13.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-030.jpg","doi":"10.4225/72/575C8AD8775E6","essenceId":1010738},{"@id":"NT1-98007-031.jpg","@type":"File","contentSize":1708544,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:19.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-031.jpg","doi":"10.4225/72/575C8ADDE64B8","essenceId":1010739},{"@id":"NT1-98007-98007A.eaf","@type":"File","contentSize":165674,"dateCreated":"2016-08-01T05:00:06.000Z","dateModified":"2016-08-01T16:01:41.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007A.eaf","doi":"10.4225/72/579F725FDD059","essenceId":1100487},{"@id":"NT1-98007-98007A.flextext","@type":"File","contentSize":141244,"dateCreated":"2016-05-20T04:00:06.000Z","dateModified":"2016-06-24T12:41:36.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007A.flextext","doi":"10.4225/72/576D2A7B75614","essenceId":1086277},{"@id":"NT1-98007-98007A.ixt","@type":"File","contentSize":40299,"dateCreated":"2016-04-18T07:00:07.000Z","dateModified":"2016-06-24T08:25:06.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007A.ixt","doi":"10.4225/72/576CEE5CED1FB","essenceId":1085095},{"@id":"NT1-98007-98007A.mp3","@type":"File","bitrate":128009,"contentSize":43667584,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2019-11-29T09:17:34.000Z","duration":2729.02,"encodingFormat":"audio/mpeg","name":"NT1-98007-98007A.mp3","channels":2,"doi":"10.4225/72/575C8AE370B93","essenceId":1010740,"sampleRate":44100},{"@id":"NT1-98007-98007A.trs","@type":"File","contentSize":28292,"dateCreated":"2016-04-26T10:00:06.000
Z","dateModified":"2016-06-24T08:45:27.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007A.trs","doi":"10.4225/72/576CF32101764","essenceId":1085293},{"@id":"NT1-98007-98007A.wav","@type":"File","bitrate":4608000,"contentSize":1571894006,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2019-11-29T09:16:52.000Z","duration":2728.98,"encodingFormat":"audio/x-wav","name":"NT1-98007-98007A.wav","channels":2,"doi":"10.4225/72/575C8AE8E6E6B","essenceId":1010741,"sampleRate":96000},{"@id":"NT1-98007-98007B.eaf","@type":"File","contentSize":118748,"dateCreated":"2016-08-01T05:00:07.000Z","dateModified":"2016-08-01T16:01:47.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007B.eaf","doi":"10.4225/72/579F7265746C0","essenceId":1100493},{"@id":"NT1-98007-98007B.mp3","@type":"File","bitrate":128007,"contentSize":35305600,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2019-11-29T09:12:58.000Z","duration":2206.47,"encodingFormat":"audio/mpeg","name":"NT1-98007-98007B.mp3","channels":2,"doi":"10.4225/72/575C8AEE64BCA","essenceId":1010742,"sampleRate":44100},{"@id":"NT1-98007-98007B.wav","@type":"File","bitrate":4608000,"contentSize":1270917002,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2019-11-29T09:15:22.000Z","duration":2206.45,"encodingFormat":"audio/x-wav","name":"NT1-98007-98007B.wav","channels":2,"doi":"10.4225/72/575C8AF3D2DA0","essenceId":1010743,"sampleRate":96000},{"@id":"NT1-98007-98007az.xml","@type":"File","contentSize":48755,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:46.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007az.xml","doi":"10.4225/72/575C8AF94896E","essenceId":1010744},{"@id":"NT1-98007-A.tab","@type":"File","contentSize":27810,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:57.000Z","encodingFormat":"text/plain","name":"NT1-98007-A.tab","doi":"10.4225/72/575C8B043764B","essenceId":1010746},{"@id":"NT1-98007-A.xml","@type":"File",
"contentSize":48788,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:05:03.000Z","encodingFormat":"application/xml","name":"NT1-98007-A.xml","doi":"10.4225/72/575C8B09BDC88","essenceId":1010747},{"@id":"NT1-98007-B.tab","@type":"File","contentSize":20239,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:05:08.000Z","encodingFormat":"text/plain","name":"NT1-98007-B.tab","doi":"10.4225/72/575C8B0F4F8F1","essenceId":1010748},{"@id":"NT1-98007-B.xml","@type":"File","contentSize":35289,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:05:14.000Z","encodingFormat":"application/xml","name":"NT1-98007-B.xml","doi":"10.4225/72/575C8B14C6702","essenceId":1010749},{"@id":"Waia Tenene","@type":"Person","familyName":"Tenene","givenName":"Waia","homeLocation":{"@id":"#country_null"},"name":"Waia Tenene","role":{"@id":"role_speaker"}},{"@id":"_:b0","@type":"CreativeWork","name":"Open (subject to agreeing to PDSC access conditions)"},{"@id":"http://nla.gov.au/nla.party-479603","@type":"Person","email":"thien@unimelb.edu.au","familyName":"Thieberger","givenName":"Nick","homeLocation":{"@id":"#country_Australia"},"name":"Nick Thieberger","role":[{"@id":"role_collector"},{"@id":"role_depositor"},{"@id":"role_recorder"}]},{"@id":"http://nla.gov.au/nla.party-593909","@type":"Organization","name":"University of Melbourne"},{"@id":"ro-crate-metadata.json","@type":"CreativeWork","conformsTo":{"@id":"https://w3id.org/ro/crate/1.1/context"},"about":{"@id":"./"}},{"@id":"role_collector","@type":"Role","name":"collector"},{"@id":"role_depositor","@type":"Role","name":"depositor"},{"@id":"role_recorder","@type":"Role","name":"recorder"},{"@id":"role_speaker","@type":"Role","name":"speaker"}]} From 95dd558c3906f3f9792fa1289d303cb0ca0092df Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 23 Apr 2024 19:22:48 +0200 Subject: [PATCH 48/81] mime filetype lookup refactoring --- .../harvard/iq/dataverse/util/FileUtil.java | 65 
++++++++++--------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index cd9a138a621..7e38dacd4e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -433,12 +433,10 @@ public static String retestIngestableFileType(File file, String fileType) { } public static String determineFileType(File f, String fileName) throws IOException{ - final ResourceBundle bundle = BundleUtil.getResourceBundle("MimeTypeDetectionByFileName"); - if (bundle.keySet().contains(fileName)) { - return bundle.getString(fileName); + String fileType = lookupFileTypeByFileName(fileName); + if (fileType != null) { + return fileType; } - - String fileType = null; String fileExtension = getFileExtension(fileName); @@ -497,17 +495,17 @@ public static String determineFileType(File f, String fileName) throws IOExcepti if (fileType != null && fileType.startsWith("text/plain") && STATISTICAL_FILE_EXTENSION.containsKey(fileExtension)) { fileType = STATISTICAL_FILE_EXTENSION.get(fileExtension); } else { - fileType = determineFileTypeByNameAndExtension(fileName); + fileType = lookupFileTypeByExtension(fileName); } logger.fine("mime type recognized by extension: "+fileType); } } else { logger.fine("fileExtension is null"); - String fileTypeByName = lookupFileTypeFromPropertiesFile(fileName); - if(!StringUtil.isEmpty(fileTypeByName)) { - logger.fine(String.format("mime type: %s recognized by filename: %s", fileTypeByName, fileName)); - fileType = fileTypeByName; + final String fileTypeByExtension = lookupFileTypeByExtensionFromPropertiesFile(fileName); + if(!StringUtil.isEmpty(fileTypeByExtension)) { + logger.fine(String.format("mime type: %s recognized by extension: %s", fileTypeByExtension, fileName)); + fileType = fileTypeByExtension; } } @@ -552,38 +550,41 @@ public static String 
determineFileType(File f, String fileName) throws IOExcepti return fileType; } - public static String determineFileTypeByNameAndExtension(String fileName) { - final ResourceBundle bundle = BundleUtil.getResourceBundle("MimeTypeDetectionByFileName"); - if (bundle.keySet().contains(fileName)) { - return bundle.getString(fileName); + public static String determineFileTypeByNameAndExtension(final String fileName) { + final String fileType = lookupFileTypeByFileName(fileName); + if (fileType != null) { + return fileType; } - - String mimetypesFileTypeMapResult = MIME_TYPE_MAP.getContentType(fileName); + return lookupFileTypeByExtension(fileName); + } + + private static String lookupFileTypeByExtension(final String fileName) { + final String mimetypesFileTypeMapResult = MIME_TYPE_MAP.getContentType(fileName); logger.fine("MimetypesFileTypeMap type by extension, for " + fileName + ": " + mimetypesFileTypeMapResult); - if (mimetypesFileTypeMapResult != null) { - if ("application/octet-stream".equals(mimetypesFileTypeMapResult)) { - return lookupFileTypeFromPropertiesFile(fileName); - } else { - return mimetypesFileTypeMapResult; - } - } else { + if (mimetypesFileTypeMapResult == null) { return null; } + if ("application/octet-stream".equals(mimetypesFileTypeMapResult)) { + return lookupFileTypeByExtensionFromPropertiesFile(fileName); + } + return mimetypesFileTypeMapResult; + } + + private static String lookupFileTypeByFileName(final String fileName) { + return lookupFileTypeFromPropertiesFile("MimeTypeDetectionByFileName", fileName); } - public static String lookupFileTypeFromPropertiesFile(String fileName) { - String fileKey = FilenameUtils.getExtension(fileName); - String propertyFileName = "MimeTypeDetectionByFileExtension"; - if(fileKey == null || fileKey.isEmpty()) { - fileKey = fileName; - propertyFileName = "MimeTypeDetectionByFileName"; + private static String lookupFileTypeByExtensionFromPropertiesFile(final String fileName) { + final String fileKey = 
FilenameUtils.getExtension(fileName); + return lookupFileTypeFromPropertiesFile("MimeTypeDetectionByFileExtension", fileKey); + } - } - String propertyFileNameOnDisk = propertyFileName + ".properties"; + private static String lookupFileTypeFromPropertiesFile(final String propertyFileName, final String fileKey) { + final String propertyFileNameOnDisk = propertyFileName + ".properties"; try { logger.fine("checking " + propertyFileNameOnDisk + " for file key " + fileKey); return BundleUtil.getStringFromPropertyFile(fileKey, propertyFileName); - } catch (MissingResourceException ex) { + } catch (final MissingResourceException ex) { logger.info(fileKey + " is a filename/extension Dataverse doesn't know about. Consider adding it to the " + propertyFileNameOnDisk + " file."); return null; } From b578dfae0386f457665319d242af930207a298eb Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 23 Apr 2024 15:41:10 -0400 Subject: [PATCH 49/81] provide guidance to Java devs on wildcard imports #5336 --- doc/sphinx-guides/source/developers/coding-style.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/sphinx-guides/source/developers/coding-style.rst b/doc/sphinx-guides/source/developers/coding-style.rst index 9da7836bbf4..2a1c0d5d232 100755 --- a/doc/sphinx-guides/source/developers/coding-style.rst +++ b/doc/sphinx-guides/source/developers/coding-style.rst @@ -18,6 +18,11 @@ Tabs vs. Spaces Don't use tabs. Use 4 spaces. +Imports +^^^^^^^ + +Wildcard imports are neither encouraged nor discouraged. 
+ Braces Placement ^^^^^^^^^^^^^^^^ From 8b6e201d5f8c881fe08725eeed2864716a3d3f3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20ROUCOU?= Date: Wed, 24 Apr 2024 16:21:01 +0200 Subject: [PATCH 50/81] Fix sitemap URL location sitemap URL must have "/sitemap/" to be accessible in case of sitemap index file --- .../iq/dataverse/sitemap/SiteMapUtil.java | 15 ++-- src/main/webapp/WEB-INF/pretty-config.xml | 5 ++ .../iq/dataverse/sitemap/SiteMapUtilTest.java | 76 ++++++++++++++++--- 3 files changed, 80 insertions(+), 16 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java b/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java index 3077c41fa14..8408e7d91f2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java @@ -25,13 +25,14 @@ public class SiteMapUtil { - private static final Logger logger = Logger.getLogger(SiteMapUtil.class.getCanonicalName()); - private static DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); - + static final String DATE_PATTERN = "yyyy-MM-dd"; static final String SITEMAP_FILENAME_STAGED = "sitemap.xml.staged"; /** @see https://www.sitemaps.org/protocol.html#index */ static final int SITEMAP_LIMIT = 50000; + private static final Logger logger = Logger.getLogger(SiteMapUtil.class.getCanonicalName()); + private static DateTimeFormatter formatter = DateTimeFormatter.ofPattern(DATE_PATTERN); + public static void updateSiteMap(List dataverses, List datasets) { @@ -56,11 +57,15 @@ public static void updateSiteMap(List dataverses, List datas directory.mkdir(); } - // Use DAY pattern (2024-01-24), local machine timezone + // Use DAY pattern (YYYY-MM-DD), local machine timezone final W3CDateFormat dateFormat = new W3CDateFormat(Pattern.DAY); WebSitemapGenerator wsg = null; try { - wsg = WebSitemapGenerator.builder(dataverseSiteUrl, directory).autoValidate(true).dateFormat(dateFormat) + 
// All sitemap files are in "sitemap" folder, see "getSitemapPathString" method. + // But with pretty-faces configuration, "sitemap.xml" and "sitemap_index.xml" are accessible directly, + // like "https://demo.dataverse.org/sitemap.xml". So "/sitemap/" need to be added on "WebSitemapGenerator" + // in order to have valid URL for sitemap location. + wsg = WebSitemapGenerator.builder(dataverseSiteUrl + "/sitemap/", directory).autoValidate(true).dateFormat(dateFormat) .build(); } catch (MalformedURLException e) { logger.warning(String.format(msgErrorFormat, "Dataverse site URL", dataverseSiteUrl, e.getLocalizedMessage())); diff --git a/src/main/webapp/WEB-INF/pretty-config.xml b/src/main/webapp/WEB-INF/pretty-config.xml index ab5f37a1051..5f8f4877af8 100644 --- a/src/main/webapp/WEB-INF/pretty-config.xml +++ b/src/main/webapp/WEB-INF/pretty-config.xml @@ -27,4 +27,9 @@ + + + + + \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java index 19f985cc984..f17cb825986 100644 --- a/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java @@ -18,6 +18,7 @@ import java.sql.Timestamp; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Date; @@ -25,6 +26,7 @@ import static org.junit.jupiter.api.Assertions.*; +import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -127,14 +129,20 @@ void testUpdateSiteMap() throws IOException, ParseException, SAXException { void testHugeSiteMap() throws IOException, ParseException, SAXException { // given final int nbDataverse = 50; - final int nbDataset = 50000; + final int nbDataset = 
SiteMapUtil.SITEMAP_LIMIT; + final Timestamp now = new Timestamp(new Date().getTime()); + // Regex validate dataset URL + final String sitemapUrlRegex = ".*/dataset\\.xhtml\\?persistentId=doi:10\\.666/FAKE/published[0-9]{1,5}$"; + // Regex validate sitemap URL: must include "/sitemap/" to be accessible because there is no pretty-faces rewrite + final String sitemapIndexUrlRegex = ".*/sitemap/sitemap[1-2]\\.xml$"; + final String today = LocalDateTime.now().format(DateTimeFormatter.ofPattern(SiteMapUtil.DATE_PATTERN)); final List dataverses = new ArrayList<>(nbDataverse); for (int i = 1; i <= nbDataverse; i++) { final Dataverse publishedDataverse = new Dataverse(); publishedDataverse.setAlias(String.format("publishedDv%s", i)); - publishedDataverse.setModificationTime(new Timestamp(new Date().getTime())); - publishedDataverse.setPublicationDate(new Timestamp(new Date().getTime())); + publishedDataverse.setModificationTime(now); + publishedDataverse.setPublicationDate(now); dataverses.add(publishedDataverse); } @@ -142,8 +150,8 @@ void testHugeSiteMap() throws IOException, ParseException, SAXException { for (int i = 1; i <= nbDataset; i++) { final Dataset published = new Dataset(); published.setGlobalId(new GlobalId(AbstractDOIProvider.DOI_PROTOCOL, "10.666", String.format("FAKE/published%s", i), null, AbstractDOIProvider.DOI_RESOLVER_URL, null)); - published.setPublicationDate(new Timestamp(new Date().getTime())); - published.setModificationTime(new Timestamp(new Date().getTime())); + published.setPublicationDate(now); + published.setModificationTime(now); datasets.add(published); } @@ -153,24 +161,70 @@ void testHugeSiteMap() throws IOException, ParseException, SAXException { // then final Path siteMapDir = tempDocroot.resolve("sitemap"); final String pathToSiteMapIndexFile = siteMapDir.resolve("sitemap_index.xml").toString(); + final String pathToSiteMap1File = siteMapDir.resolve("sitemap1.xml").toString(); + final String pathToSiteMap2File = 
siteMapDir.resolve("sitemap2.xml").toString(); + + // validate sitemap_index.xml file with XSD assertDoesNotThrow(() -> XmlValidator.validateXmlWellFormed(pathToSiteMapIndexFile)); assertTrue(XmlValidator.validateXmlSchema(pathToSiteMapIndexFile, new URL(xsdSitemapIndex))); - final File sitemapFile = new File(pathToSiteMapIndexFile); + // verify sitemap_index.xml content + File sitemapFile = new File(pathToSiteMapIndexFile); String sitemapString = XmlPrinter.prettyPrintXml(new String(Files.readAllBytes(Paths.get(sitemapFile.getAbsolutePath())), StandardCharsets.UTF_8)); // System.out.println("sitemap: " + sitemapString); - assertTrue(sitemapString.contains("sitemap1.xml")); - assertTrue(sitemapString.contains("sitemap2.xml")); - assertTrue(sitemapString.contains("")); + String[] lines = sitemapString.split("\n"); + for (int i = 0; i < lines.length; i++) { + String line = lines[i].strip(); + if (StringUtils.isNotBlank(line)) { + if (i == 0) { + assertEquals("", line); + } else if (i == 1) { + assertEquals("", line); + } else if (i == 2) { + assertEquals("", line); + } else if (line.startsWith("")) { + final String errorWithSitemapIndexUrl = String.format("Sitemap URL must match with \"%s\" but was \"%s\"", sitemapIndexUrlRegex, line); + assertTrue(line.matches(sitemapIndexUrlRegex), errorWithSitemapIndexUrl); + } else if (line.startsWith("")) { + assertEquals(String.format("%s", today), line); + } + } + } - final String pathToSiteMap1File = siteMapDir.resolve("sitemap1.xml").toString(); + // validate sitemap1.xml file with XSD assertDoesNotThrow(() -> XmlValidator.validateXmlWellFormed(pathToSiteMap1File)); assertTrue(XmlValidator.validateXmlSchema(pathToSiteMap1File, new URL(xsdSitemap))); - final String pathToSiteMap2File = siteMapDir.resolve("sitemap2.xml").toString(); + // validate sitemap2.xml file with XSD assertDoesNotThrow(() -> XmlValidator.validateXmlWellFormed(pathToSiteMap2File)); assertTrue(XmlValidator.validateXmlSchema(pathToSiteMap2File, new 
URL(xsdSitemap))); + + // verify sitemap2.xml content + sitemapFile = new File(pathToSiteMap2File); + sitemapString = XmlPrinter.prettyPrintXml(new String(Files.readAllBytes(Paths.get(sitemapFile.getAbsolutePath())), StandardCharsets.UTF_8)); + + lines = sitemapString.split("\n"); + assertEquals("", lines[0].strip()); + assertEquals("", lines[1].strip()); + boolean isContainsLocTag = false; + boolean isContainsLastmodTag = false; + // loop over 10 lines only, just need to validate the and tags + for (int i = 5; i < 15; i++) { + String line = lines[i].strip(); + if (StringUtils.isNotBlank(line)) { + if (line.startsWith("")) { + isContainsLocTag = true; + final String errorWithSitemapIndexUrl = String.format("Sitemap URL must match with \"%s\" but was \"%s\"", sitemapUrlRegex, line); + assertTrue(line.matches(sitemapUrlRegex), errorWithSitemapIndexUrl); + } else if (line.startsWith("")) { + isContainsLastmodTag = true; + assertEquals(String.format("%s", today), line); + } + } + } + assertTrue(isContainsLocTag, "Sitemap file must contains tag"); + assertTrue(isContainsLastmodTag, "Sitemap file must contains tag"); } } From 7c6d101c8bcbdf34fda45514491cdd7cc010441f Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 24 Apr 2024 11:25:26 -0400 Subject: [PATCH 51/81] various sitemap doc tweaks #8936 --- doc/sphinx-guides/source/installation/config.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index e4ff65f059e..73d9afb0141 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2077,7 +2077,7 @@ Once the sitemap has been generated in the location above, it will be served at Multiple Sitemap Files (Sitemap Index File) ########################################### -According to the `Sitemaps.org protocol `_, a sitemap file must have no more than 50,000 URLs and must be no larger 
than 50MiB. In this case, the protocol instructs you to create a sitemap index file called ``sitemap_index.xml`` (instead of ``sitemap.xml``), which references multiple sitemap files. In this case, the created files containing the URLs will be named ``sitemap1.xml``, ``sitemap2.xml``, etc. The referenced files are also generated in the same place as other sitemap files and there will be as many files as necessary to contain the URLs of collections and datasets present in your installation, while respecting the limit of 50,000 URLs per file. Dataverse will automatically detect whether you need to create a single ``sitemap.xml`` file or several files and generate them for you. However, when submitting your sitemap file to Google or other search engines as described below, you must be careful to use the correct file name corresponding to your situation. +According to the `Sitemaps.org protocol `_, a sitemap file must have no more than 50,000 URLs and must be no larger than 50MiB. In this case, the protocol instructs you to create a sitemap index file called ``sitemap_index.xml`` (instead of ``sitemap.xml``), which references multiple sitemap files named ``sitemap1.xml``, ``sitemap2.xml``, etc. These referenced files are also generated in the same place as other sitemap files (``domain1/docroot/sitemap``) and there will be as many files as necessary to contain the URLs of collections and datasets present in your installation, while respecting the limit of 50,000 URLs per file. 
If you have over 50,000 items, a sitemap index file will be generated in the following location (unless you have customized your installation directory for Payara): @@ -2085,10 +2085,14 @@ If you have over 50,000 items, a sitemap index file will be generated in the fol Once the sitemap has been generated in the location above, it will be served at ``/sitemap_index.xml`` like this: https://demo.dataverse.org/sitemap_index.xml +Note that the sitemap is also available at (for example) https://demo.dataverse.org/sitemap/sitemap_index.xml and in that ``sitemap`` directory you will find the files it references such as ``sitemap1.xml``, ``sitemap2.xml``, etc. + Submitting Your Sitemap to Search Engines ######################################### -Some search engines will be able to find your sitemap file at ``/sitemap.xml`` or ``sitemap_index.xml``, but others, **including Google**, need to be **specifically instructed** to retrieve it. +Some search engines will be able to find your sitemap file at ``/sitemap.xml`` or ``/sitemap_index.xml``, but others, **including Google**, need to be **specifically instructed** to retrieve it. + +As described above, Dataverse will automatically detect whether you need to create a single sitemap file or several files and generate them for you. However, when submitting your sitemap file to Google or other search engines, you must be careful to supply the correct file name (``sitemap.xml`` or ``sitemap_index.xml``) depending on your situation. One way to submit your sitemap URL to Google is by using their "Search Console" (https://search.google.com/search-console). In order to use the console, you will need to authenticate yourself as the owner of your Dataverse site. Various authentication methods are provided; but if you are already using Google Analytics, the easiest way is to use that account. 
Make sure you are logged in on Google with the account that has the edit permission on your Google Analytics property; go to the Search Console and enter the root URL of your Dataverse installation, then choose Google Analytics as the authentication method. Once logged in, click on "Sitemaps" in the menu on the left. Consult `Google's "submit a sitemap" instructions`_ for more information. From afcbdfede13757fdb16b794a50aa568687a639ac Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Thu, 2 May 2024 16:27:07 +0200 Subject: [PATCH 52/81] dataset is always checked for validity while indexing, even when already published --- .../iq/dataverse/search/IndexServiceBean.java | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 886326980c2..61aa73cdeff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -836,15 +836,9 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set Date: Thu, 2 May 2024 16:36:07 +0200 Subject: [PATCH 53/81] fix for collections not showing up when both validity facets (valid and incmoplete) are checked --- .../edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java b/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java index 2ab248fcc0b..277fa9ee12f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java @@ -292,7 +292,7 @@ public String getSolrFragmentForPublicationStatus(){ } public String getSolrFragmentForDatasetValidity(){ - if ((this.datasetValidities == null) || 
(this.datasetValidities.isEmpty())){ + if ((this.datasetValidities == null) || (this.datasetValidities.isEmpty()) || (this.datasetValidities.size() > 1)){ return ""; } From 75e87e4fa45800da4b307fd54f32b7747e244d8c Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Thu, 2 May 2024 18:16:55 +0200 Subject: [PATCH 54/81] removed unused method --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 090c71b4579..96a8270932c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -2307,10 +2307,6 @@ public boolean isValid() { return valid; } - public boolean isValidOrCanReviewIncomplete() { - return isValid() || JvmSettings.UI_ALLOW_REVIEW_INCOMPLETE.lookupOptional(Boolean.class).orElse(false); - } - private void displayLockInfo(Dataset dataset) { // Various info messages, when the dataset is locked (for various reasons): if (dataset.isLocked() && canUpdateDataset()) { From d4c71964d313090c1e17ebed111a7799d8a44dd7 Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Thu, 2 May 2024 19:25:10 +0200 Subject: [PATCH 55/81] reverted removing method that is used by the frontend --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 96a8270932c..090c71b4579 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -2307,6 +2307,10 @@ public boolean isValid() { return valid; } + public boolean isValidOrCanReviewIncomplete() { + return isValid() || JvmSettings.UI_ALLOW_REVIEW_INCOMPLETE.lookupOptional(Boolean.class).orElse(false); + } + private void displayLockInfo(Dataset 
dataset) { // Various info messages, when the dataset is locked (for various reasons): if (dataset.isLocked() && canUpdateDataset()) { From 2ee5bfff71d642b9494d67f14c6da129f89c554b Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Thu, 2 May 2024 21:32:06 +0200 Subject: [PATCH 56/81] fixed incomplete metadata being indexed as complete in some cases --- .../edu/harvard/iq/dataverse/DatasetPage.java | 2 +- .../harvard/iq/dataverse/DatasetVersion.java | 21 +++++++++++++++++++ .../iq/dataverse/search/IndexServiceBean.java | 2 +- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 090c71b4579..0427c38af64 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -2298,7 +2298,7 @@ public boolean isValid() { if (valid == null) { if (workingVersion.isDraft() || (canUpdateDataset() && JvmSettings.UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED.lookupOptional(Boolean.class).orElse(false))) { final DatasetVersion newVersion = workingVersion.cloneDatasetVersion(); - newVersion.setDatasetFields(newVersion.initDatasetFields()); + newVersion.setDatasetFields(newVersion.initDatasetFields(true)); valid = newVersion.isValid(); } else { valid = true; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 5fd963f3931..1f13c8812cc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1585,6 +1585,10 @@ private DatasetField initDatasetField(DatasetField dsf) { } public List initDatasetFields() { + return initDatasetFields(false); + } + + public List initDatasetFields(boolean removeEmptyValues) { //retList - Return List of values List retList = new ArrayList<>(); //Running into null on create new dataset @@ -1605,6 
+1609,9 @@ public List initDatasetFields() { for (DatasetField dsf : retList) { if (dsfType.equals(dsf.getDatasetFieldType())) { add = false; + if (removeEmptyValues) { + removeEmptyValues(dsf); + } break; } } @@ -1622,6 +1629,20 @@ public List initDatasetFields() { return retList; } + private void removeEmptyValues(DatasetField dsf) { + if (dsf.getDatasetFieldType().isPrimitive()) { // primitive + final Iterator i = dsf.getDatasetFieldValues().iterator(); + while (i.hasNext()) { + final String v = i.next().getValue(); + if (StringUtils.isBlank(v) || DatasetField.NA_VALUE.equals(v)) { + i.remove(); + } + } + } else { + dsf.getDatasetFieldCompoundValues().forEach(cv -> cv.getChildDatasetFields().forEach(v -> removeEmptyValues(v))); + } + } + /** * For the current server, create link back to this Dataset * diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 61aa73cdeff..7bb72b2875a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -837,7 +837,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set Date: Fri, 3 May 2024 17:52:25 +0200 Subject: [PATCH 57/81] adding release note --- .../8655-re-add-cell-counting-biomedical-tsv.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 doc/release-notes/8655-re-add-cell-counting-biomedical-tsv.md diff --git a/doc/release-notes/8655-re-add-cell-counting-biomedical-tsv.md b/doc/release-notes/8655-re-add-cell-counting-biomedical-tsv.md new file mode 100644 index 00000000000..295f206871f --- /dev/null +++ b/doc/release-notes/8655-re-add-cell-counting-biomedical-tsv.md @@ -0,0 +1,12 @@ +## Release Highlights + +### Life Science Metadata + +Re-adding value `cell counting` to Life Science metadatablock's Measurement Type vocabularies accidentally removed in `v5.1`. 
+ +## Upgrade Instructions + +### Update the Life Science metadata block + +- `wget https://github.com/IQSS/dataverse/releases/download/v6.3/biomedical.tsv` +- `curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @biomedical.tsv -H "Content-type: text/tab-separated-values"` \ No newline at end of file From 232029eaa7658c50e69dd057127df1780cab941f Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 6 May 2024 14:52:00 +0200 Subject: [PATCH 58/81] fix for permission wrapper not available in mydata -> if it is your data, you are allowed to see incomplete metadata label on published datasets when the flag is enabled --- .../edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java index c68ef9b7cc9..7cca76d3a45 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java @@ -67,8 +67,6 @@ public class DataRetrieverAPI extends AbstractApiBean { @Inject DataverseSession session; - @Inject - PermissionsWrapper permissionsWrapper; @EJB DataverseRoleServiceBean dataverseRoleService; @@ -525,6 +523,6 @@ private JsonArrayBuilder formatSolrDocs(SolrQueryResponse solrResponse, RoleTagR } private boolean isValid(SolrSearchResult result, DataverseRequest dataverseRequest) { - return result.isValid(x -> permissionsWrapper.canUpdateDataset(dataverseRequest, datasetService.find(x.getEntityId()))); + return result.isValid(x -> true); } } \ No newline at end of file From ff4742a5bd6510b39d5459279fe9976c20bf632e Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Tue, 7 May 2024 13:07:27 +0200 Subject: [PATCH 59/81] unused variable cleanup --- .../edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) 
diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java index 7cca76d3a45..9c4000c2008 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java @@ -9,7 +9,6 @@ import edu.harvard.iq.dataverse.DataverseSession; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.DvObjectServiceBean; -import edu.harvard.iq.dataverse.PermissionsWrapper; import edu.harvard.iq.dataverse.RoleAssigneeServiceBean; import edu.harvard.iq.dataverse.api.auth.AuthRequired; import edu.harvard.iq.dataverse.search.SearchServiceBean; @@ -27,7 +26,6 @@ import edu.harvard.iq.dataverse.search.SearchException; import edu.harvard.iq.dataverse.search.SearchFields; import edu.harvard.iq.dataverse.search.SortBy; -import edu.harvard.iq.dataverse.settings.JvmSettings; import java.util.List; import java.util.Map; @@ -491,14 +489,13 @@ private JsonArrayBuilder formatSolrDocs(SolrQueryResponse solrResponse, RoleTagR JsonObjectBuilder myDataCardInfo; JsonArrayBuilder rolesForCard; - DataverseRequest dataverseRequest = createDataverseRequest(authUser); for (SolrSearchResult doc : solrQueryResponse.getSolrSearchResults()){ // ------------------------------------------- // (a) Get core card data from solr // ------------------------------------------- - myDataCardInfo = doc.getJsonForMyData(isValid(doc, dataverseRequest)); + myDataCardInfo = doc.getJsonForMyData(isValid(doc)); if (!doc.getEntity().isInstanceofDataFile()){ String parentAlias = dataverseService.getParentAliasString(doc); @@ -522,7 +519,7 @@ private JsonArrayBuilder formatSolrDocs(SolrQueryResponse solrResponse, RoleTagR } - private boolean isValid(SolrSearchResult result, DataverseRequest dataverseRequest) { + private boolean isValid(SolrSearchResult result) { return result.isValid(x -> true); } } \ No newline at end of file From 
8525c9acdef86cf33a56cf3d017f1cf5181a9db7 Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Tue, 7 May 2024 14:36:18 +0200 Subject: [PATCH 60/81] cleaned up file page logic for incomplete metadata --- src/main/java/edu/harvard/iq/dataverse/FilePage.java | 2 +- src/main/webapp/file.xhtml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index f0a75c9c467..5ba4af0449e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -322,7 +322,7 @@ public boolean isValid() { final DatasetVersion workingVersion = fileMetadata.getDatasetVersion(); if (workingVersion.isDraft() || (canUpdateDataset() && JvmSettings.UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED.lookupOptional(Boolean.class).orElse(false))) { final DatasetVersion newVersion = workingVersion.cloneDatasetVersion(); - newVersion.setDatasetFields(newVersion.initDatasetFields()); + newVersion.setDatasetFields(newVersion.initDatasetFields(true)); valid = newVersion.isValid(); } else { valid = true; diff --git a/src/main/webapp/file.xhtml b/src/main/webapp/file.xhtml index 99dacc46050..835764d9cf5 100644 --- a/src/main/webapp/file.xhtml +++ b/src/main/webapp/file.xhtml @@ -77,7 +77,7 @@ - + From 724f2388191dc164feff917411a3d2e21d52f11a Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Tue, 7 May 2024 17:01:05 +0200 Subject: [PATCH 61/81] refactored isValid in DatasetVersion --- .../edu/harvard/iq/dataverse/DatasetPage.java | 4 +- .../harvard/iq/dataverse/DatasetVersion.java | 52 +++++++++++-------- .../edu/harvard/iq/dataverse/FilePage.java | 4 +- .../iq/dataverse/search/IndexServiceBean.java | 5 +- 4 files changed, 33 insertions(+), 32 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 0427c38af64..2afe3b33981 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -2297,9 +2297,7 @@ private void displayPublishMessage(){ public boolean isValid() { if (valid == null) { if (workingVersion.isDraft() || (canUpdateDataset() && JvmSettings.UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED.lookupOptional(Boolean.class).orElse(false))) { - final DatasetVersion newVersion = workingVersion.cloneDatasetVersion(); - newVersion.setDatasetFields(newVersion.initDatasetFields(true)); - valid = newVersion.isValid(); + valid = workingVersion.isValid(); } else { valid = true; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 1f13c8812cc..943693355a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1585,10 +1585,6 @@ private DatasetField initDatasetField(DatasetField dsf) { } public List initDatasetFields() { - return initDatasetFields(false); - } - - public List initDatasetFields(boolean removeEmptyValues) { //retList - Return List of values List retList = new ArrayList<>(); //Running into null on create new dataset @@ -1609,9 +1605,6 @@ public List initDatasetFields(boolean removeEmptyValues) { for (DatasetField dsf : retList) { if (dsfType.equals(dsf.getDatasetFieldType())) { add = false; - if (removeEmptyValues) { - removeEmptyValues(dsf); - } break; } } @@ -1629,20 +1622,6 @@ public List initDatasetFields(boolean removeEmptyValues) { return retList; } - private void removeEmptyValues(DatasetField dsf) { - if (dsf.getDatasetFieldType().isPrimitive()) { // primitive - final Iterator i = dsf.getDatasetFieldValues().iterator(); - while (i.hasNext()) { - final String v = i.next().getValue(); - if (StringUtils.isBlank(v) || DatasetField.NA_VALUE.equals(v)) { - i.remove(); - } - } - } else { - dsf.getDatasetFieldCompoundValues().forEach(cv -> 
cv.getChildDatasetFields().forEach(v -> removeEmptyValues(v))); - } - } - /** * For the current server, create link back to this Dataset * @@ -1749,7 +1728,36 @@ public List> validateRequired() { } public boolean isValid() { - return validate().isEmpty(); + // first clone to leave the original untouched + final DatasetVersion newVersion = this.cloneDatasetVersion(); + // initDatasetFields + newVersion.setDatasetFields(newVersion.initDatasetFields()); + // remove special "N/A" values and empty values + newVersion.removeEmptyValues(); + // check validity of present fields and detect missing mandatory fields + return newVersion.validate().isEmpty(); + } + + private void removeEmptyValues() { + if (this.getDatasetFields() != null) { + for (DatasetField dsf : this.getDatasetFields()) { + removeEmptyValues(dsf); + } + } + } + + private void removeEmptyValues(DatasetField dsf) { + if (dsf.getDatasetFieldType().isPrimitive()) { // primitive + final Iterator i = dsf.getDatasetFieldValues().iterator(); + while (i.hasNext()) { + final String v = i.next().getValue(); + if (StringUtils.isBlank(v) || DatasetField.NA_VALUE.equals(v)) { + i.remove(); + } + } + } else { + dsf.getDatasetFieldCompoundValues().forEach(cv -> cv.getChildDatasetFields().forEach(v -> removeEmptyValues(v))); + } } public Set validate() { diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index 5ba4af0449e..52e1ffd825a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -321,9 +321,7 @@ public boolean isValid() { if (valid == null) { final DatasetVersion workingVersion = fileMetadata.getDatasetVersion(); if (workingVersion.isDraft() || (canUpdateDataset() && JvmSettings.UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED.lookupOptional(Boolean.class).orElse(false))) { - final DatasetVersion newVersion = workingVersion.cloneDatasetVersion(); - 
newVersion.setDatasetFields(newVersion.initDatasetFields(true)); - valid = newVersion.isValid(); + valid = workingVersion.isValid(); } else { valid = true; } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 7bb72b2875a..e61b93a741f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -835,10 +835,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set Date: Tue, 7 May 2024 11:01:56 -0400 Subject: [PATCH 62/81] Changes to suggest the use of DataCite resources --- doc/sphinx-guides/source/installation/config.rst | 2 +- scripts/api/data/licenses/licenseApache-2.0.json | 2 +- scripts/api/data/licenses/licenseMIT.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index e0cde1cbd46..00d926b20d3 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1877,7 +1877,7 @@ If you do not find the license JSON you need above, you are encouraged to contri - Name your file using the SPDX identifier. For example, if the identifier is ``Apache-2.0``, you should name your file ``licenseApache-2.0.json``. - For the ``name`` field, use the "short identifier" from the SPDX landing page (e.g. ``Apache-2.0``). - For the ``description`` field, use the "full name" from the SPDX landing page (e.g. ``Apache License 2.0``). -- For the ``uri`` field, go to the SPDX landing page for the license and remove the extension ``.html`` from your browser and then copy the URL (e.g. ``https://spdx.org/licenses/Apache-2.0``) into the ``uri`` field. 
+- For the ``uri`` field, we encourage you to use the same resource that DataCite uses, most of the times this will be the same than the first provided resource on SPDX, but please get in contact with us if you have any questions on the cominity group. - For the ``active`` field, put ``true``. - For the ``sortOrder`` field, put the next sequention number after checking previous files with ``grep sortOrder scripts/api/data/licenses/*``. diff --git a/scripts/api/data/licenses/licenseApache-2.0.json b/scripts/api/data/licenses/licenseApache-2.0.json index 0fdd071ea6e..009d73fc9c2 100644 --- a/scripts/api/data/licenses/licenseApache-2.0.json +++ b/scripts/api/data/licenses/licenseApache-2.0.json @@ -1,6 +1,6 @@ { "name": "Apache-2.0", - "uri": "https://spdx.org/licenses/Apache-2.0", + "uri": "https://www.apache.org/licenses/LICENSE-2.0", "shortDescription": "Apache License 2.0", "active": true, "sortOrder": 9 diff --git a/scripts/api/data/licenses/licenseMIT.json b/scripts/api/data/licenses/licenseMIT.json index d5315d42a28..ddadab45222 100644 --- a/scripts/api/data/licenses/licenseMIT.json +++ b/scripts/api/data/licenses/licenseMIT.json @@ -1,6 +1,6 @@ { "name": "MIT", - "uri": "https://spdx.org/licenses/MIT", + "uri": "https://opensource.org/license/mit/", "shortDescription": "MIT License", "active": true, "sortOrder": 8 From 570649f840f93459776ef0f36906de24b145787a Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 7 May 2024 14:02:47 -0400 Subject: [PATCH 63/81] Fix the URL to fit the DataCite use --- doc/sphinx-guides/source/installation/config.rst | 4 ++-- scripts/api/data/licenses/licenseApache-2.0.json | 2 +- scripts/api/data/licenses/licenseMIT.json | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 00d926b20d3..887a95ef781 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ 
b/doc/sphinx-guides/source/installation/config.rst @@ -1877,9 +1877,9 @@ If you do not find the license JSON you need above, you are encouraged to contri - Name your file using the SPDX identifier. For example, if the identifier is ``Apache-2.0``, you should name your file ``licenseApache-2.0.json``. - For the ``name`` field, use the "short identifier" from the SPDX landing page (e.g. ``Apache-2.0``). - For the ``description`` field, use the "full name" from the SPDX landing page (e.g. ``Apache License 2.0``). -- For the ``uri`` field, we encourage you to use the same resource that DataCite uses, most of the times this will be the same than the first provided resource on SPDX, but please get in contact with us if you have any questions on the cominity group. +- For the ``uri`` field, we encourage you to use the same resource that DataCite uses, which is often the same as the first "Other web pages for this license" on the SPDX page for the license. When these differ, or there are other concerns about the URI DataCite uses, please reach out to the community to see if a consensus can be reached. - For the ``active`` field, put ``true``. -- For the ``sortOrder`` field, put the next sequention number after checking previous files with ``grep sortOrder scripts/api/data/licenses/*``. +- For the ``sortOrder`` field, put the next sequential number after checking previous files with ``grep sortOrder scripts/api/data/licenses/*``. Note that prior to Dataverse 6.2, various license above have been added that do not adhere perfectly with this procedure. For example, the ``name`` for the CC0 license is ``CC0 1.0`` (no dash) rather than ``CC0-1.0`` (with a dash). We are keeping the existing names for backward compatibility. 
For more on standarizing license configuration, see https://github.com/IQSS/dataverse/issues/8512 diff --git a/scripts/api/data/licenses/licenseApache-2.0.json b/scripts/api/data/licenses/licenseApache-2.0.json index 009d73fc9c2..5b7c3cf5c95 100644 --- a/scripts/api/data/licenses/licenseApache-2.0.json +++ b/scripts/api/data/licenses/licenseApache-2.0.json @@ -1,6 +1,6 @@ { "name": "Apache-2.0", - "uri": "https://www.apache.org/licenses/LICENSE-2.0", + "uri": "http://www.apache.org/licenses/LICENSE-2.0", "shortDescription": "Apache License 2.0", "active": true, "sortOrder": 9 diff --git a/scripts/api/data/licenses/licenseMIT.json b/scripts/api/data/licenses/licenseMIT.json index ddadab45222..a879e8a5595 100644 --- a/scripts/api/data/licenses/licenseMIT.json +++ b/scripts/api/data/licenses/licenseMIT.json @@ -1,6 +1,6 @@ { "name": "MIT", - "uri": "https://opensource.org/license/mit/", + "uri": "https://opensource.org/licenses/MIT", "shortDescription": "MIT License", "active": true, "sortOrder": 8 From 798dd5c19dfe2c1ebaca65f531a17f93235a526a Mon Sep 17 00:00:00 2001 From: Vera Clemens <16904069+vera@users.noreply.github.com> Date: Fri, 10 May 2024 18:06:34 +0200 Subject: [PATCH 64/81] fix: NullPointerExceptions in /api/mydata/retrieve (#9581) * fix: NullPointerExceptions in /api/mydata/retrieve * test: add DataRetrieverApiIT to integration-tests.txt * fix(DataRetrieverApiIT): fix test failure related to pretty printing change --------- Co-authored-by: qqmyers --- .../edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java | 2 +- .../edu/harvard/iq/dataverse/search/SolrSearchResult.java | 2 +- .../java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java | 4 ++-- tests/integration-tests.txt | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java index 0a64f42d840..7a6553cfe74 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java @@ -493,7 +493,7 @@ private JsonArrayBuilder formatSolrDocs(SolrQueryResponse solrResponse, RoleTagR // ------------------------------------------- myDataCardInfo = doc.getJsonForMyData(); - if (!doc.getEntity().isInstanceofDataFile()){ + if (doc.getEntity() != null && !doc.getEntity().isInstanceofDataFile()){ String parentAlias = dataverseService.getParentAliasString(doc); myDataCardInfo.add("parent_alias",parentAlias); } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java index 389f96c30ea..2bf6d079a4a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java @@ -421,7 +421,7 @@ public JsonObjectBuilder getJsonForMyData() { if ((this.getParent() != null) && (!this.getParent().isEmpty())) { // System.out.println("keys:" + parent.keySet().toString()); - if (this.entity.isInstanceofDataFile()) { + if (this.entity != null && this.entity.isInstanceofDataFile()) { myDataJson.add("parentIdentifier", this.getParent().get(SolrSearchResult.PARENT_IDENTIFIER)) .add("parentName", this.getParent().get("name")); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java index facb3f7c784..4220d6cd245 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java @@ -35,14 +35,14 @@ public void testRetrieveMyDataAsJsonString() { String badUserIdentifier = "bad-identifier"; Response invalidUserIdentifierResponse = UtilIT.retrieveMyDataAsJsonString(superUserApiToken, badUserIdentifier, emptyRoleIdsList); - assertEquals("{\"success\":false,\"error_message\":\"No user 
found for: \\\"" + badUserIdentifier + "\\\"\"}", invalidUserIdentifierResponse.prettyPrint()); + assertEquals("{\n \"success\": false,\n \"error_message\": \"No user found for: \\\"" + badUserIdentifier + "\\\"\"\n}", invalidUserIdentifierResponse.prettyPrint()); assertEquals(OK.getStatusCode(), invalidUserIdentifierResponse.getStatusCode()); // Call as superuser with valid user identifier Response createSecondUserResponse = UtilIT.createRandomUser(); String userIdentifier = UtilIT.getUsernameFromResponse(createSecondUserResponse); Response validUserIdentifierResponse = UtilIT.retrieveMyDataAsJsonString(superUserApiToken, userIdentifier, emptyRoleIdsList); - assertEquals("{\"success\":false,\"error_message\":\"Sorry, you have no assigned roles.\"}", validUserIdentifierResponse.prettyPrint()); + assertEquals("{\n \"success\": false,\n \"error_message\": \"Sorry, you have no assigned roles.\"\n}", validUserIdentifierResponse.prettyPrint()); assertEquals(OK.getStatusCode(), validUserIdentifierResponse.getStatusCode()); } } diff --git a/tests/integration-tests.txt b/tests/integration-tests.txt index 3c4f7dce31f..58d8d814bb9 100644 --- a/tests/integration-tests.txt +++ b/tests/integration-tests.txt @@ -1 +1 @@ -DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT,ProvIT,S3AccessIT 
+DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT,DataRetrieverApiIT,ProvIT,S3AccessIT From d72d347cf8a505bfc657f531a882291f0d11791d Mon Sep 17 00:00:00 2001 From: sbondka <145585953+sbondka@users.noreply.github.com> Date: Fri, 10 May 2024 20:34:29 +0200 Subject: [PATCH 65/81] Ajout audio/vnd.wave au mime type (#10090) --- src/main/java/propertyFiles/MimeTypeDisplay.properties | 1 + src/main/java/propertyFiles/MimeTypeFacets.properties | 1 + 2 files changed, 2 insertions(+) diff --git a/src/main/java/propertyFiles/MimeTypeDisplay.properties b/src/main/java/propertyFiles/MimeTypeDisplay.properties index 8486a113116..8e5a251abbf 100644 --- a/src/main/java/propertyFiles/MimeTypeDisplay.properties +++ b/src/main/java/propertyFiles/MimeTypeDisplay.properties @@ -207,6 +207,7 @@ audio/ogg=OGG Audio audio/wav=Waveform Audio audio/x-wav=Waveform Audio audio/x-wave=Waveform Audio +audio/vnd.wave=Waveform Audio # Video video/avi=AVI Video video/x-msvideo=AVI Video diff --git a/src/main/java/propertyFiles/MimeTypeFacets.properties b/src/main/java/propertyFiles/MimeTypeFacets.properties index 831c509b860..0dad8daff4c 100644 --- a/src/main/java/propertyFiles/MimeTypeFacets.properties +++ b/src/main/java/propertyFiles/MimeTypeFacets.properties @@ -209,6 +209,7 @@ audio/ogg=Audio audio/wav=Audio audio/x-wav=Audio audio/x-wave=Audio +audio/vnd.wave=Audio # (anything else that looks like audio/* will also be indexed as facet type "Audio") # Video video/avi=Video From 3867cfeaf46723bc28f7d217dc123f772b879f8c Mon Sep 17 00:00:00 2001 From: Steven Ferey Date: Fri, 10 May 2024 21:49:02 
+0200 Subject: [PATCH 66/81] 6630 lastlogintime not updated (#10135) * add Added Last Login Date Update * update lastLogin for OAuth2Login * using UserServiceBean for updating --- src/main/java/edu/harvard/iq/dataverse/Shib.java | 3 +++ .../dataverse/authorization/AuthenticationServiceBean.java | 4 +--- .../providers/oauth2/OAuth2LoginBackingBean.java | 5 +++++ .../providers/oauth2/OAuth2LoginBackingBeanTest.java | 4 ++++ 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/Shib.java b/src/main/java/edu/harvard/iq/dataverse/Shib.java index 24c0f9d7926..f9cf061e771 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Shib.java +++ b/src/main/java/edu/harvard/iq/dataverse/Shib.java @@ -59,6 +59,8 @@ public class Shib implements java.io.Serializable { SettingsServiceBean settingsService; @EJB SystemConfig systemConfig; + @EJB + UserServiceBean userService; HttpServletRequest request; @@ -259,6 +261,7 @@ else if (ShibAffiliationOrder.equals("firstAffiliation")) { state = State.REGULAR_LOGIN_INTO_EXISTING_SHIB_ACCOUNT; logger.fine("Found user based on " + userPersistentId + ". 
Logging in."); logger.fine("Updating display info for " + au.getName()); + userService.updateLastLogin(au); authSvc.updateAuthenticatedUser(au, displayInfo); logInUserAndSetShibAttributes(au); String prettyFacesHomePageString = getPrettyFacesHomePageString(false); diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java index 1c0f5010059..4a8fb123fd4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java @@ -307,11 +307,9 @@ public AuthenticatedUser getUpdateAuthenticatedUser( String authenticationProvid if (user != null && !user.isDeactivated()) { user = userService.updateLastLogin(user); } - + if ( user == null ) { throw new IllegalStateException("Authenticated user does not exist. The functionality to support creating one at this point in authentication has been removed."); - //return createAuthenticatedUser( - // new UserRecordIdentifier(authenticationProviderId, resp.getUserId()), resp.getUserId(), resp.getUserDisplayInfo(), true ); } else { if (BuiltinAuthenticationProvider.PROVIDER_ID.equals(user.getAuthenticatedUserLookup().getAuthenticationProviderId())) { return user; diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBean.java index 0fd0852b4df..8f3dc07fdea 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBean.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.authorization.providers.oauth2; import edu.harvard.iq.dataverse.DataverseSession; +import edu.harvard.iq.dataverse.UserServiceBean; import 
edu.harvard.iq.dataverse.authorization.AuthenticationProvider; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.UserRecordIdentifier; @@ -65,6 +66,9 @@ public class OAuth2LoginBackingBean implements Serializable { @EJB SystemConfig systemConfig; + @EJB + UserServiceBean userService; + @Inject DataverseSession session; @@ -128,6 +132,7 @@ public void exchangeCodeForToken() throws IOException { } else { // login the user and redirect to HOME of intended page (if any). // setUser checks for deactivated users. + dvUser = userService.updateLastLogin(dvUser); session.setUser(dvUser); final OAuth2TokenData tokenData = oauthUser.getTokenData(); if (tokenData != null) { diff --git a/src/test/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBeanTest.java index 672d7563669..3a63371d7a8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBeanTest.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.authorization.providers.oauth2; import edu.harvard.iq.dataverse.DataverseSession; +import edu.harvard.iq.dataverse.UserServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.UserRecordIdentifier; import edu.harvard.iq.dataverse.authorization.providers.oauth2.impl.GitHubOAuth2APTest; @@ -48,6 +49,7 @@ class OAuth2LoginBackingBeanTest { @Mock AuthenticationServiceBean authenticationServiceBean; @Mock SystemConfig systemConfig; + @Mock UserServiceBean userService; Clock constantClock = Clock.fixed(Instant.now(), ZoneId.systemDefault()); @@ -70,6 +72,7 @@ void setUp() { this.loginBackingBean.clock = constantClock; this.loginBackingBean.authenticationSvc = 
this.authenticationServiceBean; this.loginBackingBean.systemConfig = this.systemConfig; + this.loginBackingBean.userService = this.userService; lenient().when(this.authenticationServiceBean.getOAuth2Provider(testIdp.getId())).thenReturn(testIdp); } @@ -178,6 +181,7 @@ void existingUser() throws Exception { // also fake the result of the lookup in the auth service doReturn(userIdentifier).when(userRecord).getUserRecordIdentifier(); doReturn(user).when(authenticationServiceBean).lookupUser(userIdentifier); + doReturn(user).when(userService).updateLastLogin(user); // WHEN (& then) // capture the redirect target from the faces context From c678892467192ed036b89d8cd70618faae03feb2 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Mon, 13 May 2024 15:13:00 -0400 Subject: [PATCH 67/81] Added a note to remember people to update the languages --- doc/release-notes/6.2-release-notes.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/release-notes/6.2-release-notes.md b/doc/release-notes/6.2-release-notes.md index f694703f0a6..b2c05a0dafe 100644 --- a/doc/release-notes/6.2-release-notes.md +++ b/doc/release-notes/6.2-release-notes.md @@ -417,12 +417,16 @@ In the following commands we assume that Payara 6 is installed in `/usr/local/pa As noted above, deployment of the war file might take several minutes due a database migration script required for the new storage quotas feature. -6\. Restart Payara +6\. For installations with internationalization: + +- Please remember to update translations via [Dataverse language packs](https://github.com/GlobalDataverseCommunityConsortium/dataverse-language-packs) + +7\. Restart Payara - `service payara stop` - `service payara start` -7\. Update the following Metadata Blocks to reflect the incremental improvements made to the handling of core metadata fields: +8\. 
Update the following Metadata Blocks to reflect the incremental improvements made to the handling of core metadata fields: ``` wget https://github.com/IQSS/dataverse/releases/download/v6.2/geospatial.tsv @@ -442,7 +446,7 @@ wget https://github.com/IQSS/dataverse/releases/download/v6.2/biomedical.tsv curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/biomedical.tsv ``` -8\. For installations with custom or experimental metadata blocks: +9\. For installations with custom or experimental metadata blocks: - Stop Solr instance (usually `service solr stop`, depending on Solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/6.2/installation/prerequisites.html#solr-init-script)) @@ -455,7 +459,7 @@ curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/ta - Restart Solr instance (usually `service solr restart` depending on solr/OS) -9\. Reindex Solr: +10\. 
Reindex Solr: For details, see https://guides.dataverse.org/en/6.2/admin/solr-search-index.html but here is the reindex command: From dae5ca7dc46c1272d65b36688f6fded224e7288c Mon Sep 17 00:00:00 2001 From: Guillermo Portas Date: Mon, 13 May 2024 21:30:54 +0200 Subject: [PATCH 68/81] Extend Dataverse metadatablocks API to retrieve metadata fields configured as required in the General Information page (#10498) * Added: querying dataverse field type input levels when searching for fields displayed on create in a dataverse * Changed: json object builder to its original form in JsonPrinter class * Changed: renamed variable in JsonPrinter class * Added: displaying isRequired true for dataset field types of metadatablocks configured as required * Added: API endpoint for updating dataverse input levels * Added: UpdateDataverseInputLevelsCommand * Added: docs for Update Collection Input Levels API endpoint * Added: release notes for #10477 * Added: tweaks to release notes * Added: tweak to release notes * Fixed: doc tweaks * Added: note about overwriting input levels in docs * Fixed: added missing condition in Dataverse.isDatasetFieldTypeRequiredAsInputLevel * Fixed: UpdateDataverseInputLevelsCommand.addInputLevelMetadataBlocks * Added: setMetadataBlocks(true) and empty list check in UpdateDataverseInputLevelsCommand * Fixed: status code in DataversesIT --- ...tadatablocks-api-extension-input-levels.md | 3 + doc/sphinx-guides/source/api/native-api.rst | 41 ++++- .../edu/harvard/iq/dataverse/Dataverse.java | 8 + .../dataverse/MetadataBlockServiceBean.java | 10 +- .../harvard/iq/dataverse/api/Dataverses.java | 146 +++++++--------- .../command/impl/UpdateDataverseCommand.java | 9 +- .../UpdateDataverseInputLevelsCommand.java | 51 ++++++ .../iq/dataverse/util/json/JsonPrinter.java | 54 ++++-- src/main/java/propertyFiles/Bundle.properties | 1 + .../iq/dataverse/api/DataversesIT.java | 157 ++++++++++++++---- .../edu/harvard/iq/dataverse/api/UtilIT.java | 15 ++ 11 files changed, 
357 insertions(+), 138 deletions(-) create mode 100644 doc/release-notes/10477-metadatablocks-api-extension-input-levels.md create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseInputLevelsCommand.java diff --git a/doc/release-notes/10477-metadatablocks-api-extension-input-levels.md b/doc/release-notes/10477-metadatablocks-api-extension-input-levels.md new file mode 100644 index 00000000000..77cc7f59773 --- /dev/null +++ b/doc/release-notes/10477-metadatablocks-api-extension-input-levels.md @@ -0,0 +1,3 @@ +Changed ``api/dataverses/{id}/metadatablocks`` so that setting the query parameter ``onlyDisplayedOnCreate=true`` also returns metadata blocks with dataset field type input levels configured as required on the General Information page of the collection, in addition to the metadata blocks and their fields with the property ``displayOnCreate=true`` (which was the original behavior). + +A new endpoint ``api/dataverses/{id}/inputLevels`` has been created for updating the dataset field type input levels of a collection via API. diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index bcc37d6db1c..f22f8727fb0 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -898,7 +898,46 @@ The following attributes are supported: * ``filePIDsEnabled`` ("true" or "false") Restricted to use by superusers and only when the :ref:`:AllowEnablingFilePIDsPerCollection <:AllowEnablingFilePIDsPerCollection>` setting is true. Enables or disables registration of file-level PIDs in datasets within the collection (overriding the instance-wide setting). .. _collection-storage-quotas: - + +Update Collection Input Levels +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Updates the dataset field type input levels in a collection. 
+ +Please note that this endpoint overwrites all the input levels of the collection page, so if you want to keep the existing ones, you will need to add them to the JSON request body. + +If one of the input levels corresponds to a dataset field type belonging to a metadata block that does not exist in the collection, the metadata block will be added to the collection. + +This endpoint expects a JSON with the following format:: + + [ + { + "datasetFieldTypeName": "datasetFieldTypeName1", + "required": true, + "include": true + }, + { + "datasetFieldTypeName": "datasetFieldTypeName2", + "required": true, + "include": true + } + ] + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + export JSON='[{"datasetFieldTypeName":"geographicCoverage", "required":true, "include":true}, {"datasetFieldTypeName":"country", "required":true, "include":true}]' + + curl -X PUT -H "X-Dataverse-key: $API_TOKEN" -H "Content-Type:application/json" "$SERVER_URL/api/dataverses/$ID/inputLevels" -d "$JSON" + +The fully expanded example above (without environment variables) looks like this: + +.. 
code-block:: bash + + curl -X PUT -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -H "Content-Type:application/json" "https://demo.dataverse.org/api/dataverses/root/inputLevels" -d '[{"datasetFieldTypeName":"geographicCoverage", "required":true, "include":false}, {"datasetFieldTypeName":"country", "required":true, "include":false}]' + Collection Storage Quotas ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index 42db9c1392a..78b1827c798 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -411,6 +411,14 @@ public List getDataverseFieldTypeInputLevels() { return dataverseFieldTypeInputLevels; } + public boolean isDatasetFieldTypeRequiredAsInputLevel(Long datasetFieldTypeId) { + for(DataverseFieldTypeInputLevel dataverseFieldTypeInputLevel : dataverseFieldTypeInputLevels) { + if (dataverseFieldTypeInputLevel.getDatasetFieldType().getId().equals(datasetFieldTypeId) && dataverseFieldTypeInputLevel.isRequired()) { + return true; + } + } + return false; + } public Template getDefaultTemplate() { return defaultTemplate; diff --git a/src/main/java/edu/harvard/iq/dataverse/MetadataBlockServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MetadataBlockServiceBean.java index c4c95fae551..1e2a34f5472 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MetadataBlockServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/MetadataBlockServiceBean.java @@ -58,10 +58,18 @@ public List listMetadataBlocksDisplayedOnCreate(Dataverse ownerDa if (ownerDataverse != null) { Root dataverseRoot = criteriaQuery.from(Dataverse.class); + Join datasetFieldTypeInputLevelJoin = dataverseRoot.join("dataverseFieldTypeInputLevels", JoinType.LEFT); + + Predicate requiredPredicate = criteriaBuilder.and( + 
datasetFieldTypeInputLevelJoin.get("datasetFieldType").in(metadataBlockRoot.get("datasetFieldTypes")), + criteriaBuilder.isTrue(datasetFieldTypeInputLevelJoin.get("required"))); + + Predicate unionPredicate = criteriaBuilder.or(displayOnCreatePredicate, requiredPredicate); + criteriaQuery.where(criteriaBuilder.and( criteriaBuilder.equal(dataverseRoot.get("id"), ownerDataverse.getId()), metadataBlockRoot.in(dataverseRoot.get("metadataBlocks")), - displayOnCreatePredicate + unionPredicate )); } else { criteriaQuery.where(displayOnCreatePredicate); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index 7e5a5e8965c..02b60fdb32a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -1,27 +1,10 @@ package edu.harvard.iq.dataverse.api; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DataverseFacet; -import edu.harvard.iq.dataverse.DataverseContact; -import edu.harvard.iq.dataverse.DataverseFeaturedDataverse; -import edu.harvard.iq.dataverse.DataverseLinkingServiceBean; -import edu.harvard.iq.dataverse.DataverseMetadataBlockFacet; -import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.api.auth.AuthRequired; import edu.harvard.iq.dataverse.api.datadeposit.SwordServiceBean; import edu.harvard.iq.dataverse.api.dto.DataverseMetadataBlockFacetDTO; import edu.harvard.iq.dataverse.authorization.DataverseRole; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.FeaturedDataverseServiceBean; -import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.GuestbookResponseServiceBean; -import 
edu.harvard.iq.dataverse.GuestbookServiceBean; -import edu.harvard.iq.dataverse.MetadataBlock; -import edu.harvard.iq.dataverse.RoleAssignment; import edu.harvard.iq.dataverse.api.dto.ExplicitGroupDTO; import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; @@ -37,46 +20,7 @@ import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataverse.DataverseUtil; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.impl.AddRoleAssigneesToExplicitGroupCommand; -import edu.harvard.iq.dataverse.engine.command.impl.AssignRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreateDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreateExplicitGroupCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreateNewDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreateRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteCollectionQuotaCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseLinkingDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteExplicitGroupCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetSchemaCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetCollectionQuotaCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetCollectionStorageUseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateMetadataBlockFacetRootCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDataverseStorageSizeCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetExplicitGroupCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ImportDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.LinkDataverseCommand; -import 
edu.harvard.iq.dataverse.engine.command.impl.ListDataverseContentCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListExplicitGroupsCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListFacetsCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListFeaturedCollectionsCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListMetadataBlockFacetsCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListMetadataBlocksCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListRoleAssignments; -import edu.harvard.iq.dataverse.engine.command.impl.ListRolesCommand; -import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetResult; -import edu.harvard.iq.dataverse.engine.command.impl.MoveDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.PublishDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RemoveRoleAssigneesFromExplicitGroupCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RevokeRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.SetCollectionQuotaCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseDefaultContributorRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseMetadataBlocksCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateExplicitGroupCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateMetadataBlockFacetsCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ValidateDatasetJsonCommand; +import edu.harvard.iq.dataverse.engine.command.impl.*; import edu.harvard.iq.dataverse.pidproviders.PidProvider; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.settings.JvmSettings; @@ -91,23 +35,14 @@ import edu.harvard.iq.dataverse.util.json.JsonPrinter; import 
edu.harvard.iq.dataverse.util.json.JsonUtil; -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.brief; -import java.util.Collections; -import java.util.LinkedList; -import java.util.List; -import java.util.TreeSet; +import java.io.StringReader; +import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; import jakarta.ejb.EJB; import jakarta.ejb.EJBException; import jakarta.ejb.Stateless; -import jakarta.json.Json; -import jakarta.json.JsonArrayBuilder; -import jakarta.json.JsonNumber; -import jakarta.json.JsonObject; -import jakarta.json.JsonObjectBuilder; -import jakarta.json.JsonString; -import jakarta.json.JsonValue; +import jakarta.json.*; import jakarta.json.JsonValue.ValueType; import jakarta.json.stream.JsonParsingException; import jakarta.validation.ConstraintViolationException; @@ -131,16 +66,11 @@ import java.io.OutputStream; import java.text.MessageFormat; import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Date; -import java.util.Map; -import java.util.Optional; import java.util.stream.Collectors; import jakarta.servlet.http.HttpServletResponse; import jakarta.ws.rs.WebApplicationException; import jakarta.ws.rs.core.Context; import jakarta.ws.rs.core.StreamingOutput; -import java.util.ArrayList; import javax.xml.stream.XMLStreamException; /** @@ -172,10 +102,10 @@ public class Dataverses extends AbstractApiBean { @EJB DataverseServiceBean dataverseService; - + @EJB DataverseLinkingServiceBean linkingService; - + @EJB FeaturedDataverseServiceBean featuredDataverseService; @@ -707,6 +637,43 @@ public Response updateAttribute(@Context ContainerRequestContext crc, @PathParam } } + @PUT + @AuthRequired + @Path("{identifier}/inputLevels") + public Response updateInputLevels(@Context ContainerRequestContext crc, @PathParam("identifier") String identifier, String jsonBody) { + try { + Dataverse dataverse = findDataverseOrDie(identifier); + List newInputLevels = parseInputLevels(jsonBody, 
dataverse); + execCommand(new UpdateDataverseInputLevelsCommand(dataverse, createDataverseRequest(getRequestUser(crc)), newInputLevels)); + return ok(BundleUtil.getStringFromBundle("dataverse.update.success"), JsonPrinter.json(dataverse)); + } catch (WrappedResponse e) { + return e.getResponse(); + } + } + + private List parseInputLevels(String jsonBody, Dataverse dataverse) throws WrappedResponse { + JsonArray inputLevelsArray = Json.createReader(new StringReader(jsonBody)).readArray(); + + List newInputLevels = new ArrayList<>(); + for (JsonValue value : inputLevelsArray) { + JsonObject inputLevel = (JsonObject) value; + String datasetFieldTypeName = inputLevel.getString("datasetFieldTypeName"); + DatasetFieldType datasetFieldType = datasetFieldSvc.findByName(datasetFieldTypeName); + + if (datasetFieldType == null) { + String errorMessage = MessageFormat.format(BundleUtil.getStringFromBundle("dataverse.updateinputlevels.error.invalidfieldtypename"), datasetFieldTypeName); + throw new WrappedResponse(badRequest(errorMessage)); + } + + boolean required = inputLevel.getBoolean("required"); + boolean include = inputLevel.getBoolean("include"); + + newInputLevels.add(new DataverseFieldTypeInputLevel(datasetFieldType, dataverse, required, include)); + } + + return newInputLevels; + } + @DELETE @AuthRequired @Path("{linkingDataverseId}/deleteLink/{linkedDataverseId}") @@ -726,14 +693,15 @@ public Response listMetadataBlocks(@Context ContainerRequestContext crc, @QueryParam("onlyDisplayedOnCreate") boolean onlyDisplayedOnCreate, @QueryParam("returnDatasetFieldTypes") boolean returnDatasetFieldTypes) { try { + Dataverse dataverse = findDataverseOrDie(dvIdtf); final List metadataBlocks = execCommand( new ListMetadataBlocksCommand( createDataverseRequest(getRequestUser(crc)), - findDataverseOrDie(dvIdtf), + dataverse, onlyDisplayedOnCreate ) ); - return ok(json(metadataBlocks, returnDatasetFieldTypes, onlyDisplayedOnCreate)); + return ok(json(metadataBlocks, 
returnDatasetFieldTypes, onlyDisplayedOnCreate, dataverse)); } catch (WrappedResponse we) { return we.getResponse(); } @@ -836,8 +804,8 @@ public Response listFacets(@Context ContainerRequestContext crc, @PathParam("ide return e.getResponse(); } } - - + + @GET @AuthRequired @Path("{identifier}/featured") @@ -860,19 +828,19 @@ public Response getFeaturedDataverses(@Context ContainerRequestContext crc, @Pat return e.getResponse(); } } - - + + @POST @AuthRequired @Path("{identifier}/featured") /** * Allows user to set featured dataverses - must have edit dataverse permission - * + * */ public Response setFeaturedDataverses(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, String dvAliases) { List dvsFromInput = new LinkedList<>(); - - + + try { for (JsonString dvAlias : Util.asJsonArray(dvAliases).getValuesAs(JsonString.class)) { @@ -886,7 +854,7 @@ public Response setFeaturedDataverses(@Context ContainerRequestContext crc, @Pat if (dvsFromInput.isEmpty()) { return error(Response.Status.BAD_REQUEST, "Please provide a valid Json array of dataverse collection aliases to be featured."); } - + Dataverse dataverse = findDataverseOrDie(dvIdtf); List featuredSource = new ArrayList<>(); List featuredTarget = new ArrayList<>(); @@ -919,7 +887,7 @@ public Response setFeaturedDataverses(@Context ContainerRequestContext crc, @Pat // by passing null for Facets and DataverseFieldTypeInputLevel, those are not changed execCommand(new UpdateDataverseCommand(dataverse, null, featuredTarget, createDataverseRequest(getRequestUser(crc)), null)); return ok("Featured Dataverses of dataverse " + dvIdtf + " updated."); - + } catch (WrappedResponse ex) { return ex.getResponse(); } catch (JsonParsingException jpe){ @@ -927,7 +895,7 @@ public Response setFeaturedDataverses(@Context ContainerRequestContext crc, @Pat } } - + @DELETE @AuthRequired @Path("{identifier}/featured") diff --git 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java index fe9415f39f9..bdb69dc918f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java @@ -18,7 +18,6 @@ import java.util.ArrayList; import java.util.List; import java.util.logging.Logger; -import jakarta.persistence.TypedQuery; /** * Update an existing dataverse. @@ -30,10 +29,10 @@ public class UpdateDataverseCommand extends AbstractCommand { private final Dataverse editedDv; private final List facetList; - private final List featuredDataverseList; - private final List inputLevelList; - - private boolean datasetsReindexRequired = false; + private final List featuredDataverseList; + private final List inputLevelList; + + private boolean datasetsReindexRequired = false; public UpdateDataverseCommand(Dataverse editedDv, List facetList, List featuredDataverseList, DataverseRequest aRequest, List inputLevelList ) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseInputLevelsCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseInputLevelsCommand.java new file mode 100644 index 00000000000..cf7b4a6f69c --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseInputLevelsCommand.java @@ -0,0 +1,51 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DataverseFieldTypeInputLevel; +import edu.harvard.iq.dataverse.MetadataBlock; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import 
edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; + +import java.util.ArrayList; +import java.util.List; + +@RequiredPermissions(Permission.EditDataverse) +public class UpdateDataverseInputLevelsCommand extends AbstractCommand { + private final Dataverse dataverse; + private final List inputLevelList; + + public UpdateDataverseInputLevelsCommand(Dataverse dataverse, DataverseRequest request, List inputLevelList) { + super(request, dataverse); + this.dataverse = dataverse; + this.inputLevelList = new ArrayList<>(inputLevelList); + } + + @Override + public Dataverse execute(CommandContext ctxt) throws CommandException { + if (inputLevelList == null || inputLevelList.isEmpty()) { + throw new CommandException("Error while updating dataverse input levels: Input level list cannot be null or empty", this); + } + addInputLevelMetadataBlocks(); + dataverse.setMetadataBlockRoot(true); + return ctxt.engine().submit(new UpdateDataverseCommand(dataverse, null, null, getRequest(), inputLevelList)); + } + + private void addInputLevelMetadataBlocks() { + List dataverseMetadataBlocks = dataverse.getMetadataBlocks(); + for (DataverseFieldTypeInputLevel inputLevel : inputLevelList) { + MetadataBlock inputLevelMetadataBlock = inputLevel.getDatasetFieldType().getMetadataBlock(); + if (!dataverseHasMetadataBlock(dataverseMetadataBlocks, inputLevelMetadataBlock)) { + dataverseMetadataBlocks.add(inputLevelMetadataBlock); + } + } + dataverse.setMetadataBlocks(dataverseMetadataBlocks); + } + + private boolean dataverseHasMetadataBlock(List dataverseMetadataBlocks, MetadataBlock metadataBlock) { + return dataverseMetadataBlocks.stream().anyMatch(block -> block.getId().equals(metadataBlock.getId())); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java 
b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index 6c314c4dc2d..95f14b79ece 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -273,7 +273,7 @@ public static JsonObjectBuilder json(Dataverse dv, Boolean hideEmail, Boolean re } if (returnOwners){ bld.add("isPartOf", getOwnersFromDvObject(dv)); - } + } bld.add("permissionRoot", dv.isPermissionRoot()) .add("description", dv.getDescription()) .add("dataverseType", dv.getDataverseType().name()); @@ -294,6 +294,11 @@ public static JsonObjectBuilder json(Dataverse dv, Boolean hideEmail, Boolean re } bld.add("isReleased", dv.isReleased()); + List inputLevels = dv.getDataverseFieldTypeInputLevels(); + if(!inputLevels.isEmpty()) { + bld.add("inputLevels", JsonPrinter.jsonDataverseFieldTypeInputLevels(inputLevels)); + } + return bld; } @@ -589,9 +594,13 @@ public static JsonObjectBuilder json(MetadataBlock block, List fie } public static JsonArrayBuilder json(List metadataBlocks, boolean returnDatasetFieldTypes, boolean printOnlyDisplayedOnCreateDatasetFieldTypes) { + return json(metadataBlocks, returnDatasetFieldTypes, printOnlyDisplayedOnCreateDatasetFieldTypes, null); + } + + public static JsonArrayBuilder json(List metadataBlocks, boolean returnDatasetFieldTypes, boolean printOnlyDisplayedOnCreateDatasetFieldTypes, Dataverse ownerDataverse) { JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); for (MetadataBlock metadataBlock : metadataBlocks) { - arrayBuilder.add(returnDatasetFieldTypes ? json(metadataBlock, printOnlyDisplayedOnCreateDatasetFieldTypes) : brief.json(metadataBlock)); + arrayBuilder.add(returnDatasetFieldTypes ? 
json(metadataBlock, printOnlyDisplayedOnCreateDatasetFieldTypes, ownerDataverse) : brief.json(metadataBlock)); } return arrayBuilder; } @@ -619,20 +628,25 @@ public static JsonObject json(DatasetField dfv) { } public static JsonObjectBuilder json(MetadataBlock metadataBlock) { - return json(metadataBlock, false); + return json(metadataBlock, false, null); } - public static JsonObjectBuilder json(MetadataBlock metadataBlock, boolean printOnlyDisplayedOnCreateDatasetFieldTypes) { + public static JsonObjectBuilder json(MetadataBlock metadataBlock, boolean printOnlyDisplayedOnCreateDatasetFieldTypes, Dataverse ownerDataverse) { JsonObjectBuilder jsonObjectBuilder = jsonObjectBuilder(); jsonObjectBuilder.add("id", metadataBlock.getId()); jsonObjectBuilder.add("name", metadataBlock.getName()); jsonObjectBuilder.add("displayName", metadataBlock.getDisplayName()); jsonObjectBuilder.add("displayOnCreate", metadataBlock.isDisplayOnCreate()); - JsonObjectBuilder fieldsBuilder = jsonObjectBuilder(); - for (DatasetFieldType datasetFieldType : new TreeSet<>(metadataBlock.getDatasetFieldTypes())) { - if (!printOnlyDisplayedOnCreateDatasetFieldTypes || datasetFieldType.isDisplayOnCreate()) { - fieldsBuilder.add(datasetFieldType.getName(), json(datasetFieldType)); + JsonObjectBuilder fieldsBuilder = Json.createObjectBuilder(); + Set datasetFieldTypes = new TreeSet<>(metadataBlock.getDatasetFieldTypes()); + for (DatasetFieldType datasetFieldType : datasetFieldTypes) { + boolean requiredInOwnerDataverse = ownerDataverse != null && ownerDataverse.isDatasetFieldTypeRequiredAsInputLevel(datasetFieldType.getId()); + boolean displayCondition = !printOnlyDisplayedOnCreateDatasetFieldTypes || + datasetFieldType.isDisplayOnCreate() || + requiredInOwnerDataverse; + if (displayCondition) { + fieldsBuilder.add(datasetFieldType.getName(), json(datasetFieldType, ownerDataverse)); } } @@ -642,6 +656,10 @@ public static JsonObjectBuilder json(MetadataBlock metadataBlock, boolean printO } public 
static JsonObjectBuilder json(DatasetFieldType fld) { + return json(fld, null); + } + + public static JsonObjectBuilder json(DatasetFieldType fld, Dataverse ownerDataverse) { JsonObjectBuilder fieldsBld = jsonObjectBuilder(); fieldsBld.add("name", fld.getName()); fieldsBld.add("displayName", fld.getDisplayName()); @@ -654,8 +672,11 @@ public static JsonObjectBuilder json(DatasetFieldType fld) { fieldsBld.add("multiple", fld.isAllowMultiples()); fieldsBld.add("isControlledVocabulary", fld.isControlledVocabulary()); fieldsBld.add("displayFormat", fld.getDisplayFormat()); - fieldsBld.add("isRequired", fld.isRequired()); fieldsBld.add("displayOrder", fld.getDisplayOrder()); + + boolean requiredInOwnerDataverse = ownerDataverse != null && ownerDataverse.isDatasetFieldTypeRequiredAsInputLevel(fld.getId()); + fieldsBld.add("isRequired", requiredInOwnerDataverse || fld.isRequired()); + if (fld.isControlledVocabulary()) { // If the field has a controlled vocabulary, // add all values to the resulting JSON @@ -665,10 +686,11 @@ public static JsonObjectBuilder json(DatasetFieldType fld) { } fieldsBld.add("controlledVocabularyValues", jab); } + if (!fld.getChildDatasetFieldTypes().isEmpty()) { JsonObjectBuilder subFieldsBld = jsonObjectBuilder(); for (DatasetFieldType subFld : fld.getChildDatasetFieldTypes()) { - subFieldsBld.add(subFld.getName(), JsonPrinter.json(subFld)); + subFieldsBld.add(subFld.getName(), JsonPrinter.json(subFld, ownerDataverse)); } fieldsBld.add("childFields", subFieldsBld); } @@ -1342,4 +1364,16 @@ private static JsonObjectBuilder jsonLicense(DatasetVersion dsv) { } return licenseJsonObjectBuilder; } + + public static JsonArrayBuilder jsonDataverseFieldTypeInputLevels(List inputLevels) { + JsonArrayBuilder jsonArrayOfInputLevels = Json.createArrayBuilder(); + for (DataverseFieldTypeInputLevel inputLevel : inputLevels) { + NullSafeJsonBuilder inputLevelJsonObject = NullSafeJsonBuilder.jsonObjectBuilder(); + 
inputLevelJsonObject.add("datasetFieldTypeName", inputLevel.getDatasetFieldType().getName()); + inputLevelJsonObject.add("required", inputLevel.isRequired()); + inputLevelJsonObject.add("include", inputLevel.isInclude()); + jsonArrayOfInputLevels.add(inputLevelJsonObject); + } + return jsonArrayOfInputLevels; + } } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 02d848df1e3..1e847d3eeb3 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -947,6 +947,7 @@ dataverse.default=(Default) dataverse.metadatalanguage.setatdatasetcreation=Chosen at Dataset Creation dataverse.guestbookentry.atdownload=Guestbook Entry At Download dataverse.guestbookentry.atrequest=Guestbook Entry At Access Request +dataverse.updateinputlevels.error.invalidfieldtypename=Invalid dataset field type name: {0} # rolesAndPermissionsFragment.xhtml # advanced.xhtml diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java index f3472aa43a4..01f4a4646fe 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java @@ -24,17 +24,15 @@ import org.junit.jupiter.api.Test; import static jakarta.ws.rs.core.Response.Status.*; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.not; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.hamcrest.CoreMatchers.*; +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.hasItemInArray; +import static org.junit.jupiter.api.Assertions.*; + import 
java.nio.file.Files; import io.restassured.path.json.JsonPath; -import static jakarta.ws.rs.core.Response.Status.OK; import org.hamcrest.CoreMatchers; -import static org.hamcrest.CoreMatchers.containsString; import org.hamcrest.Matchers; public class DataversesIT { @@ -704,26 +702,52 @@ public void testListMetadataBlocks() { Response setMetadataBlocksResponse = UtilIT.setMetadataBlocks(dataverseAlias, Json.createArrayBuilder().add("citation").add("astrophysics"), apiToken); setMetadataBlocksResponse.then().assertThat().statusCode(OK.getStatusCode()); + String[] testInputLevelNames = {"geographicCoverage", "country"}; + Response updateDataverseInputLevelsResponse = UtilIT.updateDataverseInputLevels(dataverseAlias, testInputLevelNames, apiToken); + updateDataverseInputLevelsResponse.then().assertThat().statusCode(OK.getStatusCode()); + // Dataverse not found Response listMetadataBlocksResponse = UtilIT.listMetadataBlocks("-1", false, false, apiToken); listMetadataBlocksResponse.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); // Existent dataverse and no optional params + String[] expectedAllMetadataBlockDisplayNames = {"Astronomy and Astrophysics Metadata", "Citation Metadata", "Geospatial Metadata"}; + listMetadataBlocksResponse = UtilIT.listMetadataBlocks(dataverseAlias, false, false, apiToken); listMetadataBlocksResponse.then().assertThat().statusCode(OK.getStatusCode()); listMetadataBlocksResponse.then().assertThat() .statusCode(OK.getStatusCode()) .body("data[0].fields", equalTo(null)) - .body("data.size()", equalTo(2)); + .body("data[1].fields", equalTo(null)) + .body("data[2].fields", equalTo(null)) + .body("data.size()", equalTo(3)); + + String actualMetadataBlockDisplayName1 = listMetadataBlocksResponse.then().extract().path("data[0].displayName"); + String actualMetadataBlockDisplayName2 = listMetadataBlocksResponse.then().extract().path("data[1].displayName"); + String actualMetadataBlockDisplayName3 = 
listMetadataBlocksResponse.then().extract().path("data[2].displayName"); + assertNotEquals(actualMetadataBlockDisplayName1, actualMetadataBlockDisplayName2); + assertNotEquals(actualMetadataBlockDisplayName1, actualMetadataBlockDisplayName3); + assertNotEquals(actualMetadataBlockDisplayName2, actualMetadataBlockDisplayName3); + assertThat(expectedAllMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName1)); + assertThat(expectedAllMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName2)); + assertThat(expectedAllMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName3)); // Existent dataverse and onlyDisplayedOnCreate=true + String[] expectedOnlyDisplayedOnCreateMetadataBlockDisplayNames = {"Citation Metadata", "Geospatial Metadata"}; + listMetadataBlocksResponse = UtilIT.listMetadataBlocks(dataverseAlias, true, false, apiToken); listMetadataBlocksResponse.then().assertThat().statusCode(OK.getStatusCode()); listMetadataBlocksResponse.then().assertThat() .statusCode(OK.getStatusCode()) .body("data[0].fields", equalTo(null)) - .body("data[0].displayName", equalTo("Citation Metadata")) - .body("data.size()", equalTo(1)); + .body("data[1].fields", equalTo(null)) + .body("data.size()", equalTo(2)); + + actualMetadataBlockDisplayName1 = listMetadataBlocksResponse.then().extract().path("data[0].displayName"); + actualMetadataBlockDisplayName2 = listMetadataBlocksResponse.then().extract().path("data[1].displayName"); + assertNotEquals(actualMetadataBlockDisplayName1, actualMetadataBlockDisplayName2); + assertThat(expectedOnlyDisplayedOnCreateMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName1)); + assertThat(expectedOnlyDisplayedOnCreateMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName2)); // Existent dataverse and returnDatasetFieldTypes=true listMetadataBlocksResponse = UtilIT.listMetadataBlocks(dataverseAlias, false, true, apiToken); @@ -731,7 +755,19 @@ public void 
testListMetadataBlocks() { listMetadataBlocksResponse.then().assertThat() .statusCode(OK.getStatusCode()) .body("data[0].fields", not(equalTo(null))) - .body("data.size()", equalTo(2)); + .body("data[1].fields", not(equalTo(null))) + .body("data[2].fields", not(equalTo(null))) + .body("data.size()", equalTo(3)); + + actualMetadataBlockDisplayName1 = listMetadataBlocksResponse.then().extract().path("data[0].displayName"); + actualMetadataBlockDisplayName2 = listMetadataBlocksResponse.then().extract().path("data[1].displayName"); + actualMetadataBlockDisplayName3 = listMetadataBlocksResponse.then().extract().path("data[2].displayName"); + assertNotEquals(actualMetadataBlockDisplayName1, actualMetadataBlockDisplayName2); + assertNotEquals(actualMetadataBlockDisplayName1, actualMetadataBlockDisplayName3); + assertNotEquals(actualMetadataBlockDisplayName2, actualMetadataBlockDisplayName3); + assertThat(expectedAllMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName1)); + assertThat(expectedAllMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName2)); + assertThat(expectedAllMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName3)); // Existent dataverse and onlyDisplayedOnCreate=true and returnDatasetFieldTypes=true listMetadataBlocksResponse = UtilIT.listMetadataBlocks(dataverseAlias, true, true, apiToken); @@ -739,8 +775,26 @@ public void testListMetadataBlocks() { listMetadataBlocksResponse.then().assertThat() .statusCode(OK.getStatusCode()) .body("data[0].fields", not(equalTo(null))) - .body("data[0].displayName", equalTo("Citation Metadata")) - .body("data.size()", equalTo(1)); + .body("data[1].fields", not(equalTo(null))) + .body("data.size()", equalTo(2)); + + actualMetadataBlockDisplayName1 = listMetadataBlocksResponse.then().extract().path("data[0].displayName"); + actualMetadataBlockDisplayName2 = listMetadataBlocksResponse.then().extract().path("data[1].displayName"); + 
assertNotEquals(actualMetadataBlockDisplayName1, actualMetadataBlockDisplayName2); + assertThat(expectedOnlyDisplayedOnCreateMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName1)); + assertThat(expectedOnlyDisplayedOnCreateMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName2)); + + // Check dataset fields for the updated input levels are retrieved + int geospatialMetadataBlockIndex = actualMetadataBlockDisplayName2.equals("Geospatial Metadata") ? 1 : 0; + + listMetadataBlocksResponse.then().assertThat() + .body(String.format("data[%d].fields.size()", geospatialMetadataBlockIndex), equalTo(2)); + + String actualMetadataField1 = listMetadataBlocksResponse.then().extract().path(String.format("data[%d].fields.geographicCoverage.name", geospatialMetadataBlockIndex)); + String actualMetadataField2 = listMetadataBlocksResponse.then().extract().path(String.format("data[%d].fields.country.name", geospatialMetadataBlockIndex)); + + assertNotNull(actualMetadataField1); + assertNotNull(actualMetadataField2); // User has no permissions on the requested dataverse Response createSecondUserResponse = UtilIT.createRandomUser(); @@ -753,7 +807,7 @@ public void testListMetadataBlocks() { listMetadataBlocksResponse = UtilIT.listMetadataBlocks(secondDataverseAlias, true, true, apiToken); listMetadataBlocksResponse.then().assertThat().statusCode(UNAUTHORIZED.getStatusCode()); } - + @Test public void testFeatureDataverse() throws Exception { @@ -762,42 +816,42 @@ public void testFeatureDataverse() throws Exception { Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); - + Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken); assertEquals(200, publishDataverse.getStatusCode()); - - Response createSubDVToBeFeatured = UtilIT.createSubDataverse(UtilIT.getRandomDvAlias() + "-feature", null, apiToken, dataverseAlias); 
+ + Response createSubDVToBeFeatured = UtilIT.createSubDataverse(UtilIT.getRandomDvAlias() + "-feature", null, apiToken, dataverseAlias); String subDataverseAlias = UtilIT.getAliasFromResponse(createSubDVToBeFeatured); - + //publish a sub dataverse so that the owner will have something to feature - Response createSubDVToBePublished = UtilIT.createSubDataverse(UtilIT.getRandomDvAlias() + "-pub", null, apiToken, dataverseAlias); + Response createSubDVToBePublished = UtilIT.createSubDataverse(UtilIT.getRandomDvAlias() + "-pub", null, apiToken, dataverseAlias); assertEquals(201, createSubDVToBePublished.getStatusCode()); String subDataverseAliasPub = UtilIT.getAliasFromResponse(createSubDVToBePublished); publishDataverse = UtilIT.publishDataverseViaNativeApi(subDataverseAliasPub, apiToken); assertEquals(200, publishDataverse.getStatusCode()); - + //can't feature a dataverse that is unpublished Response featureSubDVResponseUnpublished = UtilIT.addFeaturedDataverse(dataverseAlias, subDataverseAlias, apiToken); featureSubDVResponseUnpublished.prettyPrint(); assertEquals(400, featureSubDVResponseUnpublished.getStatusCode()); featureSubDVResponseUnpublished.then().assertThat() .body(containsString("may not be featured")); - + //can't feature a dataverse you don't own Response featureSubDVResponseNotOwned = UtilIT.addFeaturedDataverse(dataverseAlias, "root", apiToken); featureSubDVResponseNotOwned.prettyPrint(); assertEquals(400, featureSubDVResponseNotOwned.getStatusCode()); featureSubDVResponseNotOwned.then().assertThat() .body(containsString("may not be featured")); - + //can't feature a dataverse that doesn't exist Response featureSubDVResponseNotExist = UtilIT.addFeaturedDataverse(dataverseAlias, "dummy-alias-sek-foobar-333", apiToken); featureSubDVResponseNotExist.prettyPrint(); assertEquals(400, featureSubDVResponseNotExist.getStatusCode()); featureSubDVResponseNotExist.then().assertThat() .body(containsString("Can't find dataverse collection")); - + publishDataverse 
= UtilIT.publishDataverseViaNativeApi(subDataverseAlias, apiToken); assertEquals(200, publishDataverse.getStatusCode()); @@ -805,32 +859,71 @@ public void testFeatureDataverse() throws Exception { Response featureSubDVResponse = UtilIT.addFeaturedDataverse(dataverseAlias, subDataverseAlias, apiToken); featureSubDVResponse.prettyPrint(); assertEquals(OK.getStatusCode(), featureSubDVResponse.getStatusCode()); - - + + Response getFeaturedDataverseResponse = UtilIT.getFeaturedDataverses(dataverseAlias, apiToken); getFeaturedDataverseResponse.prettyPrint(); assertEquals(OK.getStatusCode(), getFeaturedDataverseResponse.getStatusCode()); getFeaturedDataverseResponse.then().assertThat() .body("data[0]", equalTo(subDataverseAlias)); - + Response deleteFeaturedDataverseResponse = UtilIT.deleteFeaturedDataverses(dataverseAlias, apiToken); deleteFeaturedDataverseResponse.prettyPrint(); - + assertEquals(OK.getStatusCode(), deleteFeaturedDataverseResponse.getStatusCode()); deleteFeaturedDataverseResponse.then().assertThat() .body(containsString("Featured dataverses have been removed")); - + Response deleteSubCollectionResponse = UtilIT.deleteDataverse(subDataverseAlias, apiToken); deleteSubCollectionResponse.prettyPrint(); assertEquals(OK.getStatusCode(), deleteSubCollectionResponse.getStatusCode()); - + Response deleteSubCollectionPubResponse = UtilIT.deleteDataverse(subDataverseAliasPub, apiToken); deleteSubCollectionResponse.prettyPrint(); assertEquals(OK.getStatusCode(), deleteSubCollectionPubResponse.getStatusCode()); - + Response deleteCollectionResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); deleteCollectionResponse.prettyPrint(); assertEquals(OK.getStatusCode(), deleteCollectionResponse.getStatusCode()); } - + + @Test + public void testUpdateInputLevels() { + Response createUserResponse = UtilIT.createRandomUser(); + String apiToken = UtilIT.getApiTokenFromResponse(createUserResponse); + + Response createDataverseResponse = 
UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + // Update valid input levels + String[] testInputLevelNames = {"geographicCoverage", "country"}; + Response updateDataverseInputLevelsResponse = UtilIT.updateDataverseInputLevels(dataverseAlias, testInputLevelNames, apiToken); + updateDataverseInputLevelsResponse.then().assertThat() + .body("data.inputLevels[0].required", equalTo(true)) + .body("data.inputLevels[0].include", equalTo(true)) + .body("data.inputLevels[1].required", equalTo(true)) + .body("data.inputLevels[1].include", equalTo(true)) + .statusCode(OK.getStatusCode()); + String actualFieldTypeName1 = updateDataverseInputLevelsResponse.then().extract().path("data.inputLevels[0].datasetFieldTypeName"); + String actualFieldTypeName2 = updateDataverseInputLevelsResponse.then().extract().path("data.inputLevels[1].datasetFieldTypeName"); + assertNotEquals(actualFieldTypeName1, actualFieldTypeName2); + assertThat(testInputLevelNames, hasItemInArray(actualFieldTypeName1)); + assertThat(testInputLevelNames, hasItemInArray(actualFieldTypeName2)); + + // Update input levels with an invalid field type name + String[] testInvalidInputLevelNames = {"geographicCoverage", "invalid1"}; + updateDataverseInputLevelsResponse = UtilIT.updateDataverseInputLevels(dataverseAlias, testInvalidInputLevelNames, apiToken); + updateDataverseInputLevelsResponse.then().assertThat() + .body("message", equalTo("Invalid dataset field type name: invalid1")) + .statusCode(BAD_REQUEST.getStatusCode()); + + // Update invalid empty input levels + testInputLevelNames = new String[]{}; + updateDataverseInputLevelsResponse = UtilIT.updateDataverseInputLevels(dataverseAlias, testInputLevelNames, apiToken); + updateDataverseInputLevelsResponse.prettyPrint(); + updateDataverseInputLevelsResponse.then().assertThat() + .body("message", 
equalTo("Error while updating dataverse input levels: Input level list cannot be null or empty")) + .statusCode(INTERNAL_SERVER_ERROR.getStatusCode()); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 4326250a157..507c9b302b3 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3939,4 +3939,19 @@ static Response requestGlobusUploadPaths(Integer datasetId, JsonObject body, Str .post("/api/datasets/" + datasetId + "/requestGlobusUploadPaths"); } + static Response updateDataverseInputLevels(String dataverseAlias, String[] inputLevelNames, String apiToken) { + JsonArrayBuilder contactArrayBuilder = Json.createArrayBuilder(); + for(String inputLevelName : inputLevelNames) { + contactArrayBuilder.add(Json.createObjectBuilder() + .add("datasetFieldTypeName", inputLevelName) + .add("required", true) + .add("include", true) + ); + } + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(contactArrayBuilder.build().toString()) + .contentType(ContentType.JSON) + .put("/api/dataverses/" + dataverseAlias + "/inputLevels"); + } } From 75a1e8ce1f815cd3d10dd36a57f4f2b20f19e97f Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Mon, 13 May 2024 16:54:18 -0400 Subject: [PATCH 69/81] Typo --- doc/release-notes/6.2-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.2-release-notes.md b/doc/release-notes/6.2-release-notes.md index b2c05a0dafe..0f076d32cf8 100644 --- a/doc/release-notes/6.2-release-notes.md +++ b/doc/release-notes/6.2-release-notes.md @@ -419,7 +419,7 @@ As noted above, deployment of the war file might take several minutes due a data 6\. 
For installations with internationalization: -- Please remember to update translations via [Dataverse language packs](https://github.com/GlobalDataverseCommunityConsortium/dataverse-language-packs) +- Please remember to update translations via [Dataverse language packs](https://github.com/GlobalDataverseCommunityConsortium/dataverse-language-packs). 7\. Restart Payara From c40838c6b5005596cb284a4ae2f9f5bba59f6560 Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Tue, 14 May 2024 16:23:51 +0200 Subject: [PATCH 70/81] change the default to false for UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED --- .../10116-incomplete-metadata-label-setting.md | 2 +- doc/sphinx-guides/source/installation/config.rst | 13 ++++++------- .../java/edu/harvard/iq/dataverse/DatasetPage.java | 2 +- .../java/edu/harvard/iq/dataverse/FilePage.java | 2 +- .../iq/dataverse/search/SolrSearchResult.java | 2 +- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/doc/release-notes/10116-incomplete-metadata-label-setting.md b/doc/release-notes/10116-incomplete-metadata-label-setting.md index 970f2c922ae..769100c3804 100644 --- a/doc/release-notes/10116-incomplete-metadata-label-setting.md +++ b/doc/release-notes/10116-incomplete-metadata-label-setting.md @@ -1 +1 @@ -Bug fixed for the ``incomplete metadata`` label being shown for published dataset with incomplete metadata in certain scenarios. This label will now only be shown for draft versions of such datasets. This label can also be made visible for published datasets with the new option ``dataverse.ui.show-validity-label-when-published``, and will be only shown on datasets with incomplete metadata that the logged-in user can edit. +Bug fixed for the ``incomplete metadata`` label being shown for published dataset with incomplete metadata in certain scenarios. This label will now be shown for draft versions of such datasets and published datasets that the user can edit. 
This label can also be made invisible for published datasets (regardless of edit rights) with the new option ``dataverse.ui.show-validity-label-when-published`` set to `false`. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index c8959912220..f1ce1aff59f 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2901,15 +2901,14 @@ dataverse.ui.show-validity-label-when-published +++++++++++++++++++++++++++++++++++++++++++++++ Even when you do not allow incomplete metadata to be saved in dataverse, some metadata may end up being incomplete, e.g., after making a metadata field mandatory. Datasets where that field is -not filled out, become incomplete, and therefore can be labeled with the ``incomplete metadata`` label. By default, this label is only shown for draft datasets. However, in some situations, you -may want to show the label also for published datasets. For example, when an admin needs to have an overview of the datasets that are no longer valid after a metadata fields update, -enabling this option will show the ``incomplete metadata`` labels on published datasets, but only to the users that can edit the specific datasets. Note that you need to reindex the datasets -after changing the metadata definitions. Reindexing will update the labels and other dataset information according to the new situation. +not filled out, become incomplete, and therefore can be labeled with the ``incomplete metadata`` label. By default, this label is only shown for draft datasets and published datasets that the +user can edit. This option can be disabled by setting it to ``false`` where only draft datasets with incomplete metadata will have that label. When disabled, all published dataset will not have +that label. Note that you need to reindex the datasets after changing the metadata definitions. 
Reindexing will update the labels and other dataset information according to the new situation. -When enabled, published datasets with incomplete metadata will have an ``incomplete metadata`` label attached to them, but only for the datasets that the user can edit. You can list these datasets, -for example, with the validity of metadata filter shown in "My Data" page that can be turned on by enabling the :ref:`dataverse.ui.show-validity-filter` option. +When enabled (by default), published datasets with incomplete metadata will have an ``incomplete metadata`` label attached to them, but only for the datasets that the user can edit. +You can list these datasets, for example, with the validity of metadata filter shown in "My Data" page that can be turned on by enabling the :ref:`dataverse.ui.show-validity-filter` option. -Defaults to ``false``. +Defaults to ``true``. Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_API_SHOW_LABEL_FOR_INCOMPLETE_WHEN_PUBLISHED``. Will accept ``[tT][rR][uU][eE]|1|[oO][nN]`` as "true" expressions. 
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 2afe3b33981..d9cb10026a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -2296,7 +2296,7 @@ private void displayPublishMessage(){ public boolean isValid() { if (valid == null) { - if (workingVersion.isDraft() || (canUpdateDataset() && JvmSettings.UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED.lookupOptional(Boolean.class).orElse(false))) { + if (workingVersion.isDraft() || (canUpdateDataset() && JvmSettings.UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED.lookupOptional(Boolean.class).orElse(true))) { valid = workingVersion.isValid(); } else { valid = true; diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index 52e1ffd825a..9889d23cf55 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -320,7 +320,7 @@ private void displayPublishMessage(){ public boolean isValid() { if (valid == null) { final DatasetVersion workingVersion = fileMetadata.getDatasetVersion(); - if (workingVersion.isDraft() || (canUpdateDataset() && JvmSettings.UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED.lookupOptional(Boolean.class).orElse(false))) { + if (workingVersion.isDraft() || (canUpdateDataset() && JvmSettings.UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED.lookupOptional(Boolean.class).orElse(true))) { valid = workingVersion.isValid(); } else { valid = true; diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java index bd22beecc8e..507f7815218 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java @@ -1268,7 +1268,7 @@ public boolean isValid(Predicate canUpdateDataset) { if 
(this.isDraftState()) { return false; } - if (!JvmSettings.UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED.lookupOptional(Boolean.class).orElse(false)) { + if (!JvmSettings.UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED.lookupOptional(Boolean.class).orElse(true)) { return true; } return !canUpdateDataset.test(this); From 1073333d2eaaae786f1dea16d76a588c2a58a7b2 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 15 May 2024 19:37:45 -0400 Subject: [PATCH 71/81] Populated the identifier column in the Language CV, to make future updates easier. Used the first 3-letter code as the identifier for each of the 185 supported languages. #8243 --- scripts/api/data/metadatablocks/citation.tsv | 370 +++++++++---------- 1 file changed, 185 insertions(+), 185 deletions(-) diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 35cac820f7f..82da5a12eaf 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -138,189 +138,189 @@ authorIdentifierScheme DAI 5 authorIdentifierScheme ResearcherID 6 authorIdentifierScheme ScopusID 7 - language Abkhaz 0 abk ab - language Afar 1 aar aa - language Afrikaans 2 afr af - language Akan 3 aka ak - language Albanian 4 sqi alb sq - language Amharic 5 amh am - language Arabic 6 ara ar - language Aragonese 7 arg an - language Armenian 8 hye arm hy - language Assamese 9 asm as - language Avaric 10 ava av - language Avestan 11 ave ae - language Aymara 12 aym ay - language Azerbaijani 13 aze az - language Bambara 14 bam bm - language Bashkir 15 bak ba - language Basque 16 eus baq eu - language Belarusian 17 bel be - language Bengali, Bangla 18 ben bn Bengali Bangla - language Bihari 19 bih bh - language Bislama 20 bis bi - language Bosnian 21 bos bs - language Breton 22 bre br - language Bulgarian 23 bul bg - language Burmese 24 mya bur my - language Catalan,Valencian 25 cat ca Catalan Valencian - language Chamorro 26 cha ch - language Chechen 27 che ce 
- language Chichewa, Chewa, Nyanja 28 nya ny Chichewa Chewa Nyanja - language Chinese 29 zho chi zh - language Chuvash 30 chv cv - language Cornish 31 cor kw - language Corsican 32 cos co - language Cree 33 cre cr - language Croatian 34 hrv src hr - language Czech 35 ces cze cs - language Danish 36 dan da - language Divehi, Dhivehi, Maldivian 37 div dv Divehi Dhivehi Maldivian - language Dutch 38 nld dut nl - language Dzongkha 39 dzo dz - language English 40 eng en - language Esperanto 41 epo eo - language Estonian 42 est et - language Ewe 43 ewe ee - language Faroese 44 fao fo - language Fijian 45 fij fj - language Finnish 46 fin fi - language French 47 fra fre fr - language Fula, Fulah, Pulaar, Pular 48 ful ff Fula Fulah Pulaar Pular - language Galician 49 glg gl - language Georgian 50 kat geo ka - language German 51 deu ger de - language Greek (modern) 52 ell gre el Greek - language Guaraní 53 grn gn - language Gujarati 54 guj gu - language Haitian, Haitian Creole 55 hat ht Haitian Haitian Creole - language Hausa 56 hau ha - language Hebrew (modern) 57 heb he - language Herero 58 her hz - language Hindi 59 hin hi - language Hiri Motu 60 hmo ho - language Hungarian 61 hun hu - language Interlingua 62 ina ia - language Indonesian 63 ind id - language Interlingue 64 ile ie - language Irish 65 gle ga - language Igbo 66 ibo ig - language Inupiaq 67 ipk ik - language Ido 68 ido io - language Icelandic 69 isl ice is - language Italian 70 ita it - language Inuktitut 71 iku iu - language Japanese 72 jpn ja - language Javanese 73 jav jv - language Kalaallisut, Greenlandic 74 kal kl Kalaallisut Greenlandic - language Kannada 75 kan kn - language Kanuri 76 kau kr - language Kashmiri 77 kas ks - language Kazakh 78 kaz kk - language Khmer 79 khm km - language Kikuyu, Gikuyu 80 kik ki Kikuyu Gikuyu - language Kinyarwanda 81 kin rw - language Kyrgyz 82 kir ky Kirghiz - language Komi 83 kom kv - language Kongo 84 kon kg - language Korean 85 kor ko - language Kurdish 86 kur ku - 
language Kwanyama, Kuanyama 87 kua kj Kwanyama Kuanyama - language Latin 88 lat la - language Luxembourgish, Letzeburgesch 89 ltz lb Luxembourgish Letzeburgesch - language Ganda 90 lug lg - language Limburgish, Limburgan, Limburger 91 lim li Limburgish Limburgan Limburger - language Lingala 92 lin ln - language Lao 93 lao lo - language Lithuanian 94 lit lt - language Luba-Katanga 95 lub lu - language Latvian 96 lav lv - language Manx 97 glv gv - language Macedonian 98 mkd mac mk - language Malagasy 99 mlg mg - language Malay 100 msa may ms - language Malayalam 101 mal ml - language Maltese 102 mlt mt - language Māori 103 mri mao mi Maori - language Marathi (Marāṭhī) 104 mar mr - language Marshallese 105 mah mh - language Mixtepec Mixtec 106 mix - language Mongolian 107 mon mn - language Nauru 108 nau na - language Navajo, Navaho 109 nav nv Navajo Navaho - language Northern Ndebele 110 nde nd - language Nepali 111 nep ne - language Ndonga 112 ndo ng - language Norwegian Bokmål 113 nob nb - language Norwegian Nynorsk 114 nno nn - language Norwegian 115 nor no - language Nuosu 116 iii ii Sichuan Yi - language Southern Ndebele 117 nbl nr - language Occitan 118 oci oc - language Ojibwe, Ojibwa 119 oji oj Ojibwe Ojibwa - language Old Church Slavonic,Church Slavonic,Old Bulgarian 120 chu cu - language Oromo 121 orm om - language Oriya 122 ori or - language Ossetian, Ossetic 123 oss os Ossetian Ossetic - language Panjabi, Punjabi 124 pan pa Panjabi Punjabi - language Pāli 125 pli pi - language Persian (Farsi) 126 fas per fa - language Polish 127 pol pl - language Pashto, Pushto 128 pus ps Pashto Pushto - language Portuguese 129 por pt - language Quechua 130 que qu - language Romansh 131 roh rm - language Kirundi 132 run rn - language Romanian 133 ron rum ro - language Russian 134 rus ru - language Sanskrit (Saṁskṛta) 135 san sa - language Sardinian 136 srd sc - language Sindhi 137 snd sd - language Northern Sami 138 sme se - language Samoan 139 smo sm - language Sango 140 
sag sg - language Serbian 141 srp scc sr - language Scottish Gaelic, Gaelic 142 gla gd Scottish Gaelic Gaelic - language Shona 143 sna sn - language Sinhala, Sinhalese 144 sin si Sinhala Sinhalese - language Slovak 145 slk slo sk - language Slovene 146 slv sl Slovenian - language Somali 147 som so - language Southern Sotho 148 sot st - language Spanish, Castilian 149 spa es Spanish Castilian - language Sundanese 150 sun su - language Swahili 151 swa sw - language Swati 152 ssw ss - language Swedish 153 swe sv - language Tamil 154 tam ta - language Telugu 155 tel te - language Tajik 156 tgk tg - language Thai 157 tha th - language Tigrinya 158 tir ti - language Tibetan Standard, Tibetan, Central 159 bod tib bo Tibetan Standard Tibetan Central - language Turkmen 160 tuk tk - language Tagalog 161 tgl tl - language Tswana 162 tsn tn - language Tonga (Tonga Islands) 163 ton to Tonga - language Turkish 164 tur tr - language Tsonga 165 tso ts - language Tatar 166 tat tt - language Twi 167 twi tw - language Tahitian 168 tah ty - language Uyghur, Uighur 169 uig ug Uyghur Uighur - language Ukrainian 170 ukr uk - language Urdu 171 urd ur - language Uzbek 172 uzb uz - language Venda 173 ven ve - language Vietnamese 174 vie vi - language Volapük 175 vol vo - language Walloon 176 wln wa - language Welsh 177 cym wel cy - language Wolof 178 wol wo - language Western Frisian 179 fry fy - language Xhosa 180 xho xh - language Yiddish 181 yid yi - language Yoruba 182 yor yo - language Zhuang, Chuang 183 zha za Zhuang Chuang - language Zulu 184 zul zu + language Abkhaz abk 0 abk ab + language Afar aar 1 aar aa + language Afrikaans afr 2 afr af + language Akan aka 3 aka ak + language Albanian sqi 4 sqi alb sq + language Amharic amh 5 amh am + language Arabic ara 6 ara ar + language Aragonese arg 7 arg an + language Armenian hye 8 hye arm hy + language Assamese asm 9 asm as + language Avaric ava 10 ava av + language Avestan ave 11 ave ae + language Aymara aym 12 aym ay + language 
Azerbaijani aze 13 aze az + language Bambara bam 14 bam bm + language Bashkir bak 15 bak ba + language Basque eus 16 eus baq eu + language Belarusian bel 17 bel be + language Bengali, Bangla ben 18 ben bn Bengali Bangla + language Bihari bih 19 bih bh + language Bislama bis 20 bis bi + language Bosnian bos 21 bos bs + language Breton bre 22 bre br + language Bulgarian bul 23 bul bg + language Burmese mya 24 mya bur my + language Catalan,Valencian cat 25 cat ca Catalan Valencian + language Chamorro cha 26 cha ch + language Chechen che 27 che ce + language Chichewa, Chewa, Nyanja nya 28 nya ny Chichewa Chewa Nyanja + language Chinese zho 29 zho chi zh + language Chuvash chv 30 chv cv + language Cornish cor 31 cor kw + language Corsican cos 32 cos co + language Cree cre 33 cre cr + language Croatian hrv 34 hrv src hr + language Czech ces 35 ces cze cs + language Danish dan 36 dan da + language Divehi, Dhivehi, Maldivian div 37 div dv Divehi Dhivehi Maldivian + language Dutch nld 38 nld dut nl + language Dzongkha dzo 39 dzo dz + language English eng 40 eng en + language Esperanto epo 41 epo eo + language Estonian est 42 est et + language Ewe ewe 43 ewe ee + language Faroese fao 44 fao fo + language Fijian fij 45 fij fj + language Finnish fin 46 fin fi + language French fra 47 fra fre fr + language Fula, Fulah, Pulaar, Pular ful 48 ful ff Fula Fulah Pulaar Pular + language Galician glg 49 glg gl + language Georgian kat 50 kat geo ka + language German deu 51 deu ger de + language Greek (modern) ell 52 ell gre el Greek + language Guaraní grn 53 grn gn + language Gujarati guj 54 guj gu + language Haitian, Haitian Creole hat 55 hat ht Haitian Haitian Creole + language Hausa hau 56 hau ha + language Hebrew (modern) heb 57 heb he + language Herero her 58 her hz + language Hindi hin 59 hin hi + language Hiri Motu hmo 60 hmo ho + language Hungarian hun 61 hun hu + language Interlingua ina 62 ina ia + language Indonesian ind 63 ind id + language Interlingue ile 64 ile ie + 
language Irish gle 65 gle ga + language Igbo ibo 66 ibo ig + language Inupiaq ipk 67 ipk ik + language Ido ido 68 ido io + language Icelandic isl 69 isl ice is + language Italian ita 70 ita it + language Inuktitut iku 71 iku iu + language Japanese jpn 72 jpn ja + language Javanese jav 73 jav jv + language Kalaallisut, Greenlandic kal 74 kal kl Kalaallisut Greenlandic + language Kannada kan 75 kan kn + language Kanuri kau 76 kau kr + language Kashmiri kas 77 kas ks + language Kazakh kaz 78 kaz kk + language Khmer khm 79 khm km + language Kikuyu, Gikuyu kik 80 kik ki Kikuyu Gikuyu + language Kinyarwanda kin 81 kin rw + language Kyrgyz kir 82 kir ky Kirghiz + language Komi kom 83 kom kv + language Kongo kon 84 kon kg + language Korean kor 85 kor ko + language Kurdish kur 86 kur ku + language Kwanyama, Kuanyama kua 87 kua kj Kwanyama Kuanyama + language Latin lat 88 lat la + language Luxembourgish, Letzeburgesch ltz 89 ltz lb Luxembourgish Letzeburgesch + language Ganda lug 90 lug lg + language Limburgish, Limburgan, Limburger lim 91 lim li Limburgish Limburgan Limburger + language Lingala lin 92 lin ln + language Lao lao 93 lao lo + language Lithuanian lit 94 lit lt + language Luba-Katanga lub 95 lub lu + language Latvian lav 96 lav lv + language Manx glv 97 glv gv + language Macedonian mkd 98 mkd mac mk + language Malagasy mlg 99 mlg mg + language Malay msa 100 msa may ms + language Malayalam mal 101 mal ml + language Maltese mlt 102 mlt mt + language Māori mri 103 mri mao mi Maori + language Marathi (Marāṭhī) mar 104 mar mr + language Marshallese mah 105 mah mh + language Mixtepec Mixtec mix 106 mix + language Mongolian mon 107 mon mn + language Nauru nau 108 nau na + language Navajo, Navaho nav 109 nav nv Navajo Navaho + language Northern Ndebele nde 110 nde nd + language Nepali nep 111 nep ne + language Ndonga ndo 112 ndo ng + language Norwegian Bokmål nob 113 nob nb + language Norwegian Nynorsk nno 114 nno nn + language Norwegian nor 115 nor no + language Nuosu 
iii 116 iii ii Sichuan Yi + language Southern Ndebele nbl 117 nbl nr + language Occitan oci 118 oci oc + language Ojibwe, Ojibwa oji 119 oji oj Ojibwe Ojibwa + language Old Church Slavonic,Church Slavonic,Old Bulgarian chu 120 chu cu + language Oromo orm 121 orm om + language Oriya ori 122 ori or + language Ossetian, Ossetic oss 123 oss os Ossetian Ossetic + language Panjabi, Punjabi pan 124 pan pa Panjabi Punjabi + language Pāli pli 125 pli pi + language Persian (Farsi) fas 126 fas per fa + language Polish pol 127 pol pl + language Pashto, Pushto pus 128 pus ps Pashto Pushto + language Portuguese por 129 por pt + language Quechua que 130 que qu + language Romansh roh 131 roh rm + language Kirundi run 132 run rn + language Romanian ron 133 ron rum ro + language Russian rus 134 rus ru + language Sanskrit (Saṁskṛta) san 135 san sa + language Sardinian srd 136 srd sc + language Sindhi snd 137 snd sd + language Northern Sami sme 138 sme se + language Samoan smo 139 smo sm + language Sango sag 140 sag sg + language Serbian srp 141 srp scc sr + language Scottish Gaelic, Gaelic gla 142 gla gd Scottish Gaelic Gaelic + language Shona sna 143 sna sn + language Sinhala, Sinhalese sin 144 sin si Sinhala Sinhalese + language Slovak slk 145 slk slo sk + language Slovene slv 146 slv sl Slovenian + language Somali som 147 som so + language Southern Sotho sot 148 sot st + language Spanish, Castilian spa 149 spa es Spanish Castilian + language Sundanese sun 150 sun su + language Swahili swa 151 swa sw + language Swati ssw 152 ssw ss + language Swedish swe 153 swe sv + language Tamil tam 154 tam ta + language Telugu tel 155 tel te + language Tajik tgk 156 tgk tg + language Thai tha 157 tha th + language Tigrinya tir 158 tir ti + language Tibetan Standard, Tibetan, Central bod 159 bod tib bo Tibetan Standard Tibetan Central + language Turkmen tuk 160 tuk tk + language Tagalog tgl 161 tgl tl + language Tswana tsn 162 tsn tn + language Tonga (Tonga Islands) ton 163 ton to Tonga + 
language Turkish tur 164 tur tr + language Tsonga tso 165 tso ts + language Tatar tat 166 tat tt + language Twi twi 167 twi tw + language Tahitian tah 168 tah ty + language Uyghur, Uighur uig 169 uig ug Uyghur Uighur + language Ukrainian ukr 170 ukr uk + language Urdu urd 171 urd ur + language Uzbek uzb 172 uzb uz + language Venda ven 173 ven ve + language Vietnamese vie 174 vie vi + language Volapük vol 175 vol vo + language Walloon wln 176 wln wa + language Welsh cym 177 cym wel cy + language Wolof wol 178 wol wo + language Western Frisian fry 179 fry fy + language Xhosa xho 180 xho xh + language Yiddish yid 181 yid yi + language Yoruba yor 182 yor yo + language Zhuang, Chuang zha 183 zha za Zhuang Chuang + language Zulu zul 184 zul zu language Not applicable 185 From 91af39fd04639b1d2070af42212b839b239b6cc1 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 16 May 2024 10:03:22 -0400 Subject: [PATCH 72/81] explicitly remove fms from dv --- .../command/impl/DestroyDatasetCommand.java | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java index 877f3b81d7e..be3e28029e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.DataverseRole; @@ -64,17 +65,17 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { throw new PermissionException("Destroy can only be called by superusers.", this, 
Collections.singleton(Permission.DeleteDatasetDraft), doomed); } + Dataset managedDoomed = ctxt.em().merge(doomed); // If there is a dedicated thumbnail DataFile, it needs to be reset // explicitly, or we'll get a constraint violation when deleting: - doomed.setThumbnailFile(null); - final Dataset managedDoomed = ctxt.em().merge(doomed); - + managedDoomed.setThumbnailFile(null); + // files need to iterate through and remove 'by hand' to avoid // optimistic lock issues... (plus the physical files need to be // deleted too!) - - Iterator dfIt = doomed.getFiles().iterator(); + DatasetVersion dv = managedDoomed.getLatestVersion(); + Iterator dfIt = managedDoomed.getFiles().iterator(); while (dfIt.hasNext()){ DataFile df = dfIt.next(); // Gather potential Solr IDs of files. As of this writing deaccessioned files are never indexed. @@ -85,32 +86,29 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { ctxt.engine().submit(new DeleteDataFileCommand(df, getRequest(), true)); dfIt.remove(); } - - //also, lets delete the uploaded thumbnails! - if (!doomed.isHarvested()) { - deleteDatasetLogo(doomed); - } + dv.setFileMetadatas(null); // ASSIGNMENTS - for (RoleAssignment ra : ctxt.roles().directRoleAssignments(doomed)) { + for (RoleAssignment ra : ctxt.roles().directRoleAssignments(managedDoomed)) { ctxt.em().remove(ra); } // ROLES - for (DataverseRole ra : ctxt.roles().findByOwnerId(doomed.getId())) { + for (DataverseRole ra : ctxt.roles().findByOwnerId(managedDoomed.getId())) { ctxt.em().remove(ra); } - if (!doomed.isHarvested()) { - GlobalId pid = doomed.getGlobalId(); + if (!managedDoomed.isHarvested()) { + //also, lets delete the uploaded thumbnails! + deleteDatasetLogo(managedDoomed); + // and remove the PID (perhaps should be after the remove in case that causes a roll-back?) 
+ GlobalId pid = managedDoomed.getGlobalId(); if (pid != null) { PidProvider pidProvider = PidUtil.getPidProvider(pid.getProviderId()); try { - if (pidProvider.alreadyRegistered(doomed)) { - pidProvider.deleteIdentifier(doomed); - for (DataFile df : doomed.getFiles()) { - pidProvider.deleteIdentifier(df); - } + if (pidProvider.alreadyRegistered(managedDoomed)) { + pidProvider.deleteIdentifier(managedDoomed); + //Files are handled in DeleteDataFileCommand } } catch (Exception e) { logger.log(Level.WARNING, "Identifier deletion was not successful:", e.getMessage()); @@ -120,18 +118,20 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { toReIndex = managedDoomed.getOwner(); - // dataset - ctxt.em().remove(managedDoomed); - // add potential Solr IDs of datasets to list for deletion - String solrIdOfPublishedDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + doomed.getId(); + String solrIdOfPublishedDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + managedDoomed.getId(); datasetAndFileSolrIdsToDelete.add(solrIdOfPublishedDatasetVersion); - String solrIdOfDraftDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + doomed.getId() + IndexServiceBean.draftSuffix; + String solrIdOfDraftDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + managedDoomed.getId() + IndexServiceBean.draftSuffix; datasetAndFileSolrIdsToDelete.add(solrIdOfDraftDatasetVersion); String solrIdOfDraftDatasetVersionPermission = solrIdOfDraftDatasetVersion + IndexServiceBean.discoverabilityPermissionSuffix; datasetAndFileSolrIdsToDelete.add(solrIdOfDraftDatasetVersionPermission); - String solrIdOfDeaccessionedDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + doomed.getId() + IndexServiceBean.deaccessionedSuffix; + String solrIdOfDeaccessionedDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + managedDoomed.getId() + IndexServiceBean.deaccessionedSuffix; datasetAndFileSolrIdsToDelete.add(solrIdOfDeaccessionedDatasetVersion); + 
+ // dataset + ctxt.em().remove(managedDoomed); + + } @Override From 522bee131a9f59c7f91268316a4fc80a38e55ad9 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 17 May 2024 15:21:55 -0400 Subject: [PATCH 73/81] catch null and other dvobject types --- .../iq/dataverse/search/SearchIncludeFragment.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 9be6c34aa8b..f84bd0eae87 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -1495,7 +1495,20 @@ private DataverseRequest getDataverseRequest() { } public boolean isValid(SolrSearchResult result) { + Long id = result.getEntityId(); + + DvObject obj = dvObjectService.findDvObject(id); + if(obj != null && obj instanceof Dataset) { + return result.isValid(x -> permissionsWrapper.canUpdateDataset(getDataverseRequest(), datasetService.find(x.getEntityId()))); + } else if(obj != null && obj instanceof DataFile) { + logger.info("Object is a DataFile"); + } else if(obj != null && obj instanceof Dataverse) { + logger.info("Object is a Dataverse"); + } else if(obj == null) { + logger.info("Object is null"); + } + return result.isValid(x -> true); } public enum SortOrder { From a0c97a763f83fe44fc34f7af2087d8abb7a3ce8b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 17 May 2024 16:49:10 -0400 Subject: [PATCH 74/81] cleanup --- .../harvard/iq/dataverse/search/SearchIncludeFragment.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index f84bd0eae87..b9d2181656c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -1501,13 +1501,8 @@ public boolean isValid(SolrSearchResult result) { if(obj != null && obj instanceof Dataset) { return result.isValid(x -> permissionsWrapper.canUpdateDataset(getDataverseRequest(), datasetService.find(x.getEntityId()))); - } else if(obj != null && obj instanceof DataFile) { - logger.info("Object is a DataFile"); - } else if(obj != null && obj instanceof Dataverse) { - logger.info("Object is a Dataverse"); - } else if(obj == null) { - logger.info("Object is null"); } + logger.fine("isValid called for dvObject that is null (or not a dataset), id: " + id + "This can occur if a dataset is deleted while a search is in progress"); return result.isValid(x -> true); } From 788b0463f6228106db0968ad07c727daa44cdbfd Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 21 May 2024 08:52:02 -0400 Subject: [PATCH 75/81] improvement from @ErykKul --- .../search/SearchIncludeFragment.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index b9d2181656c..ee0f74564f4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -1495,15 +1495,15 @@ private DataverseRequest getDataverseRequest() { } public boolean isValid(SolrSearchResult result) { - Long id = result.getEntityId(); - - DvObject obj = dvObjectService.findDvObject(id); - if(obj != null && obj instanceof Dataset) { - - return result.isValid(x -> permissionsWrapper.canUpdateDataset(getDataverseRequest(), datasetService.find(x.getEntityId()))); - } - logger.fine("isValid called for dvObject that is null (or not a dataset), id: " + id + "This can occur if a dataset is deleted while a search is in progress"); - return result.isValid(x -> true); 
+ return result.isValid(x -> { + Long id = x.getEntityId(); + DvObject obj = dvObjectService.findDvObject(id); + if(obj != null && obj instanceof Dataset) { + return permissionsWrapper.canUpdateDataset(getDataverseRequest(), (Dataset) obj); + } + logger.fine("isValid called for dvObject that is null (or not a dataset), id: " + id + "This can occur if a dataset is deleted while a search is in progress"); + return true; + }); } public enum SortOrder { From ba0cdaf347e83042e1921ae3c6cd55d93946f50a Mon Sep 17 00:00:00 2001 From: luddaniel <83018819+luddaniel@users.noreply.github.com> Date: Tue, 21 May 2024 15:39:33 +0200 Subject: [PATCH 76/81] #9739 - URLValidator now allows two slashes in the path component of the URL (#9750) * #9739 - URLValidator now allows two slashes in the path component of the URL * #9750 Adding a release note --------- Co-authored-by: jeromeroucou --- doc/release-notes/9739-url-validator.md | 7 +++++++ .../edu/harvard/iq/dataverse/validation/URLValidator.java | 2 +- .../harvard/iq/dataverse/validation/URLValidatorTest.java | 4 +++- 3 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 doc/release-notes/9739-url-validator.md diff --git a/doc/release-notes/9739-url-validator.md b/doc/release-notes/9739-url-validator.md new file mode 100644 index 00000000000..ad149c54459 --- /dev/null +++ b/doc/release-notes/9739-url-validator.md @@ -0,0 +1,7 @@ +## Release Highlights + +### URL validation is more permissive + +Url validation now allows two slashes in the path component of the URL. 
(#9750) +Among other things, this allows metadata fields of `url` type to be filled with more complex url such as https://archive.softwareheritage.org/browse/directory/561bfe6698ca9e58b552b4eb4e56132cac41c6f9/?origin_url=https://github.com/gem-pasteur/macsyfinder&revision=868637fce184865d8e0436338af66a2648e8f6e1&snapshot=1bde3cb370766b10132c4e004c7cb377979928d1 + diff --git a/src/main/java/edu/harvard/iq/dataverse/validation/URLValidator.java b/src/main/java/edu/harvard/iq/dataverse/validation/URLValidator.java index 285f34d3f8c..8fde76d84e1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/validation/URLValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/validation/URLValidator.java @@ -41,7 +41,7 @@ public static boolean isURLValid(String value) { * @return true when valid (null is also valid) or false */ public static boolean isURLValid(String value, String[] schemes) { - UrlValidator urlValidator = new UrlValidator(schemes); + UrlValidator urlValidator = new UrlValidator(schemes, UrlValidator.ALLOW_2_SLASHES); return value == null || urlValidator.isValid(value); } diff --git a/src/test/java/edu/harvard/iq/dataverse/validation/URLValidatorTest.java b/src/test/java/edu/harvard/iq/dataverse/validation/URLValidatorTest.java index 8c29b609c9b..a344d6a600d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/validation/URLValidatorTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/validation/URLValidatorTest.java @@ -29,7 +29,9 @@ public static Stream stdUrlExamples() { Arguments.of(true, "http://foobar.com:9101"), Arguments.of(true, "ftp://user@foobar.com"), Arguments.of(false, "cnn.com"), - Arguments.of(false, "smb://user@foobar.com") + Arguments.of(false, "smb://user@foobar.com"), + // case of a real permalink that requires UrlValidator.ALLOW_2_SLASHES + Arguments.of(true, 
"https://archive.softwareheritage.org/swh:1:dir:561bfe6698ca9e58b552b4eb4e56132cac41c6f9;origin=https://github.com/gem-pasteur/macsyfinder;visit=swh:1:snp:1bde3cb370766b10132c4e004c7cb377979928d1;anchor=swh:1:rev:868637fce184865d8e0436338af66a2648e8f6e1") ); } From 36379c61b019bee744774c1724f3e2a0d04ae982 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20ROUCOU?= Date: Wed, 22 May 2024 17:29:30 +0200 Subject: [PATCH 77/81] fix change on message format --- .../java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java index b5885a50a5a..3cd03abeb38 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java @@ -19,7 +19,7 @@ public class DataRetrieverApiIT { - private static final String ERR_MSG_FORMAT = "{\"success\":false,\"error_message\":\"%s\"}"; + private static final String ERR_MSG_FORMAT = "{\n \"success\": false,\n \"error_message\": \"%s\"\n}"; @BeforeAll public static void setUpClass() { From 959ee860c8b2659f296b72484bfef65c45ddd903 Mon Sep 17 00:00:00 2001 From: luddaniel <83018819+luddaniel@users.noreply.github.com> Date: Wed, 22 May 2024 20:03:09 +0200 Subject: [PATCH 78/81] CVOC : allow flexible params in retrievalUri (Ontoportal integration) (#10404) * 9276 - CVOC : allow flexible params in retrievalUri (Ontoportal integration) * adding release note * Fixed EditDDIIT.testUpdateVariableMetadata issue * Update release note with suggested change --- ...ow-flexible-params-in-retrievaluri-cvoc.md | 14 +++++++ .../iq/dataverse/DatasetFieldServiceBean.java | 40 +++++++++++++++---- .../edu/harvard/iq/dataverse/DatasetPage.java | 6 --- .../iq/dataverse/EjbDataverseEngine.java | 10 ++++- .../engine/command/CommandContext.java | 3 ++ 
.../impl/AbstractCreateDatasetCommand.java | 15 +++---- .../command/impl/AbstractDatasetCommand.java | 15 ++++++- .../impl/CreateDatasetVersionCommand.java | 3 +- .../impl/UpdateDatasetVersionCommand.java | 17 +++++--- .../iq/dataverse/util/json/JSONLDUtil.java | 1 - .../iq/dataverse/util/json/JsonParser.java | 2 - .../dataverse/engine/TestCommandContext.java | 5 +++ 12 files changed, 95 insertions(+), 36 deletions(-) create mode 100644 doc/release-notes/9276-allow-flexible-params-in-retrievaluri-cvoc.md diff --git a/doc/release-notes/9276-allow-flexible-params-in-retrievaluri-cvoc.md b/doc/release-notes/9276-allow-flexible-params-in-retrievaluri-cvoc.md new file mode 100644 index 00000000000..5e18007e8ae --- /dev/null +++ b/doc/release-notes/9276-allow-flexible-params-in-retrievaluri-cvoc.md @@ -0,0 +1,14 @@ +## Release Highlights + +### Updates on Support for External Vocabulary Services + +#### HTTP Headers + +You are now able to add HTTP request headers required by the service you are implementing (#10331) + +#### Flexible params in retrievalUri + +You can now use `managed-fields` field names as well as the `term-uri-field` field name as parameters in the `retrieval-uri` when configuring an external vocabulary service. `{0}` as an alternative to using the `term-uri-field` name is still supported for backward compatibility. +Also you can specify if the value must be url encoded with `encodeUrl:`. 
(#10404) + +For example : `"retrieval-uri": "https://data.agroportal.lirmm.fr/ontologies/{keywordVocabulary}/classes/{encodeUrl:keywordTermURL}"` \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java index f6a566ae65f..bd40dab5af6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java @@ -4,6 +4,7 @@ import java.io.StringReader; import java.net.URI; import java.net.URISyntaxException; +import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.sql.Timestamp; import java.text.MessageFormat; @@ -349,8 +350,12 @@ public void registerExternalVocabValues(DatasetField df) { logger.fine("Registering for field: " + dft.getName()); JsonObject cvocEntry = getCVocConf(true).get(dft.getId()); if (dft.isPrimitive()) { + List siblingsDatasetFields = new ArrayList<>(); + if(dft.getParentDatasetFieldType()!=null) { + siblingsDatasetFields = df.getParentDatasetFieldCompoundValue().getChildDatasetFields(); + } for (DatasetFieldValue dfv : df.getDatasetFieldValues()) { - registerExternalTerm(cvocEntry, dfv.getValue()); + registerExternalTerm(cvocEntry, dfv.getValue(), siblingsDatasetFields); } } else { if (df.getDatasetFieldType().isCompound()) { @@ -359,7 +364,7 @@ public void registerExternalVocabValues(DatasetField df) { for (DatasetField cdf : cv.getChildDatasetFields()) { logger.fine("Found term uri field type id: " + cdf.getDatasetFieldType().getId()); if (cdf.getDatasetFieldType().equals(termdft)) { - registerExternalTerm(cvocEntry, cdf.getValue()); + registerExternalTerm(cvocEntry, cdf.getValue(), cv.getChildDatasetFields()); } } } @@ -447,15 +452,17 @@ public JsonObject getExternalVocabularyValue(String termUri) { /** * Perform a call to the external service to retrieve information about the term URI - * @param cvocEntry - the 
configuration for the DatasetFieldType associated with this term - * @param term - the term uri as a string + * + * @param cvocEntry - the configuration for the DatasetFieldType associated with this term + * @param term - the term uri as a string + * @param relatedDatasetFields - siblings or childs of the term */ - @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void registerExternalTerm(JsonObject cvocEntry, String term) { + public void registerExternalTerm(JsonObject cvocEntry, String term, List relatedDatasetFields) { String retrievalUri = cvocEntry.getString("retrieval-uri"); + String termUriFieldName = cvocEntry.getString("term-uri-field"); String prefix = cvocEntry.getString("prefix", null); if(term.isBlank()) { - logger.fine("Ingoring blank term"); + logger.fine("Ignoring blank term"); return; } boolean isExternal = false; @@ -486,7 +493,13 @@ public void registerExternalTerm(JsonObject cvocEntry, String term) { } if (evv.getValue() == null) { String adjustedTerm = (prefix==null)? 
term: term.replace(prefix, ""); - retrievalUri = retrievalUri.replace("{0}", adjustedTerm); + + retrievalUri = replaceRetrievalUriParam(retrievalUri, "0", adjustedTerm); + retrievalUri = replaceRetrievalUriParam(retrievalUri, termUriFieldName, adjustedTerm); + for (DatasetField f : relatedDatasetFields) { + retrievalUri = replaceRetrievalUriParam(retrievalUri, f.getDatasetFieldType().getName(), f.getValue()); + } + logger.fine("Didn't find " + term + ", calling " + retrievalUri); try (CloseableHttpClient httpClient = HttpClients.custom() .addInterceptorLast(new HttpResponseInterceptor() { @@ -546,6 +559,17 @@ public void process(HttpResponse response, HttpContext context) throws HttpExcep } + private String replaceRetrievalUriParam(String retrievalUri, String paramName, String value) { + + if(retrievalUri.contains("encodeUrl:" + paramName)) { + retrievalUri = retrievalUri.replace("{encodeUrl:"+paramName+"}", URLEncoder.encode(value, StandardCharsets.UTF_8)); + } else { + retrievalUri = retrievalUri.replace("{"+paramName+"}", value); + } + + return retrievalUri; + } + /** * Parse the raw value returned by an external service for a give term uri and * filter it according to the 'retrieval-filtering' configuration for this diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index d9cb10026a3..9c7e951254a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3930,12 +3930,6 @@ public String save() { ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); } dataset = commandEngine.submit(cmd); - for (DatasetField df : dataset.getLatestVersion().getFlatDatasetFields()) { - logger.fine("Found id: " + df.getDatasetFieldType().getId()); - if (fieldService.getCVocConf(true).containsKey(df.getDatasetFieldType().getId())) { - fieldService.registerExternalVocabValues(df); - } - } if (editMode == EditMode.CREATE) { if 
(session.getUser() instanceof AuthenticatedUser) { userNotificationService.sendNotification((AuthenticatedUser) session.getUser(), dataset.getCreateDate(), UserNotification.Type.CREATEDS, dataset.getLatestVersion().getId()); diff --git a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java index b3b69e25bf3..c8537f2a424 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java +++ b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java @@ -134,6 +134,9 @@ public class EjbDataverseEngine { @EJB DatasetLinkingServiceBean dsLinking; + @EJB + DatasetFieldServiceBean dsField; + @EJB ExplicitGroupServiceBean explicitGroups; @@ -509,7 +512,12 @@ public DataverseLinkingServiceBean dvLinking() { public DatasetLinkingServiceBean dsLinking() { return dsLinking; } - + + @Override + public DatasetFieldServiceBean dsField() { + return dsField; + } + @Override public StorageUseServiceBean storageUse() { return storageUseService; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java index 48e8cd952b4..96330271367 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.engine.command; import edu.harvard.iq.dataverse.DataFileServiceBean; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetLinkingServiceBean; import edu.harvard.iq.dataverse.DatasetServiceBean; import edu.harvard.iq.dataverse.DatasetVersionServiceBean; @@ -146,4 +147,6 @@ public interface CommandContext { public Stack getCommandsCalled(); public void addCommand(Command command); + + public DatasetFieldServiceBean dsField(); } diff --git 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java index d8302024c14..ab78a88c9a7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java @@ -1,26 +1,19 @@ package edu.harvard.iq.dataverse.engine.command.impl; -import edu.harvard.iq.dataverse.*; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.dataaccess.DataAccess; -import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; -import edu.harvard.iq.dataverse.datacapturemodule.ScriptRequestResponse; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; import edu.harvard.iq.dataverse.pidproviders.PidProvider; -import edu.harvard.iq.dataverse.pidproviders.PidUtil; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import static edu.harvard.iq.dataverse.util.StringUtil.isEmpty; -import java.io.IOException; import java.util.Objects; -import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.solr.client.solrj.SolrServerException; /**; * An abstract base class for commands that creates {@link Dataset}s. 
@@ -97,6 +90,8 @@ public Dataset execute(CommandContext ctxt) throws CommandException { if(!harvested) { checkSystemMetadataKeyIfNeeded(dsv, null); } + + registerExternalVocabValuesIfAny(ctxt, dsv); theDataset.setCreator((AuthenticatedUser) getRequest().getUser()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java index 85e417ac5f3..1a1f4f9318b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java @@ -2,10 +2,13 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DatasetVersionDifference; import edu.harvard.iq.dataverse.DatasetVersionUser; import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.MetadataBlock; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; @@ -24,9 +27,8 @@ import java.util.logging.Logger; import static java.util.stream.Collectors.joining; +import jakarta.ejb.EJB; import jakarta.validation.ConstraintViolation; -import edu.harvard.iq.dataverse.MetadataBlock; -import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.settings.JvmSettings; /** @@ -231,4 +233,13 @@ protected void checkSystemMetadataKeyIfNeeded(DatasetVersion newVersion, Dataset } } } + + protected void registerExternalVocabValuesIfAny(CommandContext ctxt, DatasetVersion newVersion) { + for (DatasetField df : newVersion.getFlatDatasetFields()) { + logger.fine("Found id: " + 
df.getDatasetFieldType().getId()); + if (ctxt.dsField().getCVocConf(true).containsKey(df.getDatasetFieldType().getId())) { + ctxt.dsField().registerExternalVocabValues(df); + } + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java index bcaece55fed..6539ac27ea2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java @@ -59,7 +59,8 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { //Will throw an IllegalCommandException if a system metadatablock is changed and the appropriate key is not supplied. checkSystemMetadataKeyIfNeeded(newVersion, latest); - + registerExternalVocabValuesIfAny(ctxt, newVersion); + List newVersionMetadatum = new ArrayList<>(latest.getFileMetadatas().size()); for ( FileMetadata fmd : latest.getFileMetadatas() ) { FileMetadata fmdCopy = fmd.createCopy(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 7591bebe796..994f4c7dfb6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -1,6 +1,12 @@ package edu.harvard.iq.dataverse.engine.command.impl; -import edu.harvard.iq.dataverse.*; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DataFileCategory; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetLock; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.DatasetVersionDifference; +import edu.harvard.iq.dataverse.FileMetadata; 
import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.CommandContext; @@ -8,7 +14,6 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.DatasetFieldUtil; import edu.harvard.iq.dataverse.util.FileMetadataUtil; @@ -115,8 +120,11 @@ public Dataset execute(CommandContext ctxt) throws CommandException { //Will throw an IllegalCommandException if a system metadatablock is changed and the appropriate key is not supplied. checkSystemMetadataKeyIfNeeded(getDataset().getOrCreateEditVersion(fmVarMet), persistedVersion); - - + + getDataset().getOrCreateEditVersion().setLastUpdateTime(getTimestamp()); + + registerExternalVocabValuesIfAny(ctxt, getDataset().getOrCreateEditVersion(fmVarMet)); + try { // Invariant: Dataset has no locks preventing the update String lockInfoMessage = "saving current edits"; @@ -256,7 +264,6 @@ public Dataset execute(CommandContext ctxt) throws CommandException { ctxt.ingest().recalculateDatasetVersionUNF(theDataset.getOrCreateEditVersion()); } - theDataset.getOrCreateEditVersion().setLastUpdateTime(getTimestamp()); theDataset.setModificationTime(getTimestamp()); savedDataset = ctxt.em().merge(theDataset); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java index 637f002f5ad..52491a5a7e1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java @@ -466,7 +466,6 @@ private static void addField(DatasetField dsf, JsonArray valArray, DatasetFieldT 
if(!datasetFieldSvc.isValidCVocValue(dsft, strValue)) { throw new BadRequestException("Invalid values submitted for " + dsft.getName() + " which is limited to specific vocabularies."); } - datasetFieldSvc.registerExternalTerm(cvocMap.get(dsft.getId()), strValue); } DatasetFieldValue datasetFieldValue = new DatasetFieldValue(); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index a0bd2fff295..addccc93fe0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -847,7 +847,6 @@ public void parsePrimitiveValue(DatasetField dsf, DatasetFieldType dft , JsonObj if(!datasetFieldSvc.isValidCVocValue(dft, datasetFieldValue.getValue())) { throw new JsonParseException("Invalid values submitted for " + dft.getName() + " which is limited to specific vocabularies."); } - datasetFieldSvc.registerExternalTerm(cvocMap.get(dft.getId()), datasetFieldValue.getValue()); } vals.add(datasetFieldValue); } @@ -864,7 +863,6 @@ public void parsePrimitiveValue(DatasetField dsf, DatasetFieldType dft , JsonObj if(!datasetFieldSvc.isValidCVocValue(dft, datasetFieldValue.getValue())) { throw new JsonParseException("Invalid values submitted for " + dft.getName() + " which is limited to specific vocabularies."); } - datasetFieldSvc.registerExternalTerm(cvocMap.get(dft.getId()), datasetFieldValue.getValue()); } vals.add(datasetFieldValue); } diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/TestCommandContext.java b/src/test/java/edu/harvard/iq/dataverse/engine/TestCommandContext.java index fa89bb756f5..f2c03adea20 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/TestCommandContext.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/TestCommandContext.java @@ -150,6 +150,11 @@ public DatasetLinkingServiceBean dsLinking() { return null; } + @Override + public DatasetFieldServiceBean 
dsField() { + return null; + } + @Override public AuthenticationServiceBean authentication() { return null; From 739d0d268e81797dcfce4407e5851d2a88660eed Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 23 May 2024 09:26:37 -0400 Subject: [PATCH 79/81] update running/prod page to explain steps needed for containers --- .../source/container/running/production.rst | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/container/running/production.rst b/doc/sphinx-guides/source/container/running/production.rst index 0a628dc57b9..4994248a2e0 100644 --- a/doc/sphinx-guides/source/container/running/production.rst +++ b/doc/sphinx-guides/source/container/running/production.rst @@ -7,7 +7,32 @@ Production (Future) Status ------ -The images described in this guide are not yet recommended for production usage. +The images described in this guide are not yet recommended for production usage, but we think we are close. We'd like to make the following improvements: + +- Tagged releases + + - Currently, you have the choice between "alpha" images that change under your feet every time a new version of Dataverse is released or "unstable" images that track the "develop" branch, which is updated frequently. Instead, we'd like to offer images like 6.4, 6.5, etc. We are tracking this work at https://github.com/IQSS/dataverse/issues/10478 and there is some preliminary code at https://github.com/IQSS/dataverse/tree/10478-version-base-img . You are welcome to join the following discussions: + + - https://dataverse.zulipchat.com/#narrow/stream/375812-containers/topic/change.20version.20scheme.20base.20image.3F/near/405636949 + - https://dataverse.zulipchat.com/#narrow/stream/375812-containers/topic/tagging.20images.20with.20versions/near/366600747 + +- More docs on setting up additional features + + - How to set up previewers. See https://github.com/IQSS/dataverse/issues/10506 + - How to set up Rserve. 
+ +- Go through all the features in docs and check what needs to be done diffeaarently with containers + + - Check ports, for example. + +To join the discussion on what else might be needed before declaring images ready for production, please comment on https://dataverse.zulipchat.com/#narrow/stream/375812-containers/topic/containers.20for.20production/near/434979159 + +You are also very welcome to join our meetings. See "how to help" below. + +Limitations +----------- + +- Multiple apps servers are not supported. See :ref:`multiple-app-servers` for more on this topic. How to Help ----------- From f1c7ef645ba7bf467d7b333e5215d3b78b227d71 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 23 May 2024 09:37:44 -0400 Subject: [PATCH 80/81] fix typo --- doc/sphinx-guides/source/container/running/production.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/container/running/production.rst b/doc/sphinx-guides/source/container/running/production.rst index 4994248a2e0..3294db8ec1b 100644 --- a/doc/sphinx-guides/source/container/running/production.rst +++ b/doc/sphinx-guides/source/container/running/production.rst @@ -21,7 +21,7 @@ The images described in this guide are not yet recommended for production usage, - How to set up previewers. See https://github.com/IQSS/dataverse/issues/10506 - How to set up Rserve. -- Go through all the features in docs and check what needs to be done diffeaarently with containers +- Go through all the features in docs and check what needs to be done differently with containers - Check ports, for example. From 9c64eba97940c8500dba4affec75830938658056 Mon Sep 17 00:00:00 2001 From: Ben Companjen Date: Thu, 23 May 2024 19:54:26 +0200 Subject: [PATCH 81/81] Read zip file in try-with-resources (#10079) * Read zip file in try-with-resources * Prevent zip slip vulnerability Sonarcloud warns strongly against blindly trusting paths in a zip file. 
This first checks that the resolved path for an entry is in the directory that we want it to be, before trying to write the file. If a path is outside `dataverseLangDirectory + "/"`, return HTTP 400 --- .../dataverse/api/DatasetFieldServiceApi.java | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java b/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java index 00b7dfa6e36..01c51dc2b4c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java @@ -24,7 +24,6 @@ import jakarta.ejb.EJBException; import jakarta.json.Json; import jakarta.json.JsonArrayBuilder; -import jakarta.validation.ConstraintViolation; import jakarta.validation.ConstraintViolationException; import jakarta.ws.rs.Consumes; import jakarta.ws.rs.GET; @@ -488,9 +487,7 @@ private String parseControlledVocabulary(String[] values) { @Consumes("application/zip") @Path("loadpropertyfiles") public Response loadLanguagePropertyFile(File inputFile) { - try - { - ZipFile file = new ZipFile(inputFile); + try (ZipFile file = new ZipFile(inputFile)) { //Get file entries Enumeration entries = file.entries(); @@ -502,20 +499,26 @@ public Response loadLanguagePropertyFile(File inputFile) { { ZipEntry entry = entries.nextElement(); String dataverseLangFileName = dataverseLangDirectory + "/" + entry.getName(); - FileOutputStream fileOutput = new FileOutputStream(dataverseLangFileName); + File entryFile = new File(dataverseLangFileName); + String canonicalPath = entryFile.getCanonicalPath(); + if (canonicalPath.startsWith(dataverseLangDirectory + "/")) { + try (FileOutputStream fileOutput = new FileOutputStream(dataverseLangFileName)) { - InputStream is = file.getInputStream(entry); - BufferedInputStream bis = new BufferedInputStream(is); + InputStream is = file.getInputStream(entry); + 
BufferedInputStream bis = new BufferedInputStream(is); - while (bis.available() > 0) { - fileOutput.write(bis.read()); + while (bis.available() > 0) { + fileOutput.write(bis.read()); + } + } + } else { + logger.log(Level.SEVERE, "Zip Slip prevented: uploaded zip file tried to write to {}", canonicalPath); + return Response.status(400).entity("The zip file includes an illegal file path").build(); } - fileOutput.close(); } } - catch(IOException e) - { - e.printStackTrace(); + catch(IOException e) { + logger.log(Level.SEVERE, "Reading the language property zip file failed", e); return Response.status(500).entity("Internal server error. More details available at the server logs.").build(); }