Skip to content

Commit

Permalink
Merge pull request #9102 from GlobalDataverseCommunityConsortium/IQSS…
Browse files Browse the repository at this point in the history
…/9100_OpenAire_update_for_Orgs

IQSS/9100 OpenAire update for orgs
  • Loading branch information
kcondon authored May 5, 2023
2 parents 58dac8d + 02e683c commit a82cd90
Show file tree
Hide file tree
Showing 12 changed files with 87 additions and 186 deletions.
3 changes: 3 additions & 0 deletions doc/release-notes/9100-schema.org-updates.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Changes made in v5.13 and v5.14, across multiple PRs, to improve the Schema.org metadata embedded in dataset pages will only be propagated to the Schema.org JSON-LD metadata export after a reExportAll() has been run.

The 5.14 release notes should include the standard instructions for doing a reExportAll after updating the code.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO;
import edu.harvard.iq.dataverse.api.dto.FieldDTO;
import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO;
import edu.harvard.iq.dataverse.util.PersonOrOrgUtil;
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
import edu.harvard.iq.dataverse.util.json.JsonUtil;
import java.util.regex.Matcher;
Expand Down Expand Up @@ -252,72 +253,26 @@ public static void writeCreatorsElement(XMLStreamWriter xmlw, DatasetVersionDTO
if (StringUtils.isNotBlank(creatorName)) {
creator_check = writeOpenTag(xmlw, "creators", creator_check);
xmlw.writeStartElement("creator"); // <creator>

boolean nameType_check = false;

Map<String, String> creator_map = new HashMap<String, String>();
if ((StringUtils.containsIgnoreCase(nameIdentifierScheme, "orcid"))) {
JsonObject creatorObj = PersonOrOrgUtil.getPersonOrOrganization(creatorName, false,
StringUtils.containsIgnoreCase(nameIdentifierScheme, "orcid"));

// creatorName=<FamilyName>, <FirstName>
if (creatorObj.getBoolean("isPerson")) {
creator_map.put("nameType", "Personal");
nameType_check = true;
}
// ToDo - the algorithm to determine if this is a Person or Organization here
// has been abstracted into a separate
// edu.harvard.iq.dataverse.util.PersonOrOrgUtil class that could be used here
// to avoid duplication/variants of the algorithm
creatorName = Cleanup.normalize(creatorName);
// Datacite algorithm, https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313
if (creatorName.contains(",")) {
String givenName = FirstNames.getInstance().getFirstName(creatorName);
boolean isOrganization = Organizations.getInstance().isOrganization(creatorName);

// creatorName=<FamilyName>, <FirstName>
if (givenName != null && !isOrganization) {
// givenName ok
creator_map.put("nameType", "Personal");
nameType_check = true;
} else if (isOrganization) {
creator_map.put("nameType", "Organizational");
nameType_check = false;
}
writeFullElement(xmlw, null, "creatorName", creator_map, creatorName, language);

if ((nameType_check) && (!creatorName.replaceFirst(",", "").contains(","))) {
// creatorName=<FamilyName>, <FirstName>
String[] fullName = creatorName.split(", ");
if (fullName.length == 2) {
givenName = fullName[1];
String familyName = fullName[0];

writeFullElement(xmlw, null, "givenName", null, givenName, language);
writeFullElement(xmlw, null, "familyName", null, familyName, language);
} else {
// It's possible to get here if "Smith," is entered as an author name.
logger.info("Unable to write givenName and familyName based on creatorName '" + creatorName + "'.");
}
}
} else {
String givenName = FirstNames.getInstance().getFirstName(creatorName);
boolean isOrganization = Organizations.getInstance().isOrganization(creatorName);

if (givenName != null && !isOrganization) {
// givenName ok, creatorName=<FirstName> <FamilyName>
creator_map.put("nameType", "Personal");
nameType_check = true;
writeFullElement(xmlw, null, "creatorName", creator_map, creatorName, language);

String familyName = "";
if (givenName.length() + 1 < creatorName.length()) {
familyName = creatorName.substring(givenName.length() + 1);
}

writeFullElement(xmlw, null, "givenName", null, givenName, language);
writeFullElement(xmlw, null, "familyName", null, familyName, language);
} else {
// default
if (isOrganization) {
creator_map.put("nameType", "Organizational");
}
writeFullElement(xmlw, null, "creatorName", creator_map, creatorName, language);
}
creator_map.put("nameType", "Organizational");
}
writeFullElement(xmlw, null, "creatorName", creator_map,
creatorObj.getString("fullName"), language);
if (creatorObj.containsKey("givenName")) {
writeFullElement(xmlw, null, "givenName", null, creatorObj.getString("givenName"),
language);
}
if (creatorObj.containsKey("familyName")) {
writeFullElement(xmlw, null, "familyName", null, creatorObj.getString("familyName"),
language);
}

if (StringUtils.isNotBlank(nameIdentifier)) {
Expand Down Expand Up @@ -712,61 +667,23 @@ public static void writeContributorElement(XMLStreamWriter xmlw, String contribu
boolean nameType_check = false;
Map<String, String> contributor_map = new HashMap<String, String>();

// ToDo - the algorithm to determine if this is a Person or Organization here
// has been abstracted into a separate
// edu.harvard.iq.dataverse.util.PersonOrOrgUtil class that could be used here
// to avoid duplication/variants of the algorithm
JsonObject contributorObj = PersonOrOrgUtil.getPersonOrOrganization(contributorName,
("ContactPerson".equals(contributorType) && !isValidEmailAddress(contributorName)), false);

contributorName = Cleanup.normalize(contributorName);
// Datacite algorithm, https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313
if (contributorName.contains(",")) {
String givenName = FirstNames.getInstance().getFirstName(contributorName);
boolean isOrganization = Organizations.getInstance().isOrganization(contributorName);

// contributorName=<FamilyName>, <FirstName>
if (givenName != null && !isOrganization) {
// givenName ok
if (contributorObj.getBoolean("isPerson")) {
if(contributorObj.containsKey("givenName")) {
contributor_map.put("nameType", "Personal");
nameType_check = true;
// re: the above toDo - the ("ContactPerson".equals(contributorType) &&
// !isValidEmailAddress(contributorName)) clause in the next line could/should
// be sent as the OrgIfTied boolean parameter
} else if (isOrganization || ("ContactPerson".equals(contributorType) && !isValidEmailAddress(contributorName))) {
contributor_map.put("nameType", "Organizational");
}
writeFullElement(xmlw, null, "contributorName", contributor_map, contributorName, language);

if ((nameType_check) && (!contributorName.replaceFirst(",", "").contains(","))) {
// contributorName=<FamilyName>, <FirstName>
String[] fullName = contributorName.split(", ");
givenName = fullName[1];
String familyName = fullName[0];

writeFullElement(xmlw, null, "givenName", null, givenName, language);
writeFullElement(xmlw, null, "familyName", null, familyName, language);
}
} else {
String givenName = FirstNames.getInstance().getFirstName(contributorName);
boolean isOrganization = Organizations.getInstance().isOrganization(contributorName);

if (givenName != null && !isOrganization) {
contributor_map.put("nameType", "Personal");
writeFullElement(xmlw, null, "contributorName", contributor_map, contributorName, language);

String familyName = "";
if (givenName.length() + 1 < contributorName.length()) {
familyName = contributorName.substring(givenName.length() + 1);
}
contributor_map.put("nameType", "Organizational");
}
writeFullElement(xmlw, null, "contributorName", contributor_map, contributorName, language);

writeFullElement(xmlw, null, "givenName", null, givenName, language);
writeFullElement(xmlw, null, "familyName", null, familyName, language);
} else {
// default
if (isOrganization || ("ContactPerson".equals(contributorType) && !isValidEmailAddress(contributorName))) {
contributor_map.put("nameType", "Organizational");
}
writeFullElement(xmlw, null, "contributorName", contributor_map, contributorName, language);
}
if (contributorObj.containsKey("givenName")) {
writeFullElement(xmlw, null, "givenName", null, contributorObj.getString("givenName"), language);
}
if (contributorObj.containsKey("familyName")) {
writeFullElement(xmlw, null, "familyName", null, contributorObj.getString("familyName"), language);
}

if (StringUtils.isNotBlank(contributorAffiliation)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package edu.harvard.iq.dataverse.export.openaire;
package edu.harvard.iq.dataverse.util;

import java.io.BufferedReader;
import java.io.IOException;
Expand All @@ -9,10 +9,10 @@
import java.util.logging.Level;

/**
*
* Used by PersonOrOrgUtil
* @author [email protected]
*/
public class FirstNames {
class FirstNames {

private static FirstNames instance = null;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package edu.harvard.iq.dataverse.export.openaire;
package edu.harvard.iq.dataverse.util;

import edu.harvard.iq.dataverse.util.StringUtil;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
Expand All @@ -15,10 +14,10 @@
import opennlp.tools.util.Span;

/**
*
* Used by PersonOrOrgUtil
* @author [email protected]
*/
public class Organizations {
class Organizations {

private static Organizations instance = null;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@
import javax.json.JsonObjectBuilder;
import javax.json.JsonString;

import edu.harvard.iq.dataverse.export.openaire.Cleanup;
import edu.harvard.iq.dataverse.export.openaire.FirstNames;
import edu.harvard.iq.dataverse.export.openaire.Organizations;
import edu.harvard.iq.dataverse.util.json.JsonUtil;
import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder;

Expand Down Expand Up @@ -69,7 +66,7 @@ public class PersonOrOrgUtil {
* @return
*/
public static JsonObject getPersonOrOrganization(String name, boolean organizationIfTied, boolean isPerson) {
name = Cleanup.normalize(name);
name = StringUtil.normalize(name);

String givenName = null;
String familyName = null;
Expand Down
21 changes: 21 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/util/StringUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import javax.crypto.IllegalBlockSizeException;
import javax.crypto.NoSuchPaddingException;
import javax.crypto.spec.SecretKeySpec;

import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;

/**
Expand Down Expand Up @@ -198,4 +200,23 @@ private static SecretKeySpec generateKeyFromString(final String secKey) throws U
SecretKeySpec secretKeySpec = new SecretKeySpec(key, "AES");
return secretKeySpec;
}

/**
 * Cleans up the spacing of a personal or organizational name.
 * <p>
 * The input is trimmed with {@link String#trim()}, every comma (together with
 * any spaces directly after it) is rewritten as a comma followed by a single
 * space, and each run of consecutive spaces is collapsed to one space. Only
 * the space character is collapsed; other whitespace inside the value is left
 * untouched. Because trimming happens before the comma rewrite, an input that
 * ends in a comma comes back with one trailing space after that comma.
 *
 * @author [email protected]
 *
 * @param sentence full name or organization name; may be {@code null}
 * @return the normalized value, or the empty string when {@code sentence} is
 *         {@code null} or blank (never {@code null})
 */
static public String normalize(String sentence) {
    if (!StringUtils.isBlank(sentence)) {
        String trimmed = sentence.trim();
        // Force exactly ", " after each comma before squeezing space runs.
        String commaSpaced = trimmed.replaceAll(", *", ", ");
        return commaSpaced.replaceAll(" +", " ");
    }
    return "";
}
}
29 changes: 0 additions & 29 deletions src/test/java/edu/harvard/iq/dataverse/export/CleanupTest.java

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package edu.harvard.iq.dataverse.export;
package edu.harvard.iq.dataverse.util;

import edu.harvard.iq.dataverse.export.openaire.FirstNames;
import org.junit.Test;

import edu.harvard.iq.dataverse.util.FirstNames;

import static org.junit.Assert.*;

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package edu.harvard.iq.dataverse.export;
package edu.harvard.iq.dataverse.util;

import edu.harvard.iq.dataverse.export.openaire.Organizations;
import org.junit.Test;

import edu.harvard.iq.dataverse.util.Organizations;

import static org.junit.Assert.*;

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package edu.harvard.iq.dataverse.util;

import edu.harvard.iq.dataverse.export.openaire.Organizations;
import edu.harvard.iq.dataverse.util.json.JsonUtil;

import org.junit.Ignore;
Expand Down
18 changes: 18 additions & 0 deletions src/test/java/edu/harvard/iq/dataverse/util/StringUtilTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -354,5 +354,23 @@ public void testNonEmpty_emptyString() {
String expected = "";
assertFalse(StringUtil.nonEmpty(expected));
}

/**
 * Verifies the whitespace cleanup that {@code StringUtil.normalize} applies
 * to full names and organization names.
 * <p>
 * Personal names are given either as {@code <First Names> <Family Name>} or
 * as {@code <Family Name>, <First Names>}.
 *
 * @author [email protected]
 */
@Test
public void testNormalize() {
    // JUnit expects assertEquals(expected, actual); keeping that order makes
    // failure messages report the right value as "expected".

    // Personal names: surrounding whitespace is trimmed.
    assertEquals("Francesco", StringUtil.normalize(" Francesco "));
    assertEquals("Francesco Cadili", StringUtil.normalize("Francesco Cadili "));

    // Personal names: a comma is always followed by exactly one space.
    assertEquals("Cadili, Francesco", StringUtil.normalize(" Cadili,Francesco"));
    assertEquals("Cadili, Francesco", StringUtil.normalize("Cadili, Francesco "));

    // Null and blank input both normalize to the empty string.
    assertEquals("", StringUtil.normalize(null));
    assertEquals("", StringUtil.normalize(""));
    assertEquals("", StringUtil.normalize("   "));

    // Organization names: runs of spaces collapse, comma spacing is fixed.
    assertEquals("The Dataverse Project", StringUtil.normalize(" The  Dataverse   Project "));
    assertEquals("Example Research, Inc.", StringUtil.normalize("Example Research,Inc."));
    assertEquals("Example Research, Inc.", StringUtil.normalize("Example Research,   Inc. "));
}
}
}

0 comments on commit a82cd90

Please sign in to comment.