Skip to content

Commit

Permalink
Merge pull request IQSS#11246 from QualitativeDataRepository/IQSS/112…
Browse files Browse the repository at this point in the history
…42-ExtVocabFix

IQSS/11242 ExternalIdentifier fix
  • Loading branch information
ofahimIQSS authored Mar 5, 2025
2 parents f5f2c72 + d6731a6 commit 99019de
Show file tree
Hide file tree
Showing 9 changed files with 162 additions and 109 deletions.
3 changes: 3 additions & 0 deletions doc/release-notes/11242-fix-oricid-recognition.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
A bug that caused ORCIDs starting with https://orcid.org/ and entered as an author identifier to be ignored when creating the DataCite metadata has been fixed. This primarily affected users of the ORCID external vocabulary script, as the manual entry form recommends not using the URL form.

The display of authorIdentifier, when not using any external vocabulary scripts, has been improved so that either the plain identifier (e.g. "0000-0002-1825-0097") or its URL form (e.g. "https://orcid.org/0000-0002-1825-0097") will result in valid links in the display (for identifier types that have a URL form). The URL form is now recommended when doing manual entry.
2 changes: 1 addition & 1 deletion doc/sphinx-guides/source/user/dataset-management.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ Adding a New Dataset
#. Click on the "Add Data" button and select "New Dataset" in the dropdown menu. **Note:** If you are on the root Dataverse collection or your My Data page, or you click the "Add Data" link in the navbar, the dataset you create will be hosted in the root Dataverse collection. You can change this by selecting another Dataverse collection in which you have permission to create datasets from the Host Dataverse collection dropdown in the create dataset form. This option will no longer be available after you create the dataset.
#. To quickly get started, enter at minimum all the required fields with an asterisk (e.g., the Dataset Title, Author Name, Description Text, Point of Contact Email, and Subject) to get a Data Citation with a DOI.

#. When entering author identifiers, select the type from the dropdown (e.g. "ORCID") and under "Identifier" enter just the unique identifier (e.g. "0000-0002-1825-0097") rather than the full URL (e.g. "https://orcid.org/0000-0002-1825-0097").
#. When entering author identifiers, select the type from the dropdown (e.g. "ORCID") and under "Identifier" enter the full URL (e.g. "https://orcid.org/0000-0002-1825-0097") for identifiers that have a URL form. The shorter form of the unique identifier (e.g. "0000-0002-1825-0097") can also be entered, but the URL form is preferred when available.

#. Scroll down to the "Files" section and click on "Select Files to Add" to add all the relevant files to your Dataset.
You can also upload your files directly from your Dropbox. **Tip:** You can drag and drop or select multiple files at a time from your desktop
Expand Down
9 changes: 7 additions & 2 deletions src/main/java/edu/harvard/iq/dataverse/DatasetAuthor.java
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,13 @@ public static String getIdentifierAsUrl(String idType, String idValue) {
if (idType != null && !idType.isEmpty() && idValue != null && !idValue.isEmpty()) {
try {
ExternalIdentifier externalIdentifier = ExternalIdentifier.valueOf(idType);
if (externalIdentifier.isValidIdentifier(idValue))
return externalIdentifier.format(idValue);
if (externalIdentifier.isValidIdentifier(idValue)) {
String uri = externalIdentifier.format(idValue);
//The DAI identifier is a URI starting with "info" - we don't want to return it as a URL (we assume non-null URLs should be links in the display)
if(uri.startsWith("http")) {
return uri;
}
}
} catch (Exception e) {
// non registered identifier
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,6 @@ private boolean isValidDate(String dateString, String pattern) {
return valid;
}

/**
 * Tests whether the entire user-supplied string matches the given pattern.
 *
 * @param userInput the identifier text to check
 * @param pattern   the compiled regular expression the input is checked against
 * @return {@code true} only when the whole of {@code userInput} matches {@code pattern}
 */
public boolean isValidAuthorIdentifier(String userInput, Pattern pattern) {
    // matches() requires the full input to match, not just a substring.
    final boolean entireInputMatches = pattern.matcher(userInput).matches();
    return entireInputMatches;
}

// Validate child fields against each other and return failure message or Optional.empty() if success
public Optional<String> validateChildConstraints(DatasetField dsf) {
final String fieldName = dsf.getDatasetFieldType().getName() != null ? dsf.getDatasetFieldType().getName() : "";
Expand Down
22 changes: 12 additions & 10 deletions src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,18 @@
import java.util.regex.Pattern;

public enum ExternalIdentifier {
ORCID("ORCID", "https://orcid.org/%s", "^\\d{4}-\\d{4}-\\d{4}-(\\d{4}|\\d{3}X)$"),
ISNI("ISNI", "http://www.isni.org/isni/%s", "^\\d*$"),
LCNA("LCNA", "http://id.loc.gov/authorities/names/%s", "^[a-z]+\\d+$"),
VIAF("VIAF", "https://viaf.org/viaf/%s", "^\\d*$"),
ORCID("ORCID", "https://orcid.org/%s", "^(https:\\/\\/orcid\\.org\\/)?\\d{4}-\\d{4}-\\d{4}-(\\d{4}|\\d{3}X)$"),
ISNI("ISNI", "http://www.isni.org/isni/%s", "^(http:\\/\\/www\\.isni\\.org\\/isni\\/)?(\\d{16}|\\d{15}X)$"),
LCNA("LCNA", "http://id.loc.gov/authorities/names/%s", "^(http:\\/\\/id\\.loc\\.gov\\/authorities\\/names\\/)?[a-z]+\\d+$"),
VIAF("VIAF", "https://viaf.org/viaf/%s", "^(https:\\/\\/viaf\\.org\\/viaf\\/)?\\d*$"),
// GND regex from https://www.wikidata.org/wiki/Property:P227
GND("GND", "https://d-nb.info/gnd/%s", "^1[01]?\\d{7}[0-9X]|[47]\\d{6}-\\d|[1-9]\\d{0,7}-[0-9X]|3\\d{7}[0-9X]$"),
GND("GND", "https://d-nb.info/gnd/%s", "^(https:\\/\\/d-nb\\.info\\/gnd\\/)?(1[01]?\\d{7}[0-9X]|[47]\\d{6}-\\d|[1-9]\\d{0,7}-[0-9X]|3\\d{7}[0-9X])$"),
// note: DAI (defined below) doesn't have a resolvable URL; its template produces an "info:" URI rather than an http(s) link
ResearcherID("ResearcherID", "https://publons.com/researcher/%s/", "^[A-Z\\d][A-Z\\d-]+[A-Z\\d]$"),
ScopusID("ScopusID", "https://www.scopus.com/authid/detail.uri?authorId=%s", "^\\d*$"),
ResearcherID("ResearcherID", "https://publons.com/researcher/%s/", "^([A-Z\\d][A-Z\\d-]+[A-Z\\d]|(https:\\/\\/publons\\.com\\/researcher\\/)?[A-Z\\d][A-Z\\d-]+[A-Z\\d]\\/)$"),
ScopusID("ScopusID", "https://www.scopus.com/authid/detail.uri?authorId=%s", "^(https:\\/\\/www\\.scopus\\.com\\/authid\\/detail\\.uri\\?authorId=)?\\d*$"),
// ROR regex from https://ror.readme.io/docs/identifier
ROR("ROR", "https://ror.org/%s", "^0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$"),
// In some contexts, we check for the full ROR URL.
ROR_FULL_URL("ROR", "https://ror.org/%s", "^(https:\\/\\/ror.org\\/)0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$");
ROR("ROR", "https://ror.org/%s", "^(https:\\/\\/ror\\.org\\/)?0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$"),
DAI("DAI", "info:eu-repo/dai/nl/%s", "^(info:eu-repo\\/dai\\/nl\\/)?[\\d]?\\d{8}[0-9X]$");

private String name;
private String template;
Expand Down Expand Up @@ -57,6 +56,9 @@ public Pattern getPattern() {
}

/**
 * Renders an identifier value in this identifier type's templated form.
 * <p>
 * The part of {@code template} that precedes its "%s" placeholder is treated as the
 * prefix. A value that already begins with that prefix is returned untouched; any
 * other value is substituted into the template.
 *
 * @param idValue the identifier, either bare or already carrying the template prefix
 * @return {@code idValue} itself when it starts with the prefix, otherwise the
 *         template with {@code idValue} filled in
 */
public String format(String idValue) {
    final int placeholderIndex = template.indexOf("%s");
    final String prefix = template.substring(0, placeholderIndex);
    final boolean alreadyPrefixed = idValue.startsWith(prefix);
    return alreadyPrefixed ? idValue : String.format(template, idValue);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,7 @@ private void writeEntityElements(XMLStreamWriter xmlw, String elementName, Strin
attributeMap.clear();
boolean isROR=false;
String orgName = affiliation;
ExternalIdentifier externalIdentifier = ExternalIdentifier.ROR_FULL_URL;
ExternalIdentifier externalIdentifier = ExternalIdentifier.ROR;
if (externalIdentifier.isValidIdentifier(orgName)) {
isROR = true;
JsonObject jo = getExternalVocabularyValue(orgName);
Expand Down Expand Up @@ -1540,7 +1540,7 @@ private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) thr
fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten);
boolean isROR=false;
String funderIdentifier = null;
ExternalIdentifier externalIdentifier = ExternalIdentifier.ROR_FULL_URL;
ExternalIdentifier externalIdentifier = ExternalIdentifier.ROR;
if (externalIdentifier.isValidIdentifier(funder)) {
isROR = true;
JsonObject jo = getExternalVocabularyValue(funder);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,68 +111,6 @@ public void testIsValid() {
assertFalse(result);
}

/** Runs the ORCID pattern against well-formed and malformed identifier strings. */
@Test
public void testIsValidAuthorIdentifierOrcid() {
    final DatasetFieldValueValidator fieldValidator = new DatasetFieldValueValidator();
    final Pattern orcidPattern = ExternalIdentifier.valueOf("ORCID").getPattern();

    // An "X" at the end of an ORCID is less common but still valid.
    final String[] accepted = {"0000-0002-1825-0097", "0000-0002-1694-233X"};
    for (String candidate : accepted) {
        assertTrue(fieldValidator.isValidAuthorIdentifier(candidate, orcidPattern));
    }

    // Space separators, leading or trailing whitespace, and arbitrary text are rejected.
    final String[] rejected = {
        "0000 0002 1825 0097",
        " 0000-0002-1825-0097",
        "0000-0002-1825-0097 ",
        "junk"
    };
    for (String candidate : rejected) {
        assertFalse(fieldValidator.isValidAuthorIdentifier(candidate, orcidPattern));
    }
}

/** Runs the ISNI pattern against one digit-only identifier and one junk string. */
@Test
public void testIsValidAuthorIdentifierIsni() {
    final DatasetFieldValueValidator fieldValidator = new DatasetFieldValueValidator();
    final Pattern isniPattern = ExternalIdentifier.valueOf("ISNI").getPattern();

    final boolean digitsAccepted = fieldValidator.isValidAuthorIdentifier("0000000121032683", isniPattern);
    assertTrue(digitsAccepted);

    final boolean junkAccepted = fieldValidator.isValidAuthorIdentifier("junk", isniPattern);
    assertFalse(junkAccepted);
}

/** Runs the LCNA pattern against letters-then-digits identifiers and one junk string. */
@Test
public void testIsValidAuthorIdentifierLcna() {
    final DatasetFieldValueValidator fieldValidator = new DatasetFieldValueValidator();
    final Pattern lcnaPattern = ExternalIdentifier.valueOf("LCNA").getPattern();

    // Both values are lowercase letters followed by digits.
    final String[] accepted = {"n82058243", "foobar123"};
    for (String candidate : accepted) {
        assertTrue(fieldValidator.isValidAuthorIdentifier(candidate, lcnaPattern));
    }

    assertFalse(fieldValidator.isValidAuthorIdentifier("junk", lcnaPattern));
}

/** Runs the VIAF pattern against one digit-only identifier and one junk string. */
@Test
public void testIsValidAuthorIdentifierViaf() {
    final DatasetFieldValueValidator fieldValidator = new DatasetFieldValueValidator();
    final Pattern viafPattern = ExternalIdentifier.valueOf("VIAF").getPattern();

    final boolean digitsAccepted = fieldValidator.isValidAuthorIdentifier("172389567", viafPattern);
    assertTrue(digitsAccepted);

    final boolean junkAccepted = fieldValidator.isValidAuthorIdentifier("junk", viafPattern);
    assertFalse(junkAccepted);
}

/** Runs the GND pattern against one hyphenated identifier and one junk string. */
@Test
public void testIsValidAuthorIdentifierGnd() {
    final DatasetFieldValueValidator fieldValidator = new DatasetFieldValueValidator();
    final Pattern gndPattern = ExternalIdentifier.valueOf("GND").getPattern();

    final boolean sampleAccepted = fieldValidator.isValidAuthorIdentifier("4079154-3", gndPattern);
    assertTrue(sampleAccepted);

    final boolean junkAccepted = fieldValidator.isValidAuthorIdentifier("junk", gndPattern);
    assertFalse(junkAccepted);
}

/** Runs the ROR pattern against one bare ROR identifier and one junk string. */
@Test
public void testIsValidAuthorIdentifierRor() {
    final DatasetFieldValueValidator fieldValidator = new DatasetFieldValueValidator();
    final Pattern rorPattern = ExternalIdentifier.valueOf("ROR").getPattern();

    final boolean bareIdAccepted = fieldValidator.isValidAuthorIdentifier("03vek6s52", rorPattern);
    assertTrue(bareIdAccepted);

    final boolean junkAccepted = fieldValidator.isValidAuthorIdentifier("junk", rorPattern);
    assertFalse(junkAccepted);
}

/** Runs the ROR_FULL_URL pattern against one full ROR URL and one junk string. */
@Test
public void testIsValidAuthorIdentifierRorFull() {
    final DatasetFieldValueValidator fieldValidator = new DatasetFieldValueValidator();
    final Pattern rorUrlPattern = ExternalIdentifier.valueOf("ROR_FULL_URL").getPattern();

    final boolean urlAccepted = fieldValidator.isValidAuthorIdentifier("https://ror.org/03vek6s52", rorUrlPattern);
    assertTrue(urlAccepted);

    final boolean junkAccepted = fieldValidator.isValidAuthorIdentifier("junk", rorUrlPattern);
    assertFalse(junkAccepted);
}

final Validator validator = Validation.buildDefaultValidatorFactory().getValidator();

@ParameterizedTest
Expand Down
Loading

0 comments on commit 99019de

Please sign in to comment.