Skip to content

Commit

Permalink
Merge pull request #10016 from ErykKul/10015_ro_crate_mime_type
Browse files Browse the repository at this point in the history
RO-Crate metadata mime type detection
  • Loading branch information
sekmiller authored May 7, 2024
2 parents 28a9d44 + 0252cdb commit 71324d8
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 25 deletions.
10 changes: 10 additions & 0 deletions doc/release-notes/10015-RO-Crate-metadata-file.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Detection of mime-types based on a filename with extension and detection of the RO-Crate metadata files.

From now on, filenames with extensions can be added to the `MimeTypeDetectionByFileName.properties` file. Filenames added there take precedence over recognizing files by extension alone. For example, two new filenames have been added to that file:
```
ro-crate-metadata.json=application/ld+json; profile="http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate"
ro-crate-metadata.jsonld=application/ld+json; profile="http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate"
```

Therefore, files named `ro-crate-metadata.json` will from now on be detected as RO-Crate metadata files instead of as generic `JSON` files.
For more information on the RO-Crate specifications, see https://www.researchobject.org/ro-crate
74 changes: 49 additions & 25 deletions src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Optional;
import java.util.ResourceBundle;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;
Expand Down Expand Up @@ -176,6 +177,7 @@ public class FileUtil implements java.io.Serializable {
public static final String MIME_TYPE_NETCDF = "application/netcdf";
public static final String MIME_TYPE_XNETCDF = "application/x-netcdf";
public static final String MIME_TYPE_HDF5 = "application/x-hdf5";
public static final String MIME_TYPE_RO_CRATE = "application/ld+json; profile=\"http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate\"";

// File type "thumbnail classes" tags:

Expand Down Expand Up @@ -272,6 +274,11 @@ public static String getUserFriendlyFileType(DataFile dataFile) {
if (fileType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)){
return ShapefileHandler.SHAPEFILE_FILE_TYPE_FRIENDLY_NAME;
}
try {
return BundleUtil.getStringFromPropertyFile(fileType,"MimeTypeDisplay" );
} catch (MissingResourceException e) {
//NOOP: we will try again after trimming ";"
}
if (fileType.contains(";")) {
fileType = fileType.substring(0, fileType.indexOf(";"));
}
Expand All @@ -286,6 +293,11 @@ public static String getUserFriendlyFileType(DataFile dataFile) {
}

public static String getIndexableFacetFileType(DataFile dataFile) {
try {
return BundleUtil.getStringFromDefaultPropertyFile(dataFile.getContentType(),"MimeTypeFacets" );
} catch (MissingResourceException e) {
//NOOP: we will try again after trimming ";"
}
String fileType = getFileType(dataFile);
try {
return BundleUtil.getStringFromDefaultPropertyFile(fileType,"MimeTypeFacets" );
Expand Down Expand Up @@ -415,7 +427,10 @@ public static String retestIngestableFileType(File file, String fileType) {
}

public static String determineFileType(File f, String fileName) throws IOException{
String fileType = null;
String fileType = lookupFileTypeByFileName(fileName);
if (fileType != null) {
return fileType;
}
String fileExtension = getFileExtension(fileName);


Expand Down Expand Up @@ -474,17 +489,17 @@ public static String determineFileType(File f, String fileName) throws IOExcepti
if (fileType != null && fileType.startsWith("text/plain") && STATISTICAL_FILE_EXTENSION.containsKey(fileExtension)) {
fileType = STATISTICAL_FILE_EXTENSION.get(fileExtension);
} else {
fileType = determineFileTypeByNameAndExtension(fileName);
fileType = lookupFileTypeByExtension(fileName);
}

logger.fine("mime type recognized by extension: "+fileType);
}
} else {
logger.fine("fileExtension is null");
String fileTypeByName = lookupFileTypeFromPropertiesFile(fileName);
if(!StringUtil.isEmpty(fileTypeByName)) {
logger.fine(String.format("mime type: %s recognized by filename: %s", fileTypeByName, fileName));
fileType = fileTypeByName;
final String fileTypeByExtension = lookupFileTypeByExtensionFromPropertiesFile(fileName);
if(!StringUtil.isEmpty(fileTypeByExtension)) {
logger.fine(String.format("mime type: %s recognized by extension: %s", fileTypeByExtension, fileName));
fileType = fileTypeByExtension;
}
}

Expand Down Expand Up @@ -529,33 +544,41 @@ public static String determineFileType(File f, String fileName) throws IOExcepti
return fileType;
}

public static String determineFileTypeByNameAndExtension(String fileName) {
String mimetypesFileTypeMapResult = MIME_TYPE_MAP.getContentType(fileName);
/**
 * Resolves a MIME type for a file name, giving the full-file-name lookup
 * priority over the extension-based lookup.
 *
 * @param fileName name of the file whose type is being determined
 * @return the type found by the file-name lookup when that is non-null;
 *         otherwise the result of the extension-based lookup, which may
 *         itself be {@code null}
 */
public static String determineFileTypeByNameAndExtension(final String fileName) {
    final String typeByName = lookupFileTypeByFileName(fileName);
    // Only fall back to the extension when the exact file name is unknown.
    return typeByName != null ? typeByName : lookupFileTypeByExtension(fileName);
}

private static String lookupFileTypeByExtension(final String fileName) {
final String mimetypesFileTypeMapResult = MIME_TYPE_MAP.getContentType(fileName);
logger.fine("MimetypesFileTypeMap type by extension, for " + fileName + ": " + mimetypesFileTypeMapResult);
if (mimetypesFileTypeMapResult != null) {
if ("application/octet-stream".equals(mimetypesFileTypeMapResult)) {
return lookupFileTypeFromPropertiesFile(fileName);
} else {
return mimetypesFileTypeMapResult;
}
} else {
if (mimetypesFileTypeMapResult == null) {
return null;
}
if ("application/octet-stream".equals(mimetypesFileTypeMapResult)) {
return lookupFileTypeByExtensionFromPropertiesFile(fileName);
}
return mimetypesFileTypeMapResult;
}

public static String lookupFileTypeFromPropertiesFile(String fileName) {
String fileKey = FilenameUtils.getExtension(fileName);
String propertyFileName = "MimeTypeDetectionByFileExtension";
if(fileKey == null || fileKey.isEmpty()) {
fileKey = fileName;
propertyFileName = "MimeTypeDetectionByFileName";
/**
 * Looks up a MIME type keyed on the complete file name, using the
 * {@code MimeTypeDetectionByFileName} property file.
 *
 * @param fileName the file name used as the lookup key
 * @return whatever the shared property-file lookup returns for that key
 */
private static String lookupFileTypeByFileName(final String fileName) {
    final String bundleName = "MimeTypeDetectionByFileName";
    return lookupFileTypeFromPropertiesFile(bundleName, fileName);
}

}
String propertyFileNameOnDisk = propertyFileName + ".properties";
/**
 * Looks up a MIME type keyed on the file name's extension, using the
 * {@code MimeTypeDetectionByFileExtension} property file.
 *
 * @param fileName the file name whose extension is used as the lookup key
 * @return whatever the shared property-file lookup returns for that extension
 */
private static String lookupFileTypeByExtensionFromPropertiesFile(final String fileName) {
    return lookupFileTypeFromPropertiesFile(
            "MimeTypeDetectionByFileExtension",
            FilenameUtils.getExtension(fileName));
}

private static String lookupFileTypeFromPropertiesFile(final String propertyFileName, final String fileKey) {
final String propertyFileNameOnDisk = propertyFileName + ".properties";
try {
logger.fine("checking " + propertyFileNameOnDisk + " for file key " + fileKey);
return BundleUtil.getStringFromPropertyFile(fileKey, propertyFileName);
} catch (MissingResourceException ex) {
} catch (final MissingResourceException ex) {
logger.info(fileKey + " is a filename/extension Dataverse doesn't know about. Consider adding it to the " + propertyFileNameOnDisk + " file.");
return null;
}
Expand Down Expand Up @@ -810,7 +833,8 @@ public static boolean useRecognizedType(String suppliedContentType, String recog
|| canIngestAsTabular(recognizedType) || recognizedType.equals("application/fits-gzipped")
|| recognizedType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)
|| recognizedType.equalsIgnoreCase(BagItFileHandler.FILE_TYPE)
|| recognizedType.equals(MIME_TYPE_ZIP)) {
|| recognizedType.equals(MIME_TYPE_ZIP)
|| recognizedType.equals(MIME_TYPE_RO_CRATE)) {
return true;
}
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@ Makefile=text/x-makefile
Snakemake=text/x-snakemake
Dockerfile=application/x-docker-file
Vagrantfile=application/x-vagrant-file
ro-crate-metadata.json=application/ld+json; profile="http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate"
ro-crate-metadata.jsonld=application/ld+json; profile="http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate"
1 change: 1 addition & 0 deletions src/main/java/propertyFiles/MimeTypeDisplay.properties
Original file line number Diff line number Diff line change
Expand Up @@ -222,5 +222,6 @@ text/xml-graphml=GraphML Network Data
application/octet-stream=Unknown
application/x-docker-file=Docker Image File
application/x-vagrant-file=Vagrant Image File
application/ld+json;\u0020profile\u003d\u0022http\u003a//www.w3.org/ns/json-ld#flattened\u0020http\u003a//www.w3.org/ns/json-ld#compacted\u0020https\u003a//w3id.org/ro/crate\u0022=RO-Crate metadata
# Dataverse-specific
application/vnd.dataverse.file-package=Dataverse Package
1 change: 1 addition & 0 deletions src/main/java/propertyFiles/MimeTypeFacets.properties
Original file line number Diff line number Diff line change
Expand Up @@ -224,5 +224,6 @@ video/webm=Video
text/xml-graphml=Network Data
# Other
application/octet-stream=Unknown
application/ld+json;\u0020profile\u003d\u0022http\u003a//www.w3.org/ns/json-ld#flattened\u0020http\u003a//www.w3.org/ns/json-ld#compacted\u0020https\u003a//w3id.org/ro/crate\u0022=Metadata
# Dataverse-specific
application/vnd.dataverse.file-package=Data
25 changes: 25 additions & 0 deletions src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -409,4 +409,29 @@ public void testGZipFile() throws IOException {
assertEquals("application/fits-gzipped", contentType);
}

/**
 * Verifies that the RO-Crate content type (including its profile parameter)
 * maps to the dedicated display and facet labels, that a file named
 * ro-crate-metadata.json is detected with that content type, and that a
 * content type without a dedicated entry still resolves after the
 * ";"-suffixed parameters are stripped.
 */
@Test
public void testDetermineFileTypeROCrate() {
    final String expectedRoCrateType = "application/ld+json; profile=\"http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate\"";
    final DataFile roCrateDataFile = new DataFile(expectedRoCrateType);

    // The full content type, profile included, is looked up as-is.
    assertEquals(expectedRoCrateType, roCrateDataFile.getContentType());
    assertEquals("RO-Crate metadata", FileUtil.getUserFriendlyFileType(roCrateDataFile));
    assertEquals("Metadata", FileUtil.getIndexableFacetFileType(roCrateDataFile));

    // Detection driven by the exact file name.
    final File metadataFile = new File("src/test/resources/fileutil/ro-crate-metadata.json");
    try {
        final String detectedType = FileUtil.determineFileType(metadataFile, "ro-crate-metadata.json");
        assertEquals(expectedRoCrateType, detectedType);
    } catch (IOException ioe) {
        fail(ioe);
    }

    // test ";" removal: no exact entry exists for this string, so the
    // lookup has to succeed on the part before the ";".
    final String dockerTypeWithProfile = "application/x-docker-file; profile=\"http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate\"";
    final DataFile dockerFile = new DataFile(dockerTypeWithProfile);

    assertEquals(dockerTypeWithProfile, dockerFile.getContentType());
    assertEquals("Docker Image File", FileUtil.getUserFriendlyFileType(dockerFile));
    assertEquals("Code", FileUtil.getIndexableFacetFileType(dockerFile));
}

}

0 comments on commit 71324d8

Please sign in to comment.