From fbf1f9cbd05e18031170f206b3352c9a424cab16 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 16 Oct 2023 15:36:27 +0200 Subject: [PATCH 1/4] RO-Crate metadata mime type detection --- .../edu/harvard/iq/dataverse/util/FileUtil.java | 15 ++++++++++++++- .../MimeTypeDetectionByFileName.properties | 2 ++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 1ad389fb0e2..a7c3488e57f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -86,6 +86,7 @@ import java.util.HashMap; import java.util.List; import java.util.Optional; +import java.util.ResourceBundle; import java.util.UUID; import java.util.logging.Level; import java.util.logging.Logger; @@ -182,6 +183,7 @@ public class FileUtil implements java.io.Serializable { public static final String MIME_TYPE_NETCDF = "application/netcdf"; public static final String MIME_TYPE_XNETCDF = "application/x-netcdf"; public static final String MIME_TYPE_HDF5 = "application/x-hdf5"; + public static final String MIME_TYPE_RO_CRATE = "application/ld+json; profile=\"http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate\""; // File type "thumbnail classes" tags: @@ -421,6 +423,11 @@ public static String retestIngestableFileType(File file, String fileType) { } public static String determineFileType(File f, String fileName) throws IOException{ + final ResourceBundle bundle = BundleUtil.getResourceBundle("MimeTypeDetectionByFileName"); + if (bundle.keySet().contains(fileName)) { + return bundle.getString(fileName); + } + String fileType = null; String fileExtension = getFileExtension(fileName); @@ -545,6 +552,11 @@ public static String determineFileType(File f, String fileName) throws IOExcepti } public static String determineFileTypeByNameAndExtension(String 
fileName) { + final ResourceBundle bundle = BundleUtil.getResourceBundle("MimeTypeDetectionByFileName"); + if (bundle.keySet().contains(fileName)) { + return bundle.getString(fileName); + } + String mimetypesFileTypeMapResult = MIME_TYPE_MAP.getContentType(fileName); logger.fine("MimetypesFileTypeMap type by extension, for " + fileName + ": " + mimetypesFileTypeMapResult); if (mimetypesFileTypeMapResult != null) { @@ -825,7 +837,8 @@ public static boolean useRecognizedType(String suppliedContentType, String recog || canIngestAsTabular(recognizedType) || recognizedType.equals("application/fits-gzipped") || recognizedType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE) || recognizedType.equalsIgnoreCase(BagItFileHandler.FILE_TYPE) - || recognizedType.equals(MIME_TYPE_ZIP)) { + || recognizedType.equals(MIME_TYPE_ZIP) + || recognizedType.equals(MIME_TYPE_RO_CRATE)) { return true; } return false; diff --git a/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties b/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties index 70b0c4e371e..5c1a22bfd5f 100644 --- a/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties +++ b/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties @@ -2,3 +2,5 @@ Makefile=text/x-makefile Snakemake=text/x-snakemake Dockerfile=application/x-docker-file Vagrantfile=application/x-vagrant-file +ro-crate-metadata.json=application/ld+json; profile="http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate" +ro-crate-metadata.jsonld=application/ld+json; profile="http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate" From fbc6a54ba067e24f135d11cc7f66a950838c45a0 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 16 Oct 2023 19:34:46 +0200 Subject: [PATCH 2/4] fixed display and facet string for mime types with profile + test + reslease notes --- .../10015-RO-Crate-metadata-file.md | 10 +++++++ 
.../harvard/iq/dataverse/util/FileUtil.java | 10 +++++++ .../propertyFiles/MimeTypeDisplay.properties | 1 + .../propertyFiles/MimeTypeFacets.properties | 1 + .../iq/dataverse/util/FileUtilTest.java | 29 +++++++++++++++++++ .../resources/fileutil/ro-crate-metadata.json | 1 + 6 files changed, 52 insertions(+) create mode 100644 doc/release-notes/10015-RO-Crate-metadata-file.md create mode 100644 src/test/resources/fileutil/ro-crate-metadata.json diff --git a/doc/release-notes/10015-RO-Crate-metadata-file.md b/doc/release-notes/10015-RO-Crate-metadata-file.md new file mode 100644 index 00000000000..4b018a634f7 --- /dev/null +++ b/doc/release-notes/10015-RO-Crate-metadata-file.md @@ -0,0 +1,10 @@ +Detection of mime-types based on a filename with extension and detection of the RO-Crate metadata files. + +From now on, filenames with extensions can be added to the `MimeTypeDetectionByFileName.properties` file. Filenames added there will take precedence over simply recognizing files by extensions. For example, two new filenames have been added to that file: +``` +ro-crate-metadata.json=application/ld+json; profile="http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate" +ro-crate-metadata.jsonld=application/ld+json; profile="http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate" +``` + +Therefore, files named `ro-crate-metadata.json` will from now on be detected as RO-Crate metadata files, instead of as generic `JSON` files. 
+For more information on the RO-Crate specifications, see https://www.researchobject.org/ro-crate diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index a7c3488e57f..baff17af601 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -280,6 +280,11 @@ public static String getUserFriendlyFileType(DataFile dataFile) { if (fileType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)){ return ShapefileHandler.SHAPEFILE_FILE_TYPE_FRIENDLY_NAME; } + try { + return BundleUtil.getStringFromPropertyFile(fileType,"MimeTypeDisplay" ); + } catch (MissingResourceException e) { + //NOOP: we will try again after trimming ";" + } if (fileType.contains(";")) { fileType = fileType.substring(0, fileType.indexOf(";")); } @@ -294,6 +299,11 @@ public static String getUserFriendlyFileType(DataFile dataFile) { } public static String getIndexableFacetFileType(DataFile dataFile) { + try { + return BundleUtil.getStringFromDefaultPropertyFile(dataFile.getContentType(),"MimeTypeFacets" ); + } catch (MissingResourceException e) { + //NOOP: we will try again after trimming ";" + } String fileType = getFileType(dataFile); try { return BundleUtil.getStringFromDefaultPropertyFile(fileType,"MimeTypeFacets" ); diff --git a/src/main/java/propertyFiles/MimeTypeDisplay.properties b/src/main/java/propertyFiles/MimeTypeDisplay.properties index 295ac226fa1..8486a113116 100644 --- a/src/main/java/propertyFiles/MimeTypeDisplay.properties +++ b/src/main/java/propertyFiles/MimeTypeDisplay.properties @@ -222,5 +222,6 @@ text/xml-graphml=GraphML Network Data application/octet-stream=Unknown application/x-docker-file=Docker Image File application/x-vagrant-file=Vagrant Image File 
+application/ld+json;\u0020profile\u003d\u0022http\u003a//www.w3.org/ns/json-ld#flattened\u0020http\u003a//www.w3.org/ns/json-ld#compacted\u0020https\u003a//w3id.org/ro/crate\u0022=RO-Crate metadata # Dataverse-specific application/vnd.dataverse.file-package=Dataverse Package diff --git a/src/main/java/propertyFiles/MimeTypeFacets.properties b/src/main/java/propertyFiles/MimeTypeFacets.properties index aaab66f20ae..831c509b860 100644 --- a/src/main/java/propertyFiles/MimeTypeFacets.properties +++ b/src/main/java/propertyFiles/MimeTypeFacets.properties @@ -224,5 +224,6 @@ video/webm=Video text/xml-graphml=Network Data # Other application/octet-stream=Unknown +application/ld+json;\u0020profile\u003d\u0022http\u003a//www.w3.org/ns/json-ld#flattened\u0020http\u003a//www.w3.org/ns/json-ld#compacted\u0020https\u003a//w3id.org/ro/crate\u0022=Metadata # Dataverse-specific application/vnd.dataverse.file-package=Data diff --git a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java index 2cfe9f25d7e..396d613e768 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java @@ -371,4 +371,33 @@ public void testHdf4File() throws IOException { assertEquals("application/octet-stream", contentType); } + @Test + public void testDetermineFileTypeROCrate() { + final String roCrateContentType = "application/ld+json; profile=\"http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate\""; + final DataFile rocrate = new DataFile(roCrateContentType); + + assertEquals(roCrateContentType, rocrate.getContentType()); + assertEquals("RO-Crate metadata", FileUtil.getUserFriendlyFileType(rocrate)); + assertEquals("Metadata", FileUtil.getIndexableFacetFileType(rocrate)); + + final File roCrateFile = new File("src/test/resources/fileutil/ro-crate-metadata.json"); + if (roCrateFile.exists()) { + try { 
+ assertEquals(roCrateContentType, FileUtil.determineFileType(roCrateFile, "ro-crate-metadata.json")); + } catch (IOException ex) { + Logger.getLogger(FileUtilTest.class.getName()).log(Level.SEVERE, null, ex); + } + } else { + fail("File does not exist: " + roCrateFile.toPath().toString()); + } + + // test ";" removal + final String dockerFileWithProfile = "application/x-docker-file; profile=\"http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate\""; + final DataFile dockerDataFile = new DataFile(dockerFileWithProfile); + + assertEquals(dockerFileWithProfile, dockerDataFile.getContentType()); + assertEquals("Docker Image File", FileUtil.getUserFriendlyFileType(dockerDataFile)); + assertEquals("Code", FileUtil.getIndexableFacetFileType(dockerDataFile)); + } + } diff --git a/src/test/resources/fileutil/ro-crate-metadata.json b/src/test/resources/fileutil/ro-crate-metadata.json new file mode 100644 index 00000000000..5ac4b24ad2b --- /dev/null +++ b/src/test/resources/fileutil/ro-crate-metadata.json @@ -0,0 +1 @@ 
+{"@context":["https://w3id.org/ro/crate/1.1/context",{"metadataContext":"https://language-archives.services/metadata-definitions#metadataContext","orthographicNotes":"https://language-archives.services/metadata-definitions#orthographicNotes","media":"https://language-archives.services/metadata-definitions#media","comments":"https://language-archives.services/metadata-definitions#comments","private":"https://language-archives.services/metadata-definitions#private","depositFormReceived":"https://language-archives.services/metadata-definitions#depositFormReceived","subjectLanguages":"https://language-archives.services/metadata-definitions#subjectLanguages","originatedOn":"https://language-archives.services/metadata-definitions#originatedOn","languageAsGiven":"https://language-archives.services/metadata-definitions#languageAsGiven","contentLanguages":"https://language-archives.services/metadata-definitions#contentLanguages","dialect":"https://language-archives.services/metadata-definitions#dialect","discourseType":"https://language-archives.services/metadata-definitions#discourseType","bornDigital":"https://language-archives.services/metadata-definitions#bornDigital","accessNarrative":"https://language-archives.services/metadata-definitions#accessNarrative","receivedOn":"https://language-archives.services/metadata-definitions#receivedOn","digitisedOn":"https://language-archives.services/metadata-definitions#digitisedOn","ingestNotes":"https://language-archives.services/metadata-definitions#ingestNotes","adminComment":"https://language-archives.services/metadata-definitions#adminComment","external":"https://language-archives.services/metadata-definitions#external","dataType":"https://language-archives.services/metadata-definitions#dataType","citeAs":"https://language-archives.services/metadata-definitions#citeAs","originatedOnNarrative":"https://language-archives.services/metadata-definitions#originatedOnNarrative","countries":"https://language-archives.services/metadat
a-definitions#countries","tapesReturned":"https://language-archives.services/metadata-definitions#tapesReturned","originalMedia":"https://language-archives.services/metadata-definitions#originalMedia","metadataExportable":"https://language-archives.services/metadata-definitions#metadataExportable","metadataImportedOn":"https://language-archives.services/metadata-definitions#metadataImportedOn","metadataExportedOn":"https://language-archives.services/metadata-definitions#metadataExportedOn","tracking":"https://language-archives.services/metadata-definitions#tracking","fieldsOfResearch":"https://language-archives.services/metadata-definitions#fieldsOfResearch","role":"https://language-archives.services/metadata-definitions#role","doi":"https://language-archives.services/metadata-definitions#doi","sampleRate":"https://language-archives.services/metadata-definitions#sampleRate","channels":"https://language-archives.services/metadata-definitions#channels","fps":"https://language-archives.services/metadata-definitions#fps","essenceId":"https://language-archives.services/metadata-definitions#essenceId"}],"@graph":[{"@id":"#Erakor village","@type":"Place","name":"Erakor village"},{"@id":"#country_Australia","@type":"Country","name":"Australia"},{"@id":"#country_Vanuatu","@type":"Country","code":"VU","name":"Vanuatu"},{"@id":"#country_null","@type":"Country"},{"@id":"#geo_166.427,-22.283,166.467,-22.241","@type":"GeoShape","box":"166.427,-22.283 166.467,-22.241"},{"@id":"#geo_168.159,-17.83,168.594,-17.585","@type":"GeoShape","box":"168.159,-17.83 168.594,-17.585"},{"@id":"#geo_168.217,-17.8235,168.317,-17.7235","@type":"GeoShape","box":"168.217,-17.8235 
168.317,-17.7235"},{"@id":"#identifier_collectionId","@type":"PropertyValue","name":"collectionIdentifier","value":"NT1"},{"@id":"#identifier_doi","@type":"PropertyValue","name":"doi","value":"10.4225/72/56F94A61DA9EC"},{"@id":"#identifier_domain","@type":"PropertyValue","name":"domain","value":"paradisec.org.au"},{"@id":"#identifier_hashid","@type":"PropertyValue","name":"hashId","value":"72b3dc1401c8ff06aacba0990a128fc113cf9ad5275f494b05c1142177356561bd7f4c0e8800bade2cbbbed75f6d9d019894735ad7e40762684d243a442d658a"},{"@id":"#identifier_id","@type":"PropertyValue","name":"id","value":"/paradisec.org.au/NT1/98007"},{"@id":"#identifier_itemId","@type":"PropertyValue","name":"itemIdentifier","value":"98007"},{"@id":"#language_bis","@type":"Language","code":"bis","location":{"@id":"#geo_166.427,-22.283,166.467,-22.241"},"name":"Bislama"},{"@id":"#language_erk","@type":"Language","code":"erk","location":{"@id":"#geo_168.159,-17.83,168.594,-17.585"},"name":"Efate, South"},{"@id":"#place_geo_168.217,-17.8235,168.317,-17.7235","@type":"Place","geo":{"@id":"#geo_168.217,-17.8235,168.317,-17.7235"}},{"@id":"./","@type":["Dataset","RepositoryObject"],"additionalType":"item","contentLocation":[{"@id":"#Erakor village"},{"@id":"#place_geo_168.217,-17.8235,168.317,-17.7235"}],"contributor":[{"@id":"http://nla.gov.au/nla.party-479603"},{"@id":"Kalsarap Namaf"},{"@id":"Iokopeth null"},{"@id":"John Maklen"},{"@id":"Waia Tenene"}],"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2018-05-17T04:13:04.000Z","description":"NT1-98007. Text #047 (speaker is John Maklen. Text title: History of villages before Erakor); Text #048 (speaker is John Maklen. Text title: Mantu the flying fox and Erromango); Text #049. Text title: Asaraf (speaker is John Maklen);Text #050. Text title: Mumu and Kotkot (speaker is John Maklen); Text #051. Text title: Natopu ni Erakor—the spirit who lives at Erakor (speaker is John Maklen);Text #038. 
Text title: The need for respect (speaker is Iokopeth) Stories can be seen at NT8-TEXT. There are time-aligned transcripts of this item and handwritten transcripts by Manuel Wayane scanned as jpg files.","hasPart":[{"@id":"NT1-98007-001.jpg"},{"@id":"NT1-98007-002.jpg"},{"@id":"NT1-98007-003.jpg"},{"@id":"NT1-98007-004.jpg"},{"@id":"NT1-98007-005.jpg"},{"@id":"NT1-98007-006.jpg"},{"@id":"NT1-98007-007.jpg"},{"@id":"NT1-98007-008.jpg"},{"@id":"NT1-98007-009.jpg"},{"@id":"NT1-98007-010.jpg"},{"@id":"NT1-98007-011.jpg"},{"@id":"NT1-98007-012.jpg"},{"@id":"NT1-98007-013.jpg"},{"@id":"NT1-98007-014.jpg"},{"@id":"NT1-98007-015.jpg"},{"@id":"NT1-98007-016.jpg"},{"@id":"NT1-98007-017.jpg"},{"@id":"NT1-98007-018.jpg"},{"@id":"NT1-98007-019.jpg"},{"@id":"NT1-98007-020.jpg"},{"@id":"NT1-98007-021.jpg"},{"@id":"NT1-98007-022.jpg"},{"@id":"NT1-98007-023.jpg"},{"@id":"NT1-98007-024.jpg"},{"@id":"NT1-98007-025.jpg"},{"@id":"NT1-98007-026.jpg"},{"@id":"NT1-98007-027.jpg"},{"@id":"NT1-98007-028.jpg"},{"@id":"NT1-98007-029.jpg"},{"@id":"NT1-98007-030.jpg"},{"@id":"NT1-98007-031.jpg"},{"@id":"NT1-98007-98007A.mp3"},{"@id":"NT1-98007-98007A.wav"},{"@id":"NT1-98007-98007B.mp3"},{"@id":"NT1-98007-98007B.wav"},{"@id":"NT1-98007-98007az.xml"},{"@id":"NT1-98007-A.tab"},{"@id":"NT1-98007-A.xml"},{"@id":"NT1-98007-B.tab"},{"@id":"NT1-98007-B.xml"},{"@id":"NT1-98007-98007A.ixt"},{"@id":"NT1-98007-98007A.trs"},{"@id":"NT1-98007-98007A.flextext"},{"@id":"NT1-98007-98007A.eaf"},{"@id":"NT1-98007-98007B.eaf"}],"identifier":[{"@id":"#identifier_domain"},{"@id":"#identifier_id"},{"@id":"#identifier_hashid"},{"@id":"#identifier_itemId"},{"@id":"#identifier_collectionId"},{"@id":"#identifier_doi"}],"license":{"@id":"_:b0"},"memberOf":{"@id":"/paradisec.org.au/NT1"},"name":"Recordings in South Efate","publisher":{"@id":"http://nla.gov.au/nla.party-593909"},"bornDigital":0,"contentLanguages":[{"@id":"#language_bis"},{"@id":"#language_erk"}],"countries":{"@id":"#country_Vanuatu"},"digitisedOn":"Sun Dec 
31 2000 13:00:00 GMT+0000 (Coordinated Universal Time)","external":0,"languageAsGiven":"Nafsan","metadataExportable":1,"originalMedia":"audiocassette","originatedOn":"1998-10-03","private":0,"subjectLanguages":{"@id":"#language_erk"},"tapesReturned":0},{"@id":"Iokopeth null","@type":"Person","givenName":"Iokopeth","homeLocation":{"@id":"#country_null"},"name":"Iokopeth","role":{"@id":"role_speaker"}},{"@id":"John Maklen","@type":"Person","familyName":"Maklen","givenName":"John","homeLocation":{"@id":"#country_null"},"name":"John Maklen","role":{"@id":"role_speaker"}},{"@id":"Kalsarap Namaf","@type":"Person","familyName":"Namaf","givenName":"Kalsarap","homeLocation":{"@id":"#country_null"},"name":"Kalsarap Namaf","role":{"@id":"role_speaker"}},{"@id":"NT1-98007-001.jpg","@type":"File","contentSize":1658368,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:31.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-001.jpg","doi":"10.4225/72/575C8A369D680","essenceId":1010709},{"@id":"NT1-98007-002.jpg","@type":"File","contentSize":1816576,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:37.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-002.jpg","doi":"10.4225/72/575C8A3C15C98","essenceId":1010710},{"@id":"NT1-98007-003.jpg","@type":"File","contentSize":1811968,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:43.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-003.jpg","doi":"10.4225/72/575C8A41DD973","essenceId":1010711},{"@id":"NT1-98007-004.jpg","@type":"File","contentSize":1827840,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:48.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-004.jpg","doi":"10.4225/72/575C8A4767685","essenceId":1010712},{"@id":"NT1-98007-005.jpg","@type":"File","contentSize":1853440,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:54.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-005.jpg","doi":"10.4
225/72/575C8A4CD8971","essenceId":1010713},{"@id":"NT1-98007-006.jpg","@type":"File","contentSize":1796608,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:59.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-006.jpg","doi":"10.4225/72/575C8A525C618","essenceId":1010714},{"@id":"NT1-98007-007.jpg","@type":"File","contentSize":1780224,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:05.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-007.jpg","doi":"10.4225/72/575C8A5810189","essenceId":1010715},{"@id":"NT1-98007-008.jpg","@type":"File","contentSize":1737728,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:11.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-008.jpg","doi":"10.4225/72/575C8A5DB1113","essenceId":1010716},{"@id":"NT1-98007-009.jpg","@type":"File","contentSize":1781760,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:16.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-009.jpg","doi":"10.4225/72/575C8A63479C1","essenceId":1010717},{"@id":"NT1-98007-010.jpg","@type":"File","contentSize":1797632,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:22.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-010.jpg","doi":"10.4225/72/575C8A68B23D2","essenceId":1010718},{"@id":"NT1-98007-011.jpg","@type":"File","contentSize":1800704,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:28.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-011.jpg","doi":"10.4225/72/575C8A6E73D01","essenceId":1010719},{"@id":"NT1-98007-012.jpg","@type":"File","contentSize":1822720,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:33.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-012.jpg","doi":"10.4225/72/575C8A742DE00","essenceId":1010720},{"@id":"NT1-98007-013.jpg","@type":"File","contentSize":1809920,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:39.000Z","enc
odingFormat":"image/jpeg","name":"NT1-98007-013.jpg","doi":"10.4225/72/575C8A79B1B0F","essenceId":1010721},{"@id":"NT1-98007-014.jpg","@type":"File","contentSize":1821696,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:44.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-014.jpg","doi":"10.4225/72/575C8A7F3F253","essenceId":1010722},{"@id":"NT1-98007-015.jpg","@type":"File","contentSize":1626624,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:50.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-015.jpg","doi":"10.4225/72/575C8A84C0022","essenceId":1010723},{"@id":"NT1-98007-016.jpg","@type":"File","contentSize":1633792,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:56.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-016.jpg","doi":"10.4225/72/575C8A8A9A944","essenceId":1010724},{"@id":"NT1-98007-017.jpg","@type":"File","contentSize":1870336,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:01.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-017.jpg","doi":"10.4225/72/575C8A90506E6","essenceId":1010725},{"@id":"NT1-98007-018.jpg","@type":"File","contentSize":1858560,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:07.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-018.jpg","doi":"10.4225/72/575C8A95DF462","essenceId":1010726},{"@id":"NT1-98007-019.jpg","@type":"File","contentSize":1852416,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:12.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-019.jpg","doi":"10.4225/72/575C8A9B56F2F","essenceId":1010727},{"@id":"NT1-98007-020.jpg","@type":"File","contentSize":1838080,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:18.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-020.jpg","doi":"10.4225/72/575C8AA0F09B8","essenceId":1010728},{"@id":"NT1-98007-021.jpg","@type":"File","contentSize":1861120,"dateCreated":"2012-09-
27T10:08:01.000Z","dateModified":"2016-06-11T22:03:24.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-021.jpg","doi":"10.4225/72/575C8AA6B23AF","essenceId":1010729},{"@id":"NT1-98007-022.jpg","@type":"File","contentSize":1835008,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:29.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-022.jpg","doi":"10.4225/72/575C8AAC3A545","essenceId":1010730},{"@id":"NT1-98007-023.jpg","@type":"File","contentSize":1827328,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:35.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-023.jpg","doi":"10.4225/72/575C8AB1C991E","essenceId":1010731},{"@id":"NT1-98007-024.jpg","@type":"File","contentSize":1805312,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:40.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-024.jpg","doi":"10.4225/72/575C8AB74847D","essenceId":1010732},{"@id":"NT1-98007-025.jpg","@type":"File","contentSize":1912832,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:46.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-025.jpg","doi":"10.4225/72/575C8ABCD2B44","essenceId":1010733},{"@id":"NT1-98007-026.jpg","@type":"File","contentSize":1889792,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:51.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-026.jpg","doi":"10.4225/72/575C8AC26D69E","essenceId":1010734},{"@id":"NT1-98007-027.jpg","@type":"File","contentSize":1878528,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:57.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-027.jpg","doi":"10.4225/72/575C8AC7F3886","essenceId":1010735},{"@id":"NT1-98007-028.jpg","@type":"File","contentSize":1868288,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:02.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-028.jpg","doi":"10.4225/72/575C8ACD72196","essenceId":1010736},{"@id":"NT1-98007-029.j
pg","@type":"File","contentSize":1859584,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:08.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-029.jpg","doi":"10.4225/72/575C8AD2E8E82","essenceId":1010737},{"@id":"NT1-98007-030.jpg","@type":"File","contentSize":1859072,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:13.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-030.jpg","doi":"10.4225/72/575C8AD8775E6","essenceId":1010738},{"@id":"NT1-98007-031.jpg","@type":"File","contentSize":1708544,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:19.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-031.jpg","doi":"10.4225/72/575C8ADDE64B8","essenceId":1010739},{"@id":"NT1-98007-98007A.eaf","@type":"File","contentSize":165674,"dateCreated":"2016-08-01T05:00:06.000Z","dateModified":"2016-08-01T16:01:41.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007A.eaf","doi":"10.4225/72/579F725FDD059","essenceId":1100487},{"@id":"NT1-98007-98007A.flextext","@type":"File","contentSize":141244,"dateCreated":"2016-05-20T04:00:06.000Z","dateModified":"2016-06-24T12:41:36.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007A.flextext","doi":"10.4225/72/576D2A7B75614","essenceId":1086277},{"@id":"NT1-98007-98007A.ixt","@type":"File","contentSize":40299,"dateCreated":"2016-04-18T07:00:07.000Z","dateModified":"2016-06-24T08:25:06.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007A.ixt","doi":"10.4225/72/576CEE5CED1FB","essenceId":1085095},{"@id":"NT1-98007-98007A.mp3","@type":"File","bitrate":128009,"contentSize":43667584,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2019-11-29T09:17:34.000Z","duration":2729.02,"encodingFormat":"audio/mpeg","name":"NT1-98007-98007A.mp3","channels":2,"doi":"10.4225/72/575C8AE370B93","essenceId":1010740,"sampleRate":44100},{"@id":"NT1-98007-98007A.trs","@type":"File","contentSize":28292,"dateCreated":"2016-04-26T10:00:06.000
Z","dateModified":"2016-06-24T08:45:27.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007A.trs","doi":"10.4225/72/576CF32101764","essenceId":1085293},{"@id":"NT1-98007-98007A.wav","@type":"File","bitrate":4608000,"contentSize":1571894006,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2019-11-29T09:16:52.000Z","duration":2728.98,"encodingFormat":"audio/x-wav","name":"NT1-98007-98007A.wav","channels":2,"doi":"10.4225/72/575C8AE8E6E6B","essenceId":1010741,"sampleRate":96000},{"@id":"NT1-98007-98007B.eaf","@type":"File","contentSize":118748,"dateCreated":"2016-08-01T05:00:07.000Z","dateModified":"2016-08-01T16:01:47.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007B.eaf","doi":"10.4225/72/579F7265746C0","essenceId":1100493},{"@id":"NT1-98007-98007B.mp3","@type":"File","bitrate":128007,"contentSize":35305600,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2019-11-29T09:12:58.000Z","duration":2206.47,"encodingFormat":"audio/mpeg","name":"NT1-98007-98007B.mp3","channels":2,"doi":"10.4225/72/575C8AEE64BCA","essenceId":1010742,"sampleRate":44100},{"@id":"NT1-98007-98007B.wav","@type":"File","bitrate":4608000,"contentSize":1270917002,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2019-11-29T09:15:22.000Z","duration":2206.45,"encodingFormat":"audio/x-wav","name":"NT1-98007-98007B.wav","channels":2,"doi":"10.4225/72/575C8AF3D2DA0","essenceId":1010743,"sampleRate":96000},{"@id":"NT1-98007-98007az.xml","@type":"File","contentSize":48755,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:46.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007az.xml","doi":"10.4225/72/575C8AF94896E","essenceId":1010744},{"@id":"NT1-98007-A.tab","@type":"File","contentSize":27810,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:57.000Z","encodingFormat":"text/plain","name":"NT1-98007-A.tab","doi":"10.4225/72/575C8B043764B","essenceId":1010746},{"@id":"NT1-98007-A.xml","@type":"File",
"contentSize":48788,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:05:03.000Z","encodingFormat":"application/xml","name":"NT1-98007-A.xml","doi":"10.4225/72/575C8B09BDC88","essenceId":1010747},{"@id":"NT1-98007-B.tab","@type":"File","contentSize":20239,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:05:08.000Z","encodingFormat":"text/plain","name":"NT1-98007-B.tab","doi":"10.4225/72/575C8B0F4F8F1","essenceId":1010748},{"@id":"NT1-98007-B.xml","@type":"File","contentSize":35289,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:05:14.000Z","encodingFormat":"application/xml","name":"NT1-98007-B.xml","doi":"10.4225/72/575C8B14C6702","essenceId":1010749},{"@id":"Waia Tenene","@type":"Person","familyName":"Tenene","givenName":"Waia","homeLocation":{"@id":"#country_null"},"name":"Waia Tenene","role":{"@id":"role_speaker"}},{"@id":"_:b0","@type":"CreativeWork","name":"Open (subject to agreeing to PDSC access conditions)"},{"@id":"http://nla.gov.au/nla.party-479603","@type":"Person","email":"thien@unimelb.edu.au","familyName":"Thieberger","givenName":"Nick","homeLocation":{"@id":"#country_Australia"},"name":"Nick Thieberger","role":[{"@id":"role_collector"},{"@id":"role_depositor"},{"@id":"role_recorder"}]},{"@id":"http://nla.gov.au/nla.party-593909","@type":"Organization","name":"University of Melbourne"},{"@id":"ro-crate-metadata.json","@type":"CreativeWork","conformsTo":{"@id":"https://w3id.org/ro/crate/1.1/context"},"about":{"@id":"./"}},{"@id":"role_collector","@type":"Role","name":"collector"},{"@id":"role_depositor","@type":"Role","name":"depositor"},{"@id":"role_recorder","@type":"Role","name":"recorder"},{"@id":"role_speaker","@type":"Role","name":"speaker"}]} From f071658df01ba5dc5fcc8820ff4e9987bf85fd4d Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 22 Apr 2024 16:18:52 +0200 Subject: [PATCH 3/4] removed unneeded ro-crate test file --- 
.../edu/harvard/iq/dataverse/util/FileUtilTest.java | 12 ++++-------- src/test/resources/fileutil/ro-crate-metadata.json | 1 - 2 files changed, 4 insertions(+), 9 deletions(-) delete mode 100644 src/test/resources/fileutil/ro-crate-metadata.json diff --git a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java index bf63b6d39b3..29a7ae9934e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java @@ -390,14 +390,10 @@ public void testDetermineFileTypeROCrate() { assertEquals("Metadata", FileUtil.getIndexableFacetFileType(rocrate)); final File roCrateFile = new File("src/test/resources/fileutil/ro-crate-metadata.json"); - if (roCrateFile.exists()) { - try { - assertEquals(roCrateContentType, FileUtil.determineFileType(roCrateFile, "ro-crate-metadata.json")); - } catch (IOException ex) { - Logger.getLogger(FileUtilTest.class.getName()).log(Level.SEVERE, null, ex); - } - } else { - fail("File does not exist: " + roCrateFile.toPath().toString()); + try { + assertEquals(roCrateContentType, FileUtil.determineFileType(roCrateFile, "ro-crate-metadata.json")); + } catch (IOException ex) { + fail(ex); } // test ";" removal diff --git a/src/test/resources/fileutil/ro-crate-metadata.json b/src/test/resources/fileutil/ro-crate-metadata.json deleted file mode 100644 index 5ac4b24ad2b..00000000000 --- a/src/test/resources/fileutil/ro-crate-metadata.json +++ /dev/null @@ -1 +0,0 @@ 
-{"@context":["https://w3id.org/ro/crate/1.1/context",{"metadataContext":"https://language-archives.services/metadata-definitions#metadataContext","orthographicNotes":"https://language-archives.services/metadata-definitions#orthographicNotes","media":"https://language-archives.services/metadata-definitions#media","comments":"https://language-archives.services/metadata-definitions#comments","private":"https://language-archives.services/metadata-definitions#private","depositFormReceived":"https://language-archives.services/metadata-definitions#depositFormReceived","subjectLanguages":"https://language-archives.services/metadata-definitions#subjectLanguages","originatedOn":"https://language-archives.services/metadata-definitions#originatedOn","languageAsGiven":"https://language-archives.services/metadata-definitions#languageAsGiven","contentLanguages":"https://language-archives.services/metadata-definitions#contentLanguages","dialect":"https://language-archives.services/metadata-definitions#dialect","discourseType":"https://language-archives.services/metadata-definitions#discourseType","bornDigital":"https://language-archives.services/metadata-definitions#bornDigital","accessNarrative":"https://language-archives.services/metadata-definitions#accessNarrative","receivedOn":"https://language-archives.services/metadata-definitions#receivedOn","digitisedOn":"https://language-archives.services/metadata-definitions#digitisedOn","ingestNotes":"https://language-archives.services/metadata-definitions#ingestNotes","adminComment":"https://language-archives.services/metadata-definitions#adminComment","external":"https://language-archives.services/metadata-definitions#external","dataType":"https://language-archives.services/metadata-definitions#dataType","citeAs":"https://language-archives.services/metadata-definitions#citeAs","originatedOnNarrative":"https://language-archives.services/metadata-definitions#originatedOnNarrative","countries":"https://language-archives.services/metadat
a-definitions#countries","tapesReturned":"https://language-archives.services/metadata-definitions#tapesReturned","originalMedia":"https://language-archives.services/metadata-definitions#originalMedia","metadataExportable":"https://language-archives.services/metadata-definitions#metadataExportable","metadataImportedOn":"https://language-archives.services/metadata-definitions#metadataImportedOn","metadataExportedOn":"https://language-archives.services/metadata-definitions#metadataExportedOn","tracking":"https://language-archives.services/metadata-definitions#tracking","fieldsOfResearch":"https://language-archives.services/metadata-definitions#fieldsOfResearch","role":"https://language-archives.services/metadata-definitions#role","doi":"https://language-archives.services/metadata-definitions#doi","sampleRate":"https://language-archives.services/metadata-definitions#sampleRate","channels":"https://language-archives.services/metadata-definitions#channels","fps":"https://language-archives.services/metadata-definitions#fps","essenceId":"https://language-archives.services/metadata-definitions#essenceId"}],"@graph":[{"@id":"#Erakor village","@type":"Place","name":"Erakor village"},{"@id":"#country_Australia","@type":"Country","name":"Australia"},{"@id":"#country_Vanuatu","@type":"Country","code":"VU","name":"Vanuatu"},{"@id":"#country_null","@type":"Country"},{"@id":"#geo_166.427,-22.283,166.467,-22.241","@type":"GeoShape","box":"166.427,-22.283 166.467,-22.241"},{"@id":"#geo_168.159,-17.83,168.594,-17.585","@type":"GeoShape","box":"168.159,-17.83 168.594,-17.585"},{"@id":"#geo_168.217,-17.8235,168.317,-17.7235","@type":"GeoShape","box":"168.217,-17.8235 
168.317,-17.7235"},{"@id":"#identifier_collectionId","@type":"PropertyValue","name":"collectionIdentifier","value":"NT1"},{"@id":"#identifier_doi","@type":"PropertyValue","name":"doi","value":"10.4225/72/56F94A61DA9EC"},{"@id":"#identifier_domain","@type":"PropertyValue","name":"domain","value":"paradisec.org.au"},{"@id":"#identifier_hashid","@type":"PropertyValue","name":"hashId","value":"72b3dc1401c8ff06aacba0990a128fc113cf9ad5275f494b05c1142177356561bd7f4c0e8800bade2cbbbed75f6d9d019894735ad7e40762684d243a442d658a"},{"@id":"#identifier_id","@type":"PropertyValue","name":"id","value":"/paradisec.org.au/NT1/98007"},{"@id":"#identifier_itemId","@type":"PropertyValue","name":"itemIdentifier","value":"98007"},{"@id":"#language_bis","@type":"Language","code":"bis","location":{"@id":"#geo_166.427,-22.283,166.467,-22.241"},"name":"Bislama"},{"@id":"#language_erk","@type":"Language","code":"erk","location":{"@id":"#geo_168.159,-17.83,168.594,-17.585"},"name":"Efate, South"},{"@id":"#place_geo_168.217,-17.8235,168.317,-17.7235","@type":"Place","geo":{"@id":"#geo_168.217,-17.8235,168.317,-17.7235"}},{"@id":"./","@type":["Dataset","RepositoryObject"],"additionalType":"item","contentLocation":[{"@id":"#Erakor village"},{"@id":"#place_geo_168.217,-17.8235,168.317,-17.7235"}],"contributor":[{"@id":"http://nla.gov.au/nla.party-479603"},{"@id":"Kalsarap Namaf"},{"@id":"Iokopeth null"},{"@id":"John Maklen"},{"@id":"Waia Tenene"}],"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2018-05-17T04:13:04.000Z","description":"NT1-98007. Text #047 (speaker is John Maklen. Text title: History of villages before Erakor); Text #048 (speaker is John Maklen. Text title: Mantu the flying fox and Erromango); Text #049. Text title: Asaraf (speaker is John Maklen);Text #050. Text title: Mumu and Kotkot (speaker is John Maklen); Text #051. Text title: Natopu ni Erakor—the spirit who lives at Erakor (speaker is John Maklen);Text #038. 
Text title: The need for respect (speaker is Iokopeth) Stories can be seen at NT8-TEXT. There are time-aligned transcripts of this item and handwritten transcripts by Manuel Wayane scanned as jpg files.","hasPart":[{"@id":"NT1-98007-001.jpg"},{"@id":"NT1-98007-002.jpg"},{"@id":"NT1-98007-003.jpg"},{"@id":"NT1-98007-004.jpg"},{"@id":"NT1-98007-005.jpg"},{"@id":"NT1-98007-006.jpg"},{"@id":"NT1-98007-007.jpg"},{"@id":"NT1-98007-008.jpg"},{"@id":"NT1-98007-009.jpg"},{"@id":"NT1-98007-010.jpg"},{"@id":"NT1-98007-011.jpg"},{"@id":"NT1-98007-012.jpg"},{"@id":"NT1-98007-013.jpg"},{"@id":"NT1-98007-014.jpg"},{"@id":"NT1-98007-015.jpg"},{"@id":"NT1-98007-016.jpg"},{"@id":"NT1-98007-017.jpg"},{"@id":"NT1-98007-018.jpg"},{"@id":"NT1-98007-019.jpg"},{"@id":"NT1-98007-020.jpg"},{"@id":"NT1-98007-021.jpg"},{"@id":"NT1-98007-022.jpg"},{"@id":"NT1-98007-023.jpg"},{"@id":"NT1-98007-024.jpg"},{"@id":"NT1-98007-025.jpg"},{"@id":"NT1-98007-026.jpg"},{"@id":"NT1-98007-027.jpg"},{"@id":"NT1-98007-028.jpg"},{"@id":"NT1-98007-029.jpg"},{"@id":"NT1-98007-030.jpg"},{"@id":"NT1-98007-031.jpg"},{"@id":"NT1-98007-98007A.mp3"},{"@id":"NT1-98007-98007A.wav"},{"@id":"NT1-98007-98007B.mp3"},{"@id":"NT1-98007-98007B.wav"},{"@id":"NT1-98007-98007az.xml"},{"@id":"NT1-98007-A.tab"},{"@id":"NT1-98007-A.xml"},{"@id":"NT1-98007-B.tab"},{"@id":"NT1-98007-B.xml"},{"@id":"NT1-98007-98007A.ixt"},{"@id":"NT1-98007-98007A.trs"},{"@id":"NT1-98007-98007A.flextext"},{"@id":"NT1-98007-98007A.eaf"},{"@id":"NT1-98007-98007B.eaf"}],"identifier":[{"@id":"#identifier_domain"},{"@id":"#identifier_id"},{"@id":"#identifier_hashid"},{"@id":"#identifier_itemId"},{"@id":"#identifier_collectionId"},{"@id":"#identifier_doi"}],"license":{"@id":"_:b0"},"memberOf":{"@id":"/paradisec.org.au/NT1"},"name":"Recordings in South Efate","publisher":{"@id":"http://nla.gov.au/nla.party-593909"},"bornDigital":0,"contentLanguages":[{"@id":"#language_bis"},{"@id":"#language_erk"}],"countries":{"@id":"#country_Vanuatu"},"digitisedOn":"Sun Dec 
31 2000 13:00:00 GMT+0000 (Coordinated Universal Time)","external":0,"languageAsGiven":"Nafsan","metadataExportable":1,"originalMedia":"audiocassette","originatedOn":"1998-10-03","private":0,"subjectLanguages":{"@id":"#language_erk"},"tapesReturned":0},{"@id":"Iokopeth null","@type":"Person","givenName":"Iokopeth","homeLocation":{"@id":"#country_null"},"name":"Iokopeth","role":{"@id":"role_speaker"}},{"@id":"John Maklen","@type":"Person","familyName":"Maklen","givenName":"John","homeLocation":{"@id":"#country_null"},"name":"John Maklen","role":{"@id":"role_speaker"}},{"@id":"Kalsarap Namaf","@type":"Person","familyName":"Namaf","givenName":"Kalsarap","homeLocation":{"@id":"#country_null"},"name":"Kalsarap Namaf","role":{"@id":"role_speaker"}},{"@id":"NT1-98007-001.jpg","@type":"File","contentSize":1658368,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:31.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-001.jpg","doi":"10.4225/72/575C8A369D680","essenceId":1010709},{"@id":"NT1-98007-002.jpg","@type":"File","contentSize":1816576,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:37.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-002.jpg","doi":"10.4225/72/575C8A3C15C98","essenceId":1010710},{"@id":"NT1-98007-003.jpg","@type":"File","contentSize":1811968,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:43.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-003.jpg","doi":"10.4225/72/575C8A41DD973","essenceId":1010711},{"@id":"NT1-98007-004.jpg","@type":"File","contentSize":1827840,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:48.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-004.jpg","doi":"10.4225/72/575C8A4767685","essenceId":1010712},{"@id":"NT1-98007-005.jpg","@type":"File","contentSize":1853440,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:54.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-005.jpg","doi":"10.4
225/72/575C8A4CD8971","essenceId":1010713},{"@id":"NT1-98007-006.jpg","@type":"File","contentSize":1796608,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:01:59.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-006.jpg","doi":"10.4225/72/575C8A525C618","essenceId":1010714},{"@id":"NT1-98007-007.jpg","@type":"File","contentSize":1780224,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:05.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-007.jpg","doi":"10.4225/72/575C8A5810189","essenceId":1010715},{"@id":"NT1-98007-008.jpg","@type":"File","contentSize":1737728,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:11.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-008.jpg","doi":"10.4225/72/575C8A5DB1113","essenceId":1010716},{"@id":"NT1-98007-009.jpg","@type":"File","contentSize":1781760,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:16.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-009.jpg","doi":"10.4225/72/575C8A63479C1","essenceId":1010717},{"@id":"NT1-98007-010.jpg","@type":"File","contentSize":1797632,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:22.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-010.jpg","doi":"10.4225/72/575C8A68B23D2","essenceId":1010718},{"@id":"NT1-98007-011.jpg","@type":"File","contentSize":1800704,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:28.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-011.jpg","doi":"10.4225/72/575C8A6E73D01","essenceId":1010719},{"@id":"NT1-98007-012.jpg","@type":"File","contentSize":1822720,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:33.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-012.jpg","doi":"10.4225/72/575C8A742DE00","essenceId":1010720},{"@id":"NT1-98007-013.jpg","@type":"File","contentSize":1809920,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:39.000Z","enc
odingFormat":"image/jpeg","name":"NT1-98007-013.jpg","doi":"10.4225/72/575C8A79B1B0F","essenceId":1010721},{"@id":"NT1-98007-014.jpg","@type":"File","contentSize":1821696,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:44.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-014.jpg","doi":"10.4225/72/575C8A7F3F253","essenceId":1010722},{"@id":"NT1-98007-015.jpg","@type":"File","contentSize":1626624,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:50.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-015.jpg","doi":"10.4225/72/575C8A84C0022","essenceId":1010723},{"@id":"NT1-98007-016.jpg","@type":"File","contentSize":1633792,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:02:56.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-016.jpg","doi":"10.4225/72/575C8A8A9A944","essenceId":1010724},{"@id":"NT1-98007-017.jpg","@type":"File","contentSize":1870336,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:01.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-017.jpg","doi":"10.4225/72/575C8A90506E6","essenceId":1010725},{"@id":"NT1-98007-018.jpg","@type":"File","contentSize":1858560,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:07.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-018.jpg","doi":"10.4225/72/575C8A95DF462","essenceId":1010726},{"@id":"NT1-98007-019.jpg","@type":"File","contentSize":1852416,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:12.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-019.jpg","doi":"10.4225/72/575C8A9B56F2F","essenceId":1010727},{"@id":"NT1-98007-020.jpg","@type":"File","contentSize":1838080,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:18.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-020.jpg","doi":"10.4225/72/575C8AA0F09B8","essenceId":1010728},{"@id":"NT1-98007-021.jpg","@type":"File","contentSize":1861120,"dateCreated":"2012-09-
27T10:08:01.000Z","dateModified":"2016-06-11T22:03:24.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-021.jpg","doi":"10.4225/72/575C8AA6B23AF","essenceId":1010729},{"@id":"NT1-98007-022.jpg","@type":"File","contentSize":1835008,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:29.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-022.jpg","doi":"10.4225/72/575C8AAC3A545","essenceId":1010730},{"@id":"NT1-98007-023.jpg","@type":"File","contentSize":1827328,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:35.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-023.jpg","doi":"10.4225/72/575C8AB1C991E","essenceId":1010731},{"@id":"NT1-98007-024.jpg","@type":"File","contentSize":1805312,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:40.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-024.jpg","doi":"10.4225/72/575C8AB74847D","essenceId":1010732},{"@id":"NT1-98007-025.jpg","@type":"File","contentSize":1912832,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:46.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-025.jpg","doi":"10.4225/72/575C8ABCD2B44","essenceId":1010733},{"@id":"NT1-98007-026.jpg","@type":"File","contentSize":1889792,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:51.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-026.jpg","doi":"10.4225/72/575C8AC26D69E","essenceId":1010734},{"@id":"NT1-98007-027.jpg","@type":"File","contentSize":1878528,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:03:57.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-027.jpg","doi":"10.4225/72/575C8AC7F3886","essenceId":1010735},{"@id":"NT1-98007-028.jpg","@type":"File","contentSize":1868288,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:02.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-028.jpg","doi":"10.4225/72/575C8ACD72196","essenceId":1010736},{"@id":"NT1-98007-029.j
pg","@type":"File","contentSize":1859584,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:08.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-029.jpg","doi":"10.4225/72/575C8AD2E8E82","essenceId":1010737},{"@id":"NT1-98007-030.jpg","@type":"File","contentSize":1859072,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:13.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-030.jpg","doi":"10.4225/72/575C8AD8775E6","essenceId":1010738},{"@id":"NT1-98007-031.jpg","@type":"File","contentSize":1708544,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:19.000Z","encodingFormat":"image/jpeg","name":"NT1-98007-031.jpg","doi":"10.4225/72/575C8ADDE64B8","essenceId":1010739},{"@id":"NT1-98007-98007A.eaf","@type":"File","contentSize":165674,"dateCreated":"2016-08-01T05:00:06.000Z","dateModified":"2016-08-01T16:01:41.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007A.eaf","doi":"10.4225/72/579F725FDD059","essenceId":1100487},{"@id":"NT1-98007-98007A.flextext","@type":"File","contentSize":141244,"dateCreated":"2016-05-20T04:00:06.000Z","dateModified":"2016-06-24T12:41:36.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007A.flextext","doi":"10.4225/72/576D2A7B75614","essenceId":1086277},{"@id":"NT1-98007-98007A.ixt","@type":"File","contentSize":40299,"dateCreated":"2016-04-18T07:00:07.000Z","dateModified":"2016-06-24T08:25:06.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007A.ixt","doi":"10.4225/72/576CEE5CED1FB","essenceId":1085095},{"@id":"NT1-98007-98007A.mp3","@type":"File","bitrate":128009,"contentSize":43667584,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2019-11-29T09:17:34.000Z","duration":2729.02,"encodingFormat":"audio/mpeg","name":"NT1-98007-98007A.mp3","channels":2,"doi":"10.4225/72/575C8AE370B93","essenceId":1010740,"sampleRate":44100},{"@id":"NT1-98007-98007A.trs","@type":"File","contentSize":28292,"dateCreated":"2016-04-26T10:00:06.000
Z","dateModified":"2016-06-24T08:45:27.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007A.trs","doi":"10.4225/72/576CF32101764","essenceId":1085293},{"@id":"NT1-98007-98007A.wav","@type":"File","bitrate":4608000,"contentSize":1571894006,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2019-11-29T09:16:52.000Z","duration":2728.98,"encodingFormat":"audio/x-wav","name":"NT1-98007-98007A.wav","channels":2,"doi":"10.4225/72/575C8AE8E6E6B","essenceId":1010741,"sampleRate":96000},{"@id":"NT1-98007-98007B.eaf","@type":"File","contentSize":118748,"dateCreated":"2016-08-01T05:00:07.000Z","dateModified":"2016-08-01T16:01:47.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007B.eaf","doi":"10.4225/72/579F7265746C0","essenceId":1100493},{"@id":"NT1-98007-98007B.mp3","@type":"File","bitrate":128007,"contentSize":35305600,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2019-11-29T09:12:58.000Z","duration":2206.47,"encodingFormat":"audio/mpeg","name":"NT1-98007-98007B.mp3","channels":2,"doi":"10.4225/72/575C8AEE64BCA","essenceId":1010742,"sampleRate":44100},{"@id":"NT1-98007-98007B.wav","@type":"File","bitrate":4608000,"contentSize":1270917002,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2019-11-29T09:15:22.000Z","duration":2206.45,"encodingFormat":"audio/x-wav","name":"NT1-98007-98007B.wav","channels":2,"doi":"10.4225/72/575C8AF3D2DA0","essenceId":1010743,"sampleRate":96000},{"@id":"NT1-98007-98007az.xml","@type":"File","contentSize":48755,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:46.000Z","encodingFormat":"application/xml","name":"NT1-98007-98007az.xml","doi":"10.4225/72/575C8AF94896E","essenceId":1010744},{"@id":"NT1-98007-A.tab","@type":"File","contentSize":27810,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:04:57.000Z","encodingFormat":"text/plain","name":"NT1-98007-A.tab","doi":"10.4225/72/575C8B043764B","essenceId":1010746},{"@id":"NT1-98007-A.xml","@type":"File",
"contentSize":48788,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:05:03.000Z","encodingFormat":"application/xml","name":"NT1-98007-A.xml","doi":"10.4225/72/575C8B09BDC88","essenceId":1010747},{"@id":"NT1-98007-B.tab","@type":"File","contentSize":20239,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:05:08.000Z","encodingFormat":"text/plain","name":"NT1-98007-B.tab","doi":"10.4225/72/575C8B0F4F8F1","essenceId":1010748},{"@id":"NT1-98007-B.xml","@type":"File","contentSize":35289,"dateCreated":"2012-09-27T10:08:01.000Z","dateModified":"2016-06-11T22:05:14.000Z","encodingFormat":"application/xml","name":"NT1-98007-B.xml","doi":"10.4225/72/575C8B14C6702","essenceId":1010749},{"@id":"Waia Tenene","@type":"Person","familyName":"Tenene","givenName":"Waia","homeLocation":{"@id":"#country_null"},"name":"Waia Tenene","role":{"@id":"role_speaker"}},{"@id":"_:b0","@type":"CreativeWork","name":"Open (subject to agreeing to PDSC access conditions)"},{"@id":"http://nla.gov.au/nla.party-479603","@type":"Person","email":"thien@unimelb.edu.au","familyName":"Thieberger","givenName":"Nick","homeLocation":{"@id":"#country_Australia"},"name":"Nick Thieberger","role":[{"@id":"role_collector"},{"@id":"role_depositor"},{"@id":"role_recorder"}]},{"@id":"http://nla.gov.au/nla.party-593909","@type":"Organization","name":"University of Melbourne"},{"@id":"ro-crate-metadata.json","@type":"CreativeWork","conformsTo":{"@id":"https://w3id.org/ro/crate/1.1/context"},"about":{"@id":"./"}},{"@id":"role_collector","@type":"Role","name":"collector"},{"@id":"role_depositor","@type":"Role","name":"depositor"},{"@id":"role_recorder","@type":"Role","name":"recorder"},{"@id":"role_speaker","@type":"Role","name":"speaker"}]} From 95dd558c3906f3f9792fa1289d303cb0ca0092df Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 23 Apr 2024 19:22:48 +0200 Subject: [PATCH 4/4] mime filetype lookup refactoring --- .../harvard/iq/dataverse/util/FileUtil.java | 65 
++++++++++--------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index cd9a138a621..7e38dacd4e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -433,12 +433,10 @@ public static String retestIngestableFileType(File file, String fileType) { } public static String determineFileType(File f, String fileName) throws IOException{ - final ResourceBundle bundle = BundleUtil.getResourceBundle("MimeTypeDetectionByFileName"); - if (bundle.keySet().contains(fileName)) { - return bundle.getString(fileName); + String fileType = lookupFileTypeByFileName(fileName); + if (fileType != null) { + return fileType; } - - String fileType = null; String fileExtension = getFileExtension(fileName); @@ -497,17 +495,17 @@ public static String determineFileType(File f, String fileName) throws IOExcepti if (fileType != null && fileType.startsWith("text/plain") && STATISTICAL_FILE_EXTENSION.containsKey(fileExtension)) { fileType = STATISTICAL_FILE_EXTENSION.get(fileExtension); } else { - fileType = determineFileTypeByNameAndExtension(fileName); + fileType = lookupFileTypeByExtension(fileName); } logger.fine("mime type recognized by extension: "+fileType); } } else { logger.fine("fileExtension is null"); - String fileTypeByName = lookupFileTypeFromPropertiesFile(fileName); - if(!StringUtil.isEmpty(fileTypeByName)) { - logger.fine(String.format("mime type: %s recognized by filename: %s", fileTypeByName, fileName)); - fileType = fileTypeByName; + final String fileTypeByExtension = lookupFileTypeByExtensionFromPropertiesFile(fileName); + if(!StringUtil.isEmpty(fileTypeByExtension)) { + logger.fine(String.format("mime type: %s recognized by extension: %s", fileTypeByExtension, fileName)); + fileType = fileTypeByExtension; } } @@ -552,38 +550,41 @@ public static String 
determineFileType(File f, String fileName) throws IOExcepti return fileType; } - public static String determineFileTypeByNameAndExtension(String fileName) { - final ResourceBundle bundle = BundleUtil.getResourceBundle("MimeTypeDetectionByFileName"); - if (bundle.keySet().contains(fileName)) { - return bundle.getString(fileName); + public static String determineFileTypeByNameAndExtension(final String fileName) { + final String fileType = lookupFileTypeByFileName(fileName); + if (fileType != null) { + return fileType; } - - String mimetypesFileTypeMapResult = MIME_TYPE_MAP.getContentType(fileName); + return lookupFileTypeByExtension(fileName); + } + + private static String lookupFileTypeByExtension(final String fileName) { + final String mimetypesFileTypeMapResult = MIME_TYPE_MAP.getContentType(fileName); logger.fine("MimetypesFileTypeMap type by extension, for " + fileName + ": " + mimetypesFileTypeMapResult); - if (mimetypesFileTypeMapResult != null) { - if ("application/octet-stream".equals(mimetypesFileTypeMapResult)) { - return lookupFileTypeFromPropertiesFile(fileName); - } else { - return mimetypesFileTypeMapResult; - } - } else { + if (mimetypesFileTypeMapResult == null) { return null; } + if ("application/octet-stream".equals(mimetypesFileTypeMapResult)) { + return lookupFileTypeByExtensionFromPropertiesFile(fileName); + } + return mimetypesFileTypeMapResult; + } + + private static String lookupFileTypeByFileName(final String fileName) { + return lookupFileTypeFromPropertiesFile("MimeTypeDetectionByFileName", fileName); } - public static String lookupFileTypeFromPropertiesFile(String fileName) { - String fileKey = FilenameUtils.getExtension(fileName); - String propertyFileName = "MimeTypeDetectionByFileExtension"; - if(fileKey == null || fileKey.isEmpty()) { - fileKey = fileName; - propertyFileName = "MimeTypeDetectionByFileName"; + private static String lookupFileTypeByExtensionFromPropertiesFile(final String fileName) { + final String fileKey = 
FilenameUtils.getExtension(fileName); + return lookupFileTypeFromPropertiesFile("MimeTypeDetectionByFileExtension", fileKey); + } - } - String propertyFileNameOnDisk = propertyFileName + ".properties"; + private static String lookupFileTypeFromPropertiesFile(final String propertyFileName, final String fileKey) { + final String propertyFileNameOnDisk = propertyFileName + ".properties"; try { logger.fine("checking " + propertyFileNameOnDisk + " for file key " + fileKey); return BundleUtil.getStringFromPropertyFile(fileKey, propertyFileName); - } catch (MissingResourceException ex) { + } catch (final MissingResourceException ex) { logger.info(fileKey + " is a filename/extension Dataverse doesn't know about. Consider adding it to the " + propertyFileNameOnDisk + " file."); return null; }