From 45dec74ad5bd92a70e8477ef3c2792d737c1243f Mon Sep 17 00:00:00 2001 From: jo-pol Date: Thu, 29 Aug 2024 16:43:45 +0200 Subject: [PATCH 01/20] replaced ZipInputStream in CreateNewDataFilesCommand --- .../impl/CreateNewDataFilesCommand.java | 317 ++++++++---------- 1 file changed, 138 insertions(+), 179 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index 3a21345448b..21010ebd2b6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -2,34 +2,29 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; -import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -//import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; -import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; -import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; -import static edu.harvard.iq.dataverse.util.FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; -import static edu.harvard.iq.dataverse.util.FileUtil.createIngestFailureReport; -import static edu.harvard.iq.dataverse.util.FileUtil.determineFileType; -import static edu.harvard.iq.dataverse.util.FileUtil.determineFileTypeByNameAndExtension; -import static edu.harvard.iq.dataverse.util.FileUtil.getFilesTempDirectory; -import static edu.harvard.iq.dataverse.util.FileUtil.saveInputStreamInTempFile; -import static edu.harvard.iq.dataverse.util.FileUtil.useRecognizedType; import edu.harvard.iq.dataverse.util.ShapefileHandler; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.file.BagItFileHandler; import edu.harvard.iq.dataverse.util.file.BagItFileHandlerFactory; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; +import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException; +import jakarta.enterprise.inject.spi.CDI; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; + import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -42,6 +37,7 @@ import java.text.MessageFormat; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; @@ -51,12 +47,18 @@ import java.util.Set; import java.util.logging.Logger; import java.util.zip.GZIPInputStream; -import java.util.zip.ZipFile; import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; import java.util.zip.ZipInputStream; -import jakarta.enterprise.inject.spi.CDI; -import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.StringUtils; + +import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; +import static edu.harvard.iq.dataverse.util.FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; +import static edu.harvard.iq.dataverse.util.FileUtil.createIngestFailureReport; +import static edu.harvard.iq.dataverse.util.FileUtil.determineFileType; +import static edu.harvard.iq.dataverse.util.FileUtil.determineFileTypeByNameAndExtension; +import static edu.harvard.iq.dataverse.util.FileUtil.getFilesTempDirectory; +import static edu.harvard.iq.dataverse.util.FileUtil.saveInputStreamInTempFile; +import static edu.harvard.iq.dataverse.util.FileUtil.useRecognizedType; /** * @@ -260,10 +262,6 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException // DataFile objects from its contents: } else if (finalType.equals("application/zip")) { - ZipFile zipFile = null; - ZipInputStream unZippedIn = null; - ZipEntry zipEntry = null; - int fileNumberLimit = ctxt.systemConfig().getZipUploadFilesLimit(); Long combinedUnzippedFileSize = 0L; @@ -271,14 +269,14 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException Charset charset = null; /* TODO: (?) - We may want to investigate somehow letting the user specify + We may want to investigate somehow letting the user specify the charset for the filenames in the zip file... - - otherwise, ZipInputStream bails out if it encounteres a file - name that's not valid in the current charest (i.e., UTF-8, in - our case). It would be a bit trickier than what we're doing for - SPSS tabular ingests - with the lang. encoding pulldown menu - + - otherwise, ZipInputStream bails out if it encounteres a file + name that's not valid in the current charest (i.e., UTF-8, in + our case). It would be a bit trickier than what we're doing for + SPSS tabular ingests - with the lang. encoding pulldown menu - because this encoding needs to be specified *before* we upload and - attempt to unzip the file. + attempt to unzip the file. -- L.A. 4.0 beta12 logger.info("default charset is "+Charset.defaultCharset().name()); if (Charset.isSupported("US-ASCII")) { @@ -287,7 +285,7 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException if (charset != null) { logger.info("was able to obtain charset for US-ASCII"); } - + } */ @@ -298,14 +296,9 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException * This should be a fairly inexpensive operation, we just need * to read the directory at the end of the file. */ - - if (charset != null) { - zipFile = new ZipFile(tempFile.toFile(), charset); - } else { - zipFile = new ZipFile(tempFile.toFile()); - } + /** - * The ZipFile constructors above will throw ZipException - + * The ZipFile constructors in openZipFile will throw ZipException - * a type of IOException - if there's something wrong * with this file as a zip. There's no need to intercept it * here, it will be caught further below, with other IOExceptions, @@ -323,156 +316,112 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException * that are files. */ - for (Enumeration entries = zipFile.entries(); entries.hasMoreElements();) { - ZipEntry entry = entries.nextElement(); - logger.fine("inside first zip pass; this entry: "+entry.getName()); - if (!entry.isDirectory()) { - String shortName = entry.getName().replaceFirst("^.*[\\/]", ""); - // ... and, finally, check if it's a "fake" file - a zip archive entry - // created for a MacOS X filesystem element: (these - // start with "._") - if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) { - numberOfUnpackableFiles++; - if (numberOfUnpackableFiles > fileNumberLimit) { - logger.warning("Zip upload - too many files in the zip to process individually."); - warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit - + "); please upload a zip archive with fewer files, if you want them to be ingested " - + "as individual DataFiles."; - throw new IOException(); - } - // In addition to counting the files, we can - // also check the file size while we're here, - // provided the size limit is defined; if a single - // file is above the individual size limit, unzipped, - // we give up on unpacking this zip archive as well: - if (fileSizeLimit != null && entry.getSize() > fileSizeLimit) { - throw new FileExceedsMaxSizeException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(entry.getSize()), bytesToHumanReadable(fileSizeLimit))); - } - // Similarly, we want to check if saving all these unpacked - // files is going to push the disk usage over the - // quota: - if (storageQuotaLimit != null) { - combinedUnzippedFileSize = combinedUnzippedFileSize + entry.getSize(); - if (combinedUnzippedFileSize > storageQuotaLimit) { - //throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(combinedUnzippedFileSize), bytesToHumanReadable(storageQuotaLimit))); - // change of plans: if the unzipped content inside exceeds the remaining quota, - // we reject the upload outright, rather than accepting the zip - // file as is. - throw new CommandExecutionException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.unzipped.quota_exceeded"), bytesToHumanReadable(storageQuotaLimit)), this); + try (var zipFile = openZipFile(tempFile, charset)) { + for (Enumeration entries = zipFile.entries(); entries.hasMoreElements(); ) { + ZipEntry entry = entries.nextElement(); + logger.fine("inside first zip pass; this entry: " + entry.getName()); + if (!entry.isDirectory()) { + if (isNotFakeFile(toShortName(entry.getName()))) { + numberOfUnpackableFiles++; + if (numberOfUnpackableFiles > fileNumberLimit) { + logger.warning("Zip upload - too many files in the zip to process individually."); + warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit + + "); please upload a zip archive with fewer files, if you want them to be ingested " + + "as individual DataFiles."; + throw new IOException(); + } + // In addition to counting the files, we can + // also check the file size while we're here, + // provided the size limit is defined; if a single + // file is above the individual size limit, unzipped, + // we give up on unpacking this zip archive as well: + if (fileSizeLimit != null && entry.getSize() > fileSizeLimit) { + // TODO why no log and warning message? + throw new FileExceedsMaxSizeException( + MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(entry.getSize()), + bytesToHumanReadable(fileSizeLimit))); + } + // Similarly, we want to check if saving all these unpacked + // files is going to push the disk usage over the + // quota: + if (storageQuotaLimit != null) { + combinedUnzippedFileSize = combinedUnzippedFileSize + entry.getSize(); + if (combinedUnzippedFileSize > storageQuotaLimit) { + //throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(combinedUnzippedFileSize), bytesToHumanReadable(storageQuotaLimit))); + // change of plans: if the unzipped content inside exceeds the remaining quota, + // we reject the upload outright, rather than accepting the zip + // file as is. + // TODO why no log and warning message? + throw new CommandExecutionException( + MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.unzipped.quota_exceeded"), bytesToHumanReadable(storageQuotaLimit)), this); + } } } } } } - + // OK we're still here - that means we can proceed unzipping. - // Close the ZipFile, re-open as ZipInputStream: - zipFile.close(); // reset: combinedUnzippedFileSize = 0L; - if (charset != null) { - unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile()), charset); - } else { - unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile())); - } - - while (true) { - try { - zipEntry = unZippedIn.getNextEntry(); - } catch (IllegalArgumentException iaex) { - // Note: - // ZipInputStream documentation doesn't even mention that - // getNextEntry() throws an IllegalArgumentException! - // but that's what happens if the file name of the next - // entry is not valid in the current CharSet. - // -- L.A. - warningMessage = "Failed to unpack Zip file. (Unknown Character Set used in a file name?) Saving the file as is."; - logger.warning(warningMessage); - throw new IOException(); - } - - if (zipEntry == null) { - break; - } - // Note that some zip entries may be directories - we - // simply skip them: - - if (!zipEntry.isDirectory()) { - if (datafiles.size() > fileNumberLimit) { - logger.warning("Zip upload - too many files."); - warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit - + "); please upload a zip archive with fewer files, if you want them to be ingested " - + "as individual DataFiles."; - throw new IOException(); + try (var zipFile = openZipFile(tempFile, charset)) { + var entries = Collections.list(zipFile.entries()).stream().filter(e -> { + var entryName = e.getName(); + logger.fine("ZipEntry, file: " + entryName); + return !e.isDirectory() && !entryName.isEmpty() && isNotFakeFile(toShortName(entryName)); + }).toList(); + + for (var entry : entries) { + String storageIdentifier = FileUtil.generateStorageIdentifier(); + File unzippedFile = new File(getFilesTempDirectory() + "/" + storageIdentifier); + Files.copy(zipFile.getInputStream(entry), unzippedFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + // No need to check the size of this unpacked file against the size limit, + // since we've already checked for that in the first pass. + var fileEntryName = entry.getName(); + var shortName = toShortName(fileEntryName); + DataFile datafile = FileUtil.createSingleDataFile(version, null, storageIdentifier, shortName, + MIME_TYPE_UNDETERMINED_DEFAULT, + ctxt.systemConfig().getFileFixityChecksumAlgorithm(), null, false); + + if (!fileEntryName.equals(shortName)) { + // If the filename looks like a hierarchical folder name (i.e., contains slashes and backslashes), + // we'll extract the directory name; then subject it to some "aggressive sanitizing" - strip all + // the leading, trailing and duplicate slashes; then replace all the characters that + // don't pass our validation rules. + String directoryName = fileEntryName.replaceFirst("[\\\\/][\\\\/]*[^\\\\/]*$", ""); + directoryName = StringUtil.sanitizeFileDirectory(directoryName, true); + // if (!"".equals(directoryName)) { + if (!StringUtil.isEmpty(directoryName)) { + logger.fine("setting the directory label to " + directoryName); + datafile.getFileMetadata().setDirectoryLabel(directoryName); + } } - String fileEntryName = zipEntry.getName(); - logger.fine("ZipEntry, file: " + fileEntryName); - - if (fileEntryName != null && !fileEntryName.equals("")) { - - String shortName = fileEntryName.replaceFirst("^.*[\\/]", ""); - - // Check if it's a "fake" file - a zip archive entry - // created for a MacOS X filesystem element: (these - // start with "._") - if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) { - // OK, this seems like an OK file entry - we'll try - // to read it and create a DataFile with it: - - String storageIdentifier = FileUtil.generateStorageIdentifier(); - File unzippedFile = new File(getFilesTempDirectory() + "/" + storageIdentifier); - Files.copy(unZippedIn, unzippedFile.toPath(), StandardCopyOption.REPLACE_EXISTING); - // No need to check the size of this unpacked file against the size limit, - // since we've already checked for that in the first pass. - - DataFile datafile = FileUtil.createSingleDataFile(version, null, storageIdentifier, shortName, - MIME_TYPE_UNDETERMINED_DEFAULT, - ctxt.systemConfig().getFileFixityChecksumAlgorithm(), null, false); - - if (!fileEntryName.equals(shortName)) { - // If the filename looks like a hierarchical folder name (i.e., contains slashes and backslashes), - // we'll extract the directory name; then subject it to some "aggressive sanitizing" - strip all - // the leading, trailing and duplicate slashes; then replace all the characters that - // don't pass our validation rules. - String directoryName = fileEntryName.replaceFirst("[\\\\/][\\\\/]*[^\\\\/]*$", ""); - directoryName = StringUtil.sanitizeFileDirectory(directoryName, true); - // if (!"".equals(directoryName)) { - if (!StringUtil.isEmpty(directoryName)) { - logger.fine("setting the directory label to " + directoryName); - datafile.getFileMetadata().setDirectoryLabel(directoryName); - } - } + if (datafile != null) { + // We have created this datafile with the mime type "unknown"; + // Now that we have it saved in a temporary location, + // let's try and determine its real type: - if (datafile != null) { - // We have created this datafile with the mime type "unknown"; - // Now that we have it saved in a temporary location, - // let's try and determine its real type: - - String tempFileName = getFilesTempDirectory() + "/" + datafile.getStorageIdentifier(); - - try { - recognizedType = determineFileType(unzippedFile, shortName); - // null the File explicitly, to release any open FDs: - unzippedFile = null; - logger.fine("File utility recognized unzipped file as " + recognizedType); - if (recognizedType != null && !recognizedType.equals("")) { - datafile.setContentType(recognizedType); - } - } catch (Exception ex) { - logger.warning("Failed to run the file utility mime type check on file " + fileName); - } + String tempFileName = getFilesTempDirectory() + "/" + datafile.getStorageIdentifier(); - datafiles.add(datafile); - combinedUnzippedFileSize += datafile.getFilesize(); + try { + recognizedType = determineFileType(unzippedFile, shortName); + // null the File explicitly, to release any open FDs: + unzippedFile = null; + logger.fine("File utility recognized unzipped file as " + recognizedType); + if (recognizedType != null && !recognizedType.equals("")) { + datafile.setContentType(recognizedType); } + } catch (Exception ex) { + logger.warning("Failed to run the file utility mime type check on file " + fileName); } + + datafiles.add(datafile); + combinedUnzippedFileSize += datafile.getFilesize(); } } - unZippedIn.closeEntry(); - } } catch (IOException ioex) { @@ -494,18 +443,7 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException //warningMessage = BundleUtil.getStringFromBundle("file.addreplace.warning.unzip.failed.quota", Arrays.asList(FileSizeChecker.bytesToHumanReadable(storageQuotaLimit))); //datafiles.clear(); throw new CommandExecutionException(fesqx.getMessage(), fesqx, this); - }*/ finally { - if (zipFile != null) { - try { - zipFile.close(); - } catch (Exception zEx) {} - } - if (unZippedIn != null) { - try { - unZippedIn.close(); - } catch (Exception zEx) {} - } - } + }*/ if (!datafiles.isEmpty()) { // remove the uploaded zip file: try { @@ -591,7 +529,8 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException // The try-catch is due to error encountered in using NFS for stocking file, // cf. https://github.com/IQSS/dataverse/issues/5909 try { - FileUtils.deleteDirectory(rezipFolder); + if (rezipFolder!=null) + FileUtils.deleteDirectory(rezipFolder); } catch (IOException ioex) { // do nothing - it's a temp folder. logger.warning("Could not remove temp folder, error message : " + ioex.getMessage()); @@ -730,7 +669,27 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException return CreateDataFileResult.error(fileName, finalType); } // end createDataFiles - + + private static ZipFile openZipFile(Path tempFile, Charset charset) throws IOException { + if (charset != null) { + return new ZipFile(tempFile.toFile(), charset); + } + else { + return new ZipFile(tempFile.toFile()); + } + } + + private static String toShortName(String fileEntryName) { + return fileEntryName.replaceFirst("^.*[\\/]", ""); + } + + private static boolean isNotFakeFile(String shortName) { + // check if it's a "fake" file - a zip archive entry + // created for a MacOS X filesystem element: (these + // start with "._") + return !shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName); + } + @Override public Map> getRequiredPermissions() { Map> ret = new HashMap<>(); From 36d79b1d5760ab5ffcafa018d2920c9f1cb5d1a2 Mon Sep 17 00:00:00 2001 From: jo-pol Date: Mon, 2 Sep 2024 09:48:06 +0200 Subject: [PATCH 02/20] extract method to filter files --- .../impl/CreateNewDataFilesCommand.java | 103 +++++++++--------- 1 file changed, 50 insertions(+), 53 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index 21010ebd2b6..a816675e695 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -289,14 +289,6 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException } */ - /** - * Perform a quick check for how many individual files are - * inside this zip archive. If it's above the limit, we can - * give up right away, without doing any unpacking. - * This should be a fairly inexpensive operation, we just need - * to read the directory at the end of the file. - */ - /** * The ZipFile constructors in openZipFile will throw ZipException - * a type of IOException - if there's something wrong @@ -317,45 +309,47 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException */ try (var zipFile = openZipFile(tempFile, charset)) { - for (Enumeration entries = zipFile.entries(); entries.hasMoreElements(); ) { - ZipEntry entry = entries.nextElement(); + for (var entry : filteredZipEntries(zipFile)) { + /** + * Perform a quick check for how many individual files are + * inside this zip archive. If it's above the limit, we can + * give up right away, without doing any unpacking. + * This should be a fairly inexpensive operation, we just need + * to read the directory at the end of the file. + */ logger.fine("inside first zip pass; this entry: " + entry.getName()); - if (!entry.isDirectory()) { - if (isNotFakeFile(toShortName(entry.getName()))) { - numberOfUnpackableFiles++; - if (numberOfUnpackableFiles > fileNumberLimit) { - logger.warning("Zip upload - too many files in the zip to process individually."); - warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit - + "); please upload a zip archive with fewer files, if you want them to be ingested " - + "as individual DataFiles."; - throw new IOException(); - } - // In addition to counting the files, we can - // also check the file size while we're here, - // provided the size limit is defined; if a single - // file is above the individual size limit, unzipped, - // we give up on unpacking this zip archive as well: - if (fileSizeLimit != null && entry.getSize() > fileSizeLimit) { - // TODO why no log and warning message? - throw new FileExceedsMaxSizeException( - MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(entry.getSize()), - bytesToHumanReadable(fileSizeLimit))); - } - // Similarly, we want to check if saving all these unpacked - // files is going to push the disk usage over the - // quota: - if (storageQuotaLimit != null) { - combinedUnzippedFileSize = combinedUnzippedFileSize + entry.getSize(); - if (combinedUnzippedFileSize > storageQuotaLimit) { - //throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(combinedUnzippedFileSize), bytesToHumanReadable(storageQuotaLimit))); - // change of plans: if the unzipped content inside exceeds the remaining quota, - // we reject the upload outright, rather than accepting the zip - // file as is. - // TODO why no log and warning message? - throw new CommandExecutionException( - MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.unzipped.quota_exceeded"), bytesToHumanReadable(storageQuotaLimit)), this); - } - } + numberOfUnpackableFiles++; + if (numberOfUnpackableFiles > fileNumberLimit) { + logger.warning("Zip upload - too many files in the zip to process individually."); + warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit + + "); please upload a zip archive with fewer files, if you want them to be ingested " + + "as individual DataFiles."; + throw new IOException(); + } + // In addition to counting the files, we can + // also check the file size while we're here, + // provided the size limit is defined; if a single + // file is above the individual size limit, unzipped, + // we give up on unpacking this zip archive as well: + if (fileSizeLimit != null && entry.getSize() > fileSizeLimit) { + // TODO why no log and warning message? + throw new FileExceedsMaxSizeException( + MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(entry.getSize()), + bytesToHumanReadable(fileSizeLimit))); + } + // Similarly, we want to check if saving all these unpacked + // files is going to push the disk usage over the + // quota: + if (storageQuotaLimit != null) { + combinedUnzippedFileSize = combinedUnzippedFileSize + entry.getSize(); + if (combinedUnzippedFileSize > storageQuotaLimit) { + //throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(combinedUnzippedFileSize), bytesToHumanReadable(storageQuotaLimit))); + // change of plans: if the unzipped content inside exceeds the remaining quota, + // we reject the upload outright, rather than accepting the zip + // file as is. + // TODO why no log and warning message? + throw new CommandExecutionException( + MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.unzipped.quota_exceeded"), bytesToHumanReadable(storageQuotaLimit)), this); } } } @@ -367,13 +361,7 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException combinedUnzippedFileSize = 0L; try (var zipFile = openZipFile(tempFile, charset)) { - var entries = Collections.list(zipFile.entries()).stream().filter(e -> { - var entryName = e.getName(); - logger.fine("ZipEntry, file: " + entryName); - return !e.isDirectory() && !entryName.isEmpty() && isNotFakeFile(toShortName(entryName)); - }).toList(); - - for (var entry : entries) { + for (var entry : filteredZipEntries(zipFile)) { String storageIdentifier = FileUtil.generateStorageIdentifier(); File unzippedFile = new File(getFilesTempDirectory() + "/" + storageIdentifier); Files.copy(zipFile.getInputStream(entry), unzippedFile.toPath(), StandardCopyOption.REPLACE_EXISTING); @@ -670,6 +658,15 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException return CreateDataFileResult.error(fileName, finalType); } // end createDataFiles + private static List filteredZipEntries(ZipFile zipFile) { + var entries = Collections.list(zipFile.entries()).stream().filter(e -> { + var entryName = e.getName(); + logger.fine("ZipEntry, file: " + entryName); + return !e.isDirectory() && !entryName.isEmpty() && isNotFakeFile(toShortName(entryName)); + }).toList(); + return entries; + } + private static ZipFile openZipFile(Path tempFile, Charset charset) throws IOException { if (charset != null) { return new ZipFile(tempFile.toFile(), charset); From db5150dbabb05fe65e3e791ede074ada96a1c458 Mon Sep 17 00:00:00 2001 From: jo-pol Date: Mon, 2 Sep 2024 10:17:17 +0200 Subject: [PATCH 03/20] File.getName replaces regexp --- .../command/impl/CreateNewDataFilesCommand.java | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index a816675e695..2735c94faec 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -38,7 +38,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -49,7 +48,6 @@ import java.util.zip.GZIPInputStream; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; -import java.util.zip.ZipInputStream; import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; import static edu.harvard.iq.dataverse.util.FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; @@ -368,7 +366,7 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException // No need to check the size of this unpacked file against the size limit, // since we've already checked for that in the first pass. var fileEntryName = entry.getName(); - var shortName = toShortName(fileEntryName); + var shortName = new File(fileEntryName).getName(); DataFile datafile = FileUtil.createSingleDataFile(version, null, storageIdentifier, shortName, MIME_TYPE_UNDETERMINED_DEFAULT, ctxt.systemConfig().getFileFixityChecksumAlgorithm(), null, false); @@ -662,7 +660,7 @@ private static List filteredZipEntries(ZipFile zipFile) { var entries = Collections.list(zipFile.entries()).stream().filter(e -> { var entryName = e.getName(); logger.fine("ZipEntry, file: " + entryName); - return !e.isDirectory() && !entryName.isEmpty() && isNotFakeFile(toShortName(entryName)); + return !e.isDirectory() && !entryName.isEmpty() && isNotFakeFile(entryName); }).toList(); return entries; } @@ -676,14 +674,11 @@ private static ZipFile openZipFile(Path tempFile, Charset charset) throws IOExce } } - private static String toShortName(String fileEntryName) { - return fileEntryName.replaceFirst("^.*[\\/]", ""); - } - - private static boolean isNotFakeFile(String shortName) { + private static boolean isNotFakeFile(String fileName) { // check if it's a "fake" file - a zip archive entry // created for a MacOS X filesystem element: (these // start with "._") + var shortName = new File(fileName).getName(); return !shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName); } From 86f679815009b9fcecd0bd3f9f3a5a3bcd4d5582 Mon Sep 17 00:00:00 2001 From: jo-pol Date: Mon, 2 Sep 2024 14:52:15 +0200 Subject: [PATCH 04/20] create ShapeFileHandler from file to avoid Zip/File-InputStream --- .../ingest/IngestServiceShapefileHelper.java | 62 +------- .../harvard/iq/dataverse/util/FileUtil.java | 2 +- .../iq/dataverse/util/ShapefileHandler.java | 144 +++++------------- .../util/shapefile/ShapefileHandlerTest.java | 22 +-- 4 files changed, 62 insertions(+), 168 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceShapefileHelper.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceShapefileHelper.java index 8c5dad237b1..958b3636ef7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceShapefileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceShapefileHelper.java @@ -100,71 +100,25 @@ public IngestServiceShapefileHelper(File zippedShapefile, File rezipFolder){ //this.processFile(zippedShapefile, rezipFolder); } - - private FileInputStream getFileInputStream(File fileObject){ - if (fileObject==null){ - return null; - } - try { - return new FileInputStream(fileObject); - } catch (FileNotFoundException ex) { - logger.severe("Failed to create FileInputStream from File: " + fileObject.getAbsolutePath()); - return null; - } - } - - private void closeFileInputStream(FileInputStream fis){ - if (fis==null){ - return; - } - try { - fis.close(); - } catch (IOException ex) { - logger.info("Failed to close FileInputStream"); - } - } - + public boolean processFile() { if ((!isValidFile(this.zippedShapefile))||(!isValidFolder(this.rezipFolder))){ return false; } - - // (1) Use the ShapefileHandler to the .zip for a shapefile - // - FileInputStream shpfileInputStream = this.getFileInputStream(zippedShapefile); - if (shpfileInputStream==null){ - return false; - } - - this.shpHandler = new ShapefileHandler(shpfileInputStream); - if (!shpHandler.containsShapefile()){ - logger.severe("Shapefile was incorrectly detected upon Ingest (FileUtil) and passed here"); - return false; - } - - this.closeFileInputStream(shpfileInputStream); - - // (2) Rezip the shapefile pieces - logger.info("rezipFolder: " + rezipFolder.getAbsolutePath()); - shpfileInputStream = this.getFileInputStream(zippedShapefile); - if (shpfileInputStream==null){ - return false; - } - - boolean rezipSuccess; try { - rezipSuccess = shpHandler.rezipShapefileSets(shpfileInputStream, rezipFolder); + this.shpHandler = new ShapefileHandler(zippedShapefile); + if (!shpHandler.containsShapefile()){ + logger.severe("Shapefile was incorrectly detected upon Ingest (FileUtil) and passed here"); + return false; + } + logger.info("rezipFolder: " + rezipFolder.getAbsolutePath()); + return shpHandler.rezipShapefileSets(rezipFolder); } catch (IOException ex) { logger.severe("Shapefile was not correctly unpacked/repacked"); logger.severe("shpHandler message: " + shpHandler.errorMessage); return false; } - - this.closeFileInputStream(shpfileInputStream); - - return rezipSuccess; - // return createDataFiles(rezipFolder); } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 5ad6c32abb3..5b0c44e1aec 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -510,7 +510,7 @@ public static String determineFileType(File f, String fileName) throws IOExcepti // Check for shapefile extensions as described here: http://en.wikipedia.org/wiki/Shapefile //logger.info("Checking for shapefile"); - ShapefileHandler shp_handler = new ShapefileHandler(new FileInputStream(f)); + ShapefileHandler shp_handler = new ShapefileHandler(f); if (shp_handler.containsShapefile()){ // logger.info("------- shapefile FOUND ----------"); fileType = ShapefileHandler.SHAPEFILE_FILE_TYPE; //"application/zipped-shapefile"; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java b/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java index 9786fda4217..ea62776f754 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java @@ -1,22 +1,22 @@ package edu.harvard.iq.dataverse.util; import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.FileNotFoundException; import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.text.MessageFormat; import java.util.Date; import java.util.ArrayList; import java.util.List; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; -import java.util.zip.ZipException; +import java.util.zip.ZipFile; import java.util.HashMap; import java.util.*; import java.nio.file.Files; import static java.nio.file.StandardCopyOption.REPLACE_EXISTING; -import java.util.logging.Level; +import static java.text.MessageFormat.format; + import java.util.logging.Logger; import org.apache.commons.io.FileUtils; @@ -42,11 +42,10 @@ * "shape1.pdf", "README.md", "shape_notes.txt" * * Code Example: - * FileInputStream shp_file_input_stream = new FileInputStream(new File("zipped_shapefile.zip")) - * ShapefileHandler shp_handler = new ShapefileHandler(shp_file_input_stream); + * ShapefileHandler shp_handler = new ShapefileHandler(new File("zipped_shapefile.zip")); * if (shp_handler.containsShapefile()){ * File rezip_folder = new File("~/folder_for_rezipping"); - * boolean rezip_success = shp_handler.rezipShapefileSets(shp_file_input_stream, rezip_folder ); + * boolean rezip_success = shp_handler.rezipShapefileSets(rezip_folder ); * if (!rezip_success){ * // rezip failed, should be an error message (String) available System.out.println(shp_handler.error_message); @@ -73,7 +72,8 @@ public class ShapefileHandler{ public final static String SHP_XML_EXTENSION = "shp.xml"; public final static String BLANK_EXTENSION = "__PLACEHOLDER-FOR-BLANK-EXTENSION__"; public final static List SHAPEFILE_ALL_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj", "sbn", "sbx", "fbn", "fbx", "ain", "aih", "ixs", "mxs", "atx", "cpg", "qpj", "qmd", SHP_XML_EXTENSION); - + private final File zipFile; + public boolean DEBUG = false; private boolean zipFileProcessed = false; @@ -97,9 +97,6 @@ public class ShapefileHandler{ private List finalRezippedFiles = new ArrayList<>(); - private String outputFolder = "unzipped"; - private String rezippedFolder = "rezipped"; - // Debug helper private void msg(String s){ //logger.info(s); @@ -115,40 +112,28 @@ private void msgt(String s){ } /* - Constructor, start with filename - */ - public ShapefileHandler(String filename){ - - if (filename==null){ - this.addErrorMessage("The filename was null"); - return; - } - - FileInputStream zip_file_stream; - try { - zip_file_stream = new FileInputStream(new File(filename)); - } catch (FileNotFoundException ex) { - this.addErrorMessage("The file was not found"); + Constructor, start with File + */ + public ShapefileHandler(File zip_file) throws IOException { + zipFile = zip_file; + if (zip_file == null) { + this.addErrorMessage("The file was null"); return; } - - this.examineZipfile(zip_file_stream); - - } - - - /* - Constructor, start with FileInputStream - */ - public ShapefileHandler(FileInputStream zip_file_stream){ - if (zip_file_stream==null){ - this.addErrorMessage("The zip_file_stream was null"); - return; + try (var zip_file_object = new ZipFile(zip_file)) { + this.examineZipfile(zip_file_object); + } + catch (FileNotFoundException ex) { + // While this constructor had a FileInputStream as argument: + // FileUtil.determineFileType threw this exception before calling the constructor with a FileInputStream + // IngestServiceShapefileHelper.processFile won´t call this constructor if the file is not valid hence does not exist. + // When the file would have disappeared in the meantime, it would have produced a slightly different error message. + logger.severe("File not found: " + zip_file.getAbsolutePath()); + throw ex; } - this.examineZipfile(zip_file_stream); } - + public List getFinalRezippedFiles(){ return this.finalRezippedFiles; } @@ -290,26 +275,19 @@ inside the uploaded zip file (issue #6873). To achieve this, we recreate subfolders in the FileMetadata of the newly created DataFiles. (-- L.A. 09/2020) */ - private boolean unzipFilesToDirectory(FileInputStream zipfile_input_stream, File target_directory){ + private boolean unzipFilesToDirectory(ZipFile zipfileInput, File target_directory){ logger.fine("unzipFilesToDirectory: " + target_directory.getAbsolutePath() ); - if (zipfile_input_stream== null){ - this.addErrorMessage("unzipFilesToDirectory. The zipfile_input_stream is null."); - return false; - } if (!target_directory.isDirectory()){ this.addErrorMessage("This directory does not exist: " + target_directory.getAbsolutePath()); return false; } - List unzippedFileNames = new ArrayList<>(); + List unzippedFileNames = new ArrayList<>(); - ZipInputStream zipStream = new ZipInputStream(zipfile_input_stream); - ZipEntry origEntry; - byte[] buffer = new byte[2048]; try { - while((origEntry = zipStream.getNextEntry())!=null){ + for(var origEntry : Collections.list(zipfileInput.entries())){ String zentryFileName = origEntry.getName(); logger.fine("\nOriginal entry name: " + origEntry); @@ -359,15 +337,9 @@ private boolean unzipFilesToDirectory(FileInputStream zipfile_input_stream, File unzippedFileNames.add(outpath); } logger.fine("Write zip file: " + outpath); - FileOutputStream fileOutputStream; - long fsize = 0; - fileOutputStream = new FileOutputStream(outpath); - int len;// = 0; - while ((len = zipStream.read(buffer)) > 0){ - fileOutputStream.write(buffer, 0, len); - fsize+=len; - } // end while - fileOutputStream.close(); + try(var inputStream = zipfileInput.getInputStream(origEntry)) { + Files.copy(inputStream, Path.of(outpath), StandardCopyOption.REPLACE_EXISTING); + } } // end outer while } catch (IOException ex) { for (StackTraceElement el : ex.getStackTrace()){ @@ -376,19 +348,13 @@ private boolean unzipFilesToDirectory(FileInputStream zipfile_input_stream, File this.addErrorMessage("Failed to open ZipInputStream entry" + ex.getMessage()); return false; } - - try { - zipStream.close(); - } catch (IOException ex) { - Logger.getLogger(ShapefileHandler.class.getName()).log(Level.SEVERE, null, ex); - } - return true; + return true; } /* Rezip the shapefile(s) into a given directory Assumes that the zipfile_input_stream has already been checked! */ - public boolean rezipShapefileSets(FileInputStream zipfile_input_stream, File rezippedFolder) throws IOException{ + public boolean rezipShapefileSets(File rezippedFolder) throws IOException{ logger.fine("rezipShapefileSets"); //msgt("rezipShapefileSets"); if (!this.zipFileProcessed){ @@ -399,10 +365,6 @@ public boolean rezipShapefileSets(FileInputStream zipfile_input_stream, File rez this.addErrorMessage("There are no shapefiles here!"); return false; } - if (zipfile_input_stream== null){ - this.addErrorMessage("The zipfile_input_stream is null."); - return false; - } if (rezippedFolder == null){ this.addErrorMessage("The rezippedFolder is null."); return false; @@ -432,9 +394,11 @@ public boolean rezipShapefileSets(FileInputStream zipfile_input_stream, File rez // Unzip files! - if (!this.unzipFilesToDirectory(zipfile_input_stream, dir_for_unzipping)){ - this.addErrorMessage("Failed to unzip files."); - return false; + try(var zipfileObject = new ZipFile(zipFile)) { + if (!this.unzipFilesToDirectory(zipfileObject, dir_for_unzipping)) { + this.addErrorMessage("Failed to unzip files."); + return false; + } } // Redistribute files! String target_dirname = rezippedFolder.getAbsolutePath(); @@ -680,27 +644,17 @@ private boolean isFileToSkip(String fname){ /************************************** * Iterate through the zip file contents. * Does it contain any shapefiles? - * - * @param FileInputStream zip_file_stream */ - private boolean examineZipfile(FileInputStream zip_file_stream){ + private boolean examineZipfile(ZipFile zip_file){ // msgt("examineZipfile"); - if (zip_file_stream==null){ - this.addErrorMessage("The zip file stream was null"); - return false; - } - // Clear out file lists this.filesListInDir.clear(); this.filesizeHash.clear(); this.fileGroups.clear(); try{ - ZipInputStream zipStream = new ZipInputStream(zip_file_stream); - ZipEntry entry; - - while((entry = zipStream.getNextEntry())!=null){ + for(var entry : Collections.list(zip_file.entries())){ String zentryFileName = entry.getName(); //msg("zip entry: " + entry.getName()); @@ -738,8 +692,6 @@ private boolean examineZipfile(FileInputStream zip_file_stream){ this.filesizeHash.put(unzipFilePath, entry.getSize()); } } // end while - - zipStream.close(); if (this.filesListInDir.isEmpty()){ errorMessage = "No files in zipStream"; @@ -749,23 +701,11 @@ private boolean examineZipfile(FileInputStream zip_file_stream){ this.zipFileProcessed = true; return true; - }catch(ZipException ex){ - this.addErrorMessage("ZipException"); - msgt("ZipException"); - return false; - - }catch(IOException ex){ - //ex.printStackTrace(); - this.addErrorMessage("IOException File name"); - msgt("IOException"); - return false; }catch(IllegalArgumentException ex){ this.addErrorMessage("IllegalArgumentException when parsing zipfile"); msgt("IllegalArgumentException when parsing zipfile"); return false; - }finally{ - } } // end examineFile diff --git a/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java b/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java index f0e538616b2..587ea265f29 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java @@ -98,13 +98,13 @@ private File createAndZipFiles(List file_names, String zipfile_name) thr } Path zip_file_obj = this.tempFolder.resolve(zipfile_name); - ZipOutputStream zip_stream = new ZipOutputStream(new FileOutputStream(zip_file_obj.toFile())); + try (ZipOutputStream zip_stream = new ZipOutputStream(new FileOutputStream(zip_file_obj.toFile()))) { - // Iterate through File objects and add them to the ZipOutputStream - for (File file_obj : fileCollection) { - this.addToZipFile(file_obj.getName(), file_obj, zip_stream); + // Iterate through File objects and add them to the ZipOutputStream + for (File file_obj : fileCollection) { + this.addToZipFile(file_obj.getName(), file_obj, zip_stream); + } } - /* ----------------------------------- Cleanup: Delete single files that were added to .zip ----------------------------------- */ @@ -126,7 +126,7 @@ public void testCreateZippedNonShapefile() throws IOException{ File zipfile_obj = createAndZipFiles(file_names, "not-quite-a-shape.zip"); // Pass the .zip to the ShapefileHandler - ShapefileHandler shp_handler = new ShapefileHandler(new FileInputStream(zipfile_obj)); + ShapefileHandler shp_handler = new ShapefileHandler(zipfile_obj); shp_handler.DEBUG= true; // Contains shapefile? @@ -157,7 +157,7 @@ public void testShapefileWithQpjAndQmd() throws IOException { File zipFile = createAndZipFiles(fileNames, "testShapeWithNewExtensions.zip"); // Pass the zip to the ShapefileHandler - ShapefileHandler shpHandler = new ShapefileHandler(new FileInputStream(zipFile)); + ShapefileHandler shpHandler = new ShapefileHandler(zipFile); shpHandler.DEBUG = true; // Check if it is recognized as a shapefile @@ -191,7 +191,7 @@ public void testZippedTwoShapefiles() throws IOException{ File zipfile_obj = createAndZipFiles(file_names, "two-shapes.zip"); // Pass the .zip to the ShapefileHandler - ShapefileHandler shp_handler = new ShapefileHandler(new FileInputStream(zipfile_obj)); + ShapefileHandler shp_handler = new ShapefileHandler(zipfile_obj); shp_handler.DEBUG= true; assertTrue(shp_handler.containsShapefile(), "verify shapefile existance"); @@ -217,7 +217,7 @@ public void testZippedTwoShapefiles() throws IOException{ // Rezip/Reorder the files File test_unzip_folder = Files.createDirectory(this.tempFolder.resolve("test_unzip")).toFile(); //File test_unzip_folder = new File("/Users/rmp553/Desktop/blah"); - shp_handler.rezipShapefileSets(new FileInputStream(zipfile_obj), test_unzip_folder ); + shp_handler.rezipShapefileSets(test_unzip_folder ); // Does the re-ordering do what we wanted? @@ -244,7 +244,7 @@ public void testZippedShapefileWithExtraFiles() throws IOException{ File zipfile_obj = createAndZipFiles(file_names, "shape-plus.zip"); // Pass the .zip to the ShapefileHandler - ShapefileHandler shp_handler = new ShapefileHandler(new FileInputStream(zipfile_obj)); + ShapefileHandler shp_handler = new ShapefileHandler(zipfile_obj); shp_handler.DEBUG= true; assertTrue(shp_handler.containsShapefile(), "verify shapefile existance"); @@ -264,7 +264,7 @@ public void testZippedShapefileWithExtraFiles() throws IOException{ File unzip2Folder = Files.createDirectory(this.tempFolder.resolve("test_unzip2")).toFile(); // Rezip/Reorder the files - shp_handler.rezipShapefileSets(new FileInputStream(zipfile_obj), unzip2Folder); + shp_handler.rezipShapefileSets(unzip2Folder); //shp_handler.rezipShapefileSets(new FileInputStream(zipfile_obj), new File("/Users/rmp553/Desktop/blah")); From c613154b846dc2befac8b7e55062a0fda0b9449d Mon Sep 17 00:00:00 2001 From: jo-pol Date: Thu, 5 Sep 2024 12:53:55 +0200 Subject: [PATCH 05/20] upload unit test --- .../impl/CreateNewDataFilesCommand.java | 5 +- .../command/impl/CreateNewDataFilesTest.java | 218 ++++++++++++++++++ .../util/shapefile/ShapefileHandlerTest.java | 15 +- 3 files changed, 222 insertions(+), 16 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index 2735c94faec..31b11a4a367 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -140,9 +140,10 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException if (newStorageIdentifier == null) { - if (getFilesTempDirectory() != null) { + var filesTempDirectory = getFilesTempDirectory(); + if (filesTempDirectory != null) { try { - tempFile = Files.createTempFile(Paths.get(getFilesTempDirectory()), "tmp", "upload"); + tempFile = Files.createTempFile(Paths.get(filesTempDirectory), "tmp", "upload"); // "temporary" location is the key here; this is why we are not using // the DataStore framework for this - the assumption is that // temp files will always be stored on the local filesystem. diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java new file mode 100644 index 00000000000..f0d3340a3e4 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java @@ -0,0 +1,218 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; +import edu.harvard.iq.dataverse.util.JhoveFileType; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; +import org.jetbrains.annotations.NotNull; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.MockedStatic; +import org.mockito.Mockito; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +import static org.apache.commons.io.file.FilesUncheck.createDirectories; +import static org.apache.commons.io.file.PathUtils.deleteDirectory; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; + + +@LocalJvmSettings +public class CreateNewDataFilesTest { + @BeforeEach + public void cleanTmpDir() throws IOException { + var tmpDir = Paths.get("target/test/tmp"); + if(tmpDir.toFile().exists()) + deleteDirectory(tmpDir); + } + + @Test + @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/tmp") + public void execute_fails_to_upload_when_tmp_does_not_exist() throws FileNotFoundException { + + mockTmpLookup(); + var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion()); + var ctxt = mockCommandContext(mockQuota(true, 0L)); + + assertThatThrownBy(() -> cmd.execute(ctxt)) + .isInstanceOf(CommandException.class) + .hasMessageContaining("Failed to save the upload as a temp file (temp disk space?)") + .hasRootCauseInstanceOf(NoSuchFileException.class) + .getRootCause() + .hasMessageStartingWith("target/test/tmp/temp/tmp"); + } + + @Test + @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/tmp") + public void execute_fails_to_upload_too_big_files() throws FileNotFoundException { + createDirectories(Path.of("target/test/tmp/temp")); + + mockTmpLookup(); + var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion()); + var ctxt = mockCommandContext(mockQuota(true, 1000L)); + + assertThatThrownBy(() -> cmd.execute(ctxt)) + .isInstanceOf(CommandException.class) + .hasMessage("This file size (56.0 KB) exceeds the size limit of 1000 B."); + } + + @Test + @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/tmp") + public void execute_fails_on_too_little_remaining_storage() throws FileNotFoundException { + createDirectories(Path.of("target/test/tmp/temp")); + + mockTmpLookup(); + var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion()); + var ctxt = mockCommandContext(mockQuota(true, 1000000L)); + try (MockedStatic mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { + mockedStatic.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); + + assertThatThrownBy(() -> cmd.execute(ctxt)) + .isInstanceOf(CommandException.class) + .hasMessage("This file (size 56.0 KB) exceeds the remaining storage quota of 500 B."); + } + } + + @Test + @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/tmp") + public void execute_succeeds() throws FileNotFoundException, CommandException { + var tempDir = Path.of("target/test/tmp/temp"); + var testFile = "scripts/search/data/binary/trees.zip"; + createDirectories(tempDir); + + mockTmpLookup(); + var cmd = createCmd(testFile, mockDatasetVersion()); + var ctxt = mockCommandContext(mockQuota(false, 1000000L)); + try (MockedStatic mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { + mockedStatic.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); + + var result = cmd.execute(ctxt); + + assertThat(result.getErrors()).hasSize(0); + assertThat(result.getDataFiles()).hasSize(1); + + var dataFile = result.getDataFiles().subList(0, 1).get(0); + var storageId = dataFile.getStorageIdentifier(); + + // uploaded zip remains in tmp directory + assertThat(tempDir.toFile().list()).hasSize(1); + assertThat(tempDir.resolve(storageId).toFile().length()) + .isEqualTo(new File(testFile).length()); + } + } + + @Test + @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/tmp") + public void execute_with_2_shapes() throws Exception { + var tempDir = Path.of("target/test/tmp/temp"); + List file_names = Arrays.asList("shape1.shp", "shape1.shx", "shape1.dbf", "shape1.prj", "shape1.fbn", "shape1.fbx", // 1st shapefile + "shape2.shp", "shape2.shx", "shape2.dbf", "shape2.prj", // 2nd shapefile + "shape2.txt", "shape2.pdf", "shape2", // single files, same basename as 2nd shapefile + "README.MD", "shp_dictionary.xls", "notes"); //, "prj"); // single files + File testFile = createAndZipFiles(file_names, "two-shapes.zip"); + + // TODO mock CDI provider to allow mime type check + createDirectories(tempDir); + + mockTmpLookup(); + var cmd = createCmd(testFile.toString(), mockDatasetVersion()); + var ctxt = mockCommandContext(mockQuota(false, 1000000L)); + + try (MockedStatic mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { + mockedStatic.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); + + // the test + var result = cmd.execute(ctxt); + + assertThat(result.getErrors()).hasSize(0); + assertThat(result.getDataFiles()).hasSize(1); + + var dataFile = result.getDataFiles().subList(0, 1).get(0); + var storageId = dataFile.getStorageIdentifier(); + + // uploaded zip remains in tmp directory + assertThat(tempDir.toFile().list()).hasSize(1); + assertThat(tempDir.resolve(storageId).toFile().length()) + .isEqualTo(testFile.length()); + } + } + + @TempDir + Path tempFolder; + + // simplified version from ShapefileHandlerTest + private File createAndZipFiles(List file_names, String zipfile_name) throws IOException { + var zip_file_obj = tempFolder.resolve(zipfile_name).toFile(); + try (ZipOutputStream zip_stream = new ZipOutputStream(new FileOutputStream(zip_file_obj))) { + for (var file_name : file_names) { + zip_stream.putNextEntry(new ZipEntry(file_name)); + zip_stream.write("content".getBytes(), 0, 7); + zip_stream.closeEntry(); + } + } + return zip_file_obj; + } + + private static @NotNull CreateNewDataFilesCommand createCmd(String name, DatasetVersion dsVersion) throws FileNotFoundException { + return new CreateNewDataFilesCommand( + Mockito.mock(DataverseRequest.class), + dsVersion, + new FileInputStream(name), + "example.zip", + "application/zip", + null, + new UploadSessionQuotaLimit(1000L, 500L), + "sha"); + } + + private static @NotNull CommandContext mockCommandContext(SystemConfig sysCfg) { + var ctxt = Mockito.mock(CommandContext.class); + Mockito.when(ctxt.systemConfig()).thenReturn(sysCfg); + return ctxt; + } + + private static @NotNull SystemConfig mockQuota(boolean isStorageQuataEnforced, long maxFileUploadSizeForStore) { + var sysCfg = Mockito.mock(SystemConfig.class); + Mockito.when(sysCfg.isStorageQuotasEnforced()).thenReturn(isStorageQuataEnforced); + Mockito.when(sysCfg.getMaxFileUploadSizeForStore(any())).thenReturn(maxFileUploadSizeForStore); + return sysCfg; + } + + private static void mockTmpLookup() { + JvmSettings mockFilesDirectory = Mockito.mock(JvmSettings.class); + Mockito.when(mockFilesDirectory.lookup()).thenReturn("/mocked/path"); + } + + private static @NotNull DatasetVersion mockDatasetVersion() { + var dsVersion = Mockito.mock(DatasetVersion.class); + Mockito.when(dsVersion.getDataset()).thenReturn(Mockito.mock(Dataset.class)); + return dsVersion; + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java b/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java index 587ea265f29..c5684534335 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java @@ -63,22 +63,9 @@ private File createBlankFile(String filename) throws IOException { } return Files.createFile(tempFolder.resolve(filename)).toFile(); } - - private FileInputStream createZipReturnFilestream(List file_names, String zipfile_name) throws IOException{ - - File zip_file_obj = this.createAndZipFiles(file_names, zipfile_name); - if (zip_file_obj == null){ - return null; - } - - FileInputStream file_input_stream = new FileInputStream(zip_file_obj); - return file_input_stream; - - } - /* - Convenience class to create .zip file and return a FileInputStream + Convenience method to create .zip file and return a File @param List file_names - List of filenames to add to .zip. These names will be used to create 0 length files @param String zipfile_name - Name of .zip file to create From 1fae6d66055f476aa14dcc52d1d613a15de2e0a4 Mon Sep 17 00:00:00 2001 From: jo-pol Date: Thu, 12 Sep 2024 11:07:29 +0200 Subject: [PATCH 06/20] cleanup --- .../command/impl/CreateNewDataFilesTest.java | 59 ++++++++----------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java index f0d3340a3e4..caa9ae079dd 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java @@ -14,23 +14,17 @@ import org.jetbrains.annotations.NotNull; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; import org.mockito.MockedStatic; import org.mockito.Mockito; -import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; -import java.nio.file.Files; import java.nio.file.NoSuchFileException; import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.List; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; @@ -39,21 +33,22 @@ import static org.apache.commons.io.file.PathUtils.deleteDirectory; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.ArgumentMatchers.any; @LocalJvmSettings public class CreateNewDataFilesTest { + // TODO keep constants for annotations in sync with class name + Path testDir = Path.of("target/test/").resolve(getClass().getSimpleName()); + @BeforeEach public void cleanTmpDir() throws IOException { - var tmpDir = Paths.get("target/test/tmp"); - if(tmpDir.toFile().exists()) - deleteDirectory(tmpDir); + if(testDir.toFile().exists()) + deleteDirectory(testDir); } @Test - @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/tmp") + @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") public void execute_fails_to_upload_when_tmp_does_not_exist() throws FileNotFoundException { mockTmpLookup(); @@ -65,13 +60,13 @@ public void execute_fails_to_upload_when_tmp_does_not_exist() throws FileNotFoun .hasMessageContaining("Failed to save the upload as a temp file (temp disk space?)") .hasRootCauseInstanceOf(NoSuchFileException.class) .getRootCause() - .hasMessageStartingWith("target/test/tmp/temp/tmp"); + .hasMessageStartingWith("target/test/CreateNewDataFilesTest/tmp/temp/tmp"); } @Test - @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/tmp") + @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") public void execute_fails_to_upload_too_big_files() throws FileNotFoundException { - createDirectories(Path.of("target/test/tmp/temp")); + createDirectories(Path.of("target/test/CreateNewDataFilesTest/tmp/temp")); mockTmpLookup(); var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion()); @@ -83,9 +78,9 @@ public void execute_fails_to_upload_too_big_files() throws FileNotFoundException } @Test - @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/tmp") + @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") public void execute_fails_on_too_little_remaining_storage() throws FileNotFoundException { - createDirectories(Path.of("target/test/tmp/temp")); + createDirectories(Path.of("target/test/CreateNewDataFilesTest/tmp/temp")); mockTmpLookup(); var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion()); @@ -100,9 +95,9 @@ public void execute_fails_on_too_little_remaining_storage() throws FileNotFoundE } @Test - @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/tmp") + @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") public void execute_succeeds() throws FileNotFoundException, CommandException { - var tempDir = Path.of("target/test/tmp/temp"); + var tempDir = testDir.resolve("tmp/temp"); var testFile = "scripts/search/data/binary/trees.zip"; createDirectories(tempDir); @@ -128,17 +123,19 @@ public void execute_succeeds() throws FileNotFoundException, CommandException { } @Test - @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/tmp") + @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") public void execute_with_2_shapes() throws Exception { - var tempDir = Path.of("target/test/tmp/temp"); - List file_names = Arrays.asList("shape1.shp", "shape1.shx", "shape1.dbf", "shape1.prj", "shape1.fbn", "shape1.fbx", // 1st shapefile - "shape2.shp", "shape2.shx", "shape2.dbf", "shape2.prj", // 2nd shapefile - "shape2.txt", "shape2.pdf", "shape2", // single files, same basename as 2nd shapefile - "README.MD", "shp_dictionary.xls", "notes"); //, "prj"); // single files - File testFile = createAndZipFiles(file_names, "two-shapes.zip"); + var tempDir = testDir.resolve("tmp/temp"); + createDirectories(tempDir); + + File testFile = createAndZipFiles(Arrays.asList( + "shape1.shp", "shape1.shx", "shape1.dbf", "shape1.prj", "shape1.fbn", "shape1.fbx", // 1st shapefile + "shape2.shp", "shape2.shx", "shape2.dbf", "shape2.prj", // 2nd shapefile + "shape2.txt", "shape2.pdf", "shape2", // single files, same basename as 2nd shapefile + "README.MD", "shp_dictionary.xls", "notes" // single files + ), testDir.resolve("shapes.zip")); // TODO mock CDI provider to allow mime type check - createDirectories(tempDir); mockTmpLookup(); var cmd = createCmd(testFile.toString(), mockDatasetVersion()); @@ -163,20 +160,16 @@ public void execute_with_2_shapes() throws Exception { } } - @TempDir - Path tempFolder; - // simplified version from ShapefileHandlerTest - private File createAndZipFiles(List file_names, String zipfile_name) throws IOException { - var zip_file_obj = tempFolder.resolve(zipfile_name).toFile(); - try (ZipOutputStream zip_stream = new ZipOutputStream(new FileOutputStream(zip_file_obj))) { + private File createAndZipFiles(List file_names, Path zipfile) throws IOException { + try (ZipOutputStream zip_stream = new ZipOutputStream(new FileOutputStream(zipfile.toFile()))) { for (var file_name : file_names) { zip_stream.putNextEntry(new ZipEntry(file_name)); zip_stream.write("content".getBytes(), 0, 7); zip_stream.closeEntry(); } } - return zip_file_obj; + return zipfile.toFile(); } private static @NotNull CreateNewDataFilesCommand createCmd(String name, DatasetVersion dsVersion) throws FileNotFoundException { From c02b7ae9f1c80e2f3153f68fd0a8a3756838c90c Mon Sep 17 00:00:00 2001 From: jo-pol Date: Thu, 12 Sep 2024 12:26:18 +0200 Subject: [PATCH 07/20] fixed mimetype detection --- .../harvard/iq/dataverse/util/FileUtil.java | 9 ++-- .../command/impl/CreateNewDataFilesTest.java | 41 ++++++++++--------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 5b0c44e1aec..68f1873ec1c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -514,11 +514,12 @@ public static String determineFileType(File f, String fileName) throws IOExcepti if (shp_handler.containsShapefile()){ // logger.info("------- shapefile FOUND ----------"); fileType = ShapefileHandler.SHAPEFILE_FILE_TYPE; //"application/zipped-shapefile"; - } + } else { - Optional bagItFileHandler = CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler(); - if(bagItFileHandler.isPresent() && bagItFileHandler.get().isBagItPackage(fileName, f)) { - fileType = BagItFileHandler.FILE_TYPE; + Optional bagItFileHandler = CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler(); + if (bagItFileHandler.isPresent() && bagItFileHandler.get().isBagItPackage(fileName, f)) { + fileType = BagItFileHandler.FILE_TYPE; + } } } diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java index caa9ae079dd..5f91edebf52 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java @@ -1,7 +1,9 @@ package edu.harvard.iq.dataverse.engine.command.impl; +import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; @@ -11,6 +13,7 @@ import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.testing.JvmSetting; import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; +import org.assertj.core.api.ObjectArrayAssert; import org.jetbrains.annotations.NotNull; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -135,29 +138,27 @@ public void execute_with_2_shapes() throws Exception { "README.MD", "shp_dictionary.xls", "notes" // single files ), testDir.resolve("shapes.zip")); - // TODO mock CDI provider to allow mime type check - mockTmpLookup(); var cmd = createCmd(testFile.toString(), mockDatasetVersion()); var ctxt = mockCommandContext(mockQuota(false, 1000000L)); - try (MockedStatic mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { - mockedStatic.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); - - // the test - var result = cmd.execute(ctxt); - - assertThat(result.getErrors()).hasSize(0); - assertThat(result.getDataFiles()).hasSize(1); - - var dataFile = result.getDataFiles().subList(0, 1).get(0); - var storageId = dataFile.getStorageIdentifier(); - - // uploaded zip remains in tmp directory - assertThat(tempDir.toFile().list()).hasSize(1); - assertThat(tempDir.resolve(storageId).toFile().length()) - .isEqualTo(testFile.length()); - } + // the test + var result = cmd.execute(ctxt); + + assertThat(result.getErrors()).hasSize(0); + assertThat(result.getDataFiles().stream().map(DataFile::toString)) + .containsExactlyInAnyOrder( + "[DataFile id:null label:shp_dictionary.xls]", + "[DataFile id:null label:notes]", + "[DataFile id:null label:shape1.zip]", + "[DataFile id:null label:shape2.txt]", + "[DataFile id:null label:shape2.pdf]", + "[DataFile id:null label:shape2]", + "[DataFile id:null label:shape2.zip]", + "[DataFile id:null label:README.MD]" + ); + var ids = result.getDataFiles().stream().map(DataFile::getStorageIdentifier).toList(); + assertThat(tempDir.toFile().list()).containsExactlyInAnyOrderElementsOf(ids); } // simplified version from ShapefileHandlerTest @@ -165,7 +166,7 @@ private File createAndZipFiles(List file_names, Path zipfile) throws IOE try (ZipOutputStream zip_stream = new ZipOutputStream(new FileOutputStream(zipfile.toFile()))) { for (var file_name : file_names) { zip_stream.putNextEntry(new ZipEntry(file_name)); - zip_stream.write("content".getBytes(), 0, 7); + zip_stream.write((file_name + " content").getBytes()); zip_stream.closeEntry(); } } From d786bf9cb9ff3ff000bc77c95b31a3ab5cf25de9 Mon Sep 17 00:00:00 2001 From: jo-pol Date: Thu, 12 Sep 2024 15:34:57 +0200 Subject: [PATCH 08/20] testable FileUtil with mock-able BagItFileHandler --- .../harvard/iq/dataverse/util/FileUtil.java | 9 +- .../dataverse/util/file/BagItFileHandler.java | 5 ++ .../command/impl/CreateNewDataFilesTest.java | 86 +++++++++++-------- 3 files changed, 59 insertions(+), 41 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 68f1873ec1c..349bd684042 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -514,12 +514,11 @@ public static String determineFileType(File f, String fileName) throws IOExcepti if (shp_handler.containsShapefile()){ // logger.info("------- shapefile FOUND ----------"); fileType = ShapefileHandler.SHAPEFILE_FILE_TYPE; //"application/zipped-shapefile"; - } else { + } - Optional bagItFileHandler = CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler(); - if (bagItFileHandler.isPresent() && bagItFileHandler.get().isBagItPackage(fileName, f)) { - fileType = BagItFileHandler.FILE_TYPE; - } + Optional bagItFileHandler = BagItFileHandler.getFromCDI(); + if(bagItFileHandler.isPresent() && bagItFileHandler.get().isBagItPackage(fileName, f)) { + fileType = BagItFileHandler.FILE_TYPE; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandler.java b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandler.java index 6162df049f8..6748b8e1083 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandler.java @@ -11,6 +11,7 @@ import edu.harvard.iq.dataverse.util.bagit.data.FileDataProvider; import edu.harvard.iq.dataverse.util.bagit.data.FileDataProviderFactory; import edu.harvard.iq.dataverse.util.bagit.data.FileUtilWrapper; +import jakarta.enterprise.inject.spi.CDI; import java.io.File; import java.io.IOException; @@ -44,6 +45,10 @@ public BagItFileHandler(FileUtilWrapper fileUtil, FileDataProviderFactory fileDa this.postProcessor = postProcessor; } + public static Optional getFromCDI() { + return CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler(); + } + public boolean isBagItPackage(String uploadedFilename, File file) throws IOException { try(FileDataProvider fileDataProvider = fileDataProviderFactory.getFileDataProvider(file)) { boolean isBagItPackage = bagValidator.hasBagItPackage(fileDataProvider); diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java index 5f91edebf52..dfe81cc66a4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java @@ -3,7 +3,6 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; @@ -11,9 +10,9 @@ import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; import edu.harvard.iq.dataverse.util.JhoveFileType; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.file.BagItFileHandler; import edu.harvard.iq.dataverse.util.testing.JvmSetting; import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; -import org.assertj.core.api.ObjectArrayAssert; import org.jetbrains.annotations.NotNull; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -29,9 +28,11 @@ import java.nio.file.Path; import java.util.Arrays; import java.util.List; +import java.util.Optional; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; +import static edu.harvard.iq.dataverse.DataFile.ChecksumType.MD5; import static org.apache.commons.io.file.FilesUncheck.createDirectories; import static org.apache.commons.io.file.PathUtils.deleteDirectory; import static org.assertj.core.api.Assertions.assertThat; @@ -56,7 +57,7 @@ public void execute_fails_to_upload_when_tmp_does_not_exist() throws FileNotFoun mockTmpLookup(); var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion()); - var ctxt = mockCommandContext(mockQuota(true, 0L)); + var ctxt = mockCommandContext(mockSysConfig(true, 0L, MD5)); assertThatThrownBy(() -> cmd.execute(ctxt)) .isInstanceOf(CommandException.class) @@ -73,7 +74,7 @@ public void execute_fails_to_upload_too_big_files() throws FileNotFoundException mockTmpLookup(); var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion()); - var ctxt = mockCommandContext(mockQuota(true, 1000L)); + var ctxt = mockCommandContext(mockSysConfig(true, 1000L, MD5)); assertThatThrownBy(() -> cmd.execute(ctxt)) .isInstanceOf(CommandException.class) @@ -87,7 +88,7 @@ public void execute_fails_on_too_little_remaining_storage() throws FileNotFoundE mockTmpLookup(); var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion()); - var ctxt = mockCommandContext(mockQuota(true, 1000000L)); + var ctxt = mockCommandContext(mockSysConfig(true, 1000000L, MD5)); try (MockedStatic mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { mockedStatic.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); @@ -99,29 +100,28 @@ public void execute_fails_on_too_little_remaining_storage() throws FileNotFoundE @Test @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") - public void execute_succeeds() throws FileNotFoundException, CommandException { + public void execute_does_not_unzip() throws Exception { // TODO fix warnings var tempDir = testDir.resolve("tmp/temp"); - var testFile = "scripts/search/data/binary/trees.zip"; + var testFile = "scripts/search/data/binary/3files.zip"; createDirectories(tempDir); mockTmpLookup(); var cmd = createCmd(testFile, mockDatasetVersion()); - var ctxt = mockCommandContext(mockQuota(false, 1000000L)); + var ctxt = mockCommandContext(mockSysConfig(false, 1000000L, MD5)); try (MockedStatic mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { mockedStatic.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); + // the test var result = cmd.execute(ctxt); assertThat(result.getErrors()).hasSize(0); assertThat(result.getDataFiles()).hasSize(1); - - var dataFile = result.getDataFiles().subList(0, 1).get(0); - var storageId = dataFile.getStorageIdentifier(); - - // uploaded zip remains in tmp directory - assertThat(tempDir.toFile().list()).hasSize(1); - assertThat(tempDir.resolve(storageId).toFile().length()) - .isEqualTo(new File(testFile).length()); + assertThat(result.getDataFiles().stream().map(DataFile::toString)) + .containsExactlyInAnyOrder( + "[DataFile id:null label:example.zip]" + ); + var ids = result.getDataFiles().stream().map(DataFile::getStorageIdentifier).toList(); + assertThat(tempDir.toFile().list()).containsExactlyInAnyOrderElementsOf(ids); } } @@ -140,25 +140,38 @@ public void execute_with_2_shapes() throws Exception { mockTmpLookup(); var cmd = createCmd(testFile.toString(), mockDatasetVersion()); - var ctxt = mockCommandContext(mockQuota(false, 1000000L)); - - // the test - var result = cmd.execute(ctxt); - - assertThat(result.getErrors()).hasSize(0); - assertThat(result.getDataFiles().stream().map(DataFile::toString)) - .containsExactlyInAnyOrder( - "[DataFile id:null label:shp_dictionary.xls]", - "[DataFile id:null label:notes]", - "[DataFile id:null label:shape1.zip]", - "[DataFile id:null label:shape2.txt]", - "[DataFile id:null label:shape2.pdf]", - "[DataFile id:null label:shape2]", - "[DataFile id:null label:shape2.zip]", - "[DataFile id:null label:README.MD]" - ); - var ids = result.getDataFiles().stream().map(DataFile::getStorageIdentifier).toList(); - assertThat(tempDir.toFile().list()).containsExactlyInAnyOrderElementsOf(ids); + var ctxt = mockCommandContext(mockSysConfig(false, 1000000L, MD5)); + try (var mockedHandler = Mockito.mockStatic(BagItFileHandler.class); + var mockedJHoveFileType = Mockito.mockStatic(JhoveFileType.class) + ) { + var opt = mockOptional(false); + mockedHandler.when(BagItFileHandler::getFromCDI).thenReturn(opt); + mockedJHoveFileType.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); + + // the test + var result = cmd.execute(ctxt); + + assertThat(result.getErrors()).hasSize(0); + assertThat(result.getDataFiles().stream().map(DataFile::toString)) + .containsExactlyInAnyOrder( + "[DataFile id:null label:shp_dictionary.xls]", + "[DataFile id:null label:notes]", + "[DataFile id:null label:shape1.zip]", + "[DataFile id:null label:shape2.txt]", + "[DataFile id:null label:shape2.pdf]", + "[DataFile id:null label:shape2]", + "[DataFile id:null label:shape2.zip]", + "[DataFile id:null label:README.MD]" + ); + var ids = result.getDataFiles().stream().map(DataFile::getStorageIdentifier).toList(); + assertThat(tempDir.toFile().list()).containsExactlyInAnyOrderElementsOf(ids); + } + } + + private @NotNull Optional mockOptional(boolean t) { + var mockedOptional = Mockito.mock(Optional.class); + Mockito.when(mockedOptional.isPresent()).thenReturn(t); + return mockedOptional; } // simplified version from ShapefileHandlerTest @@ -191,10 +204,11 @@ private File createAndZipFiles(List file_names, Path zipfile) throws IOE return ctxt; } - private static @NotNull SystemConfig mockQuota(boolean isStorageQuataEnforced, long maxFileUploadSizeForStore) { + private static @NotNull SystemConfig mockSysConfig(boolean isStorageQuataEnforced, long maxFileUploadSizeForStore, DataFile.ChecksumType checksumType) { var sysCfg = Mockito.mock(SystemConfig.class); Mockito.when(sysCfg.isStorageQuotasEnforced()).thenReturn(isStorageQuataEnforced); Mockito.when(sysCfg.getMaxFileUploadSizeForStore(any())).thenReturn(maxFileUploadSizeForStore); + Mockito.when(sysCfg.getFileFixityChecksumAlgorithm()).thenReturn(checksumType); return sysCfg; } From 2ebde7bf4995db5144bcbd9621741b388781d610 Mon Sep 17 00:00:00 2001 From: jo-pol Date: Thu, 12 Sep 2024 16:06:18 +0200 Subject: [PATCH 09/20] fixed expectations --- .../command/impl/CreateNewDataFilesTest.java | 36 ++++++++++++++----- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java index dfe81cc66a4..14f75a57be2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java @@ -57,7 +57,7 @@ public void execute_fails_to_upload_when_tmp_does_not_exist() throws FileNotFoun mockTmpLookup(); var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion()); - var ctxt = mockCommandContext(mockSysConfig(true, 0L, MD5)); + var ctxt = mockCommandContext(mockSysConfig(true, 0L, MD5, 10)); assertThatThrownBy(() -> cmd.execute(ctxt)) .isInstanceOf(CommandException.class) @@ -74,7 +74,7 @@ public void execute_fails_to_upload_too_big_files() throws FileNotFoundException mockTmpLookup(); var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion()); - var ctxt = mockCommandContext(mockSysConfig(true, 1000L, MD5)); + var ctxt = mockCommandContext(mockSysConfig(true, 1000L, MD5, 10)); assertThatThrownBy(() -> cmd.execute(ctxt)) .isInstanceOf(CommandException.class) @@ -88,7 +88,7 @@ public void execute_fails_on_too_little_remaining_storage() throws FileNotFoundE mockTmpLookup(); var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion()); - var ctxt = mockCommandContext(mockSysConfig(true, 1000000L, MD5)); + var ctxt = mockCommandContext(mockSysConfig(true, 1000000L, MD5, 0)); try (MockedStatic mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { mockedStatic.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); @@ -97,17 +97,33 @@ public void execute_fails_on_too_little_remaining_storage() throws FileNotFoundE .hasMessage("This file (size 56.0 KB) exceeds the remaining storage quota of 500 B."); } } + @Test + @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") + public void execute_fails_on_too_little_remaining_storage_for_unzipped_files() throws FileNotFoundException { + createDirectories(Path.of("target/test/CreateNewDataFilesTest/tmp/temp")); + + mockTmpLookup(); + var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion()); + var ctxt = mockCommandContext(mockSysConfig(true, 1000000L, MD5, 10)); + try (MockedStatic mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { + mockedStatic.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); + + assertThatThrownBy(() -> cmd.execute(ctxt)) + .isInstanceOf(CommandException.class) + .hasMessage("Unzipped files exceed the remaining storage quota of 500 B."); + } + } @Test @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") - public void execute_does_not_unzip() throws Exception { // TODO fix warnings + public void execute_without_shape_files() throws Exception { var tempDir = testDir.resolve("tmp/temp"); var testFile = "scripts/search/data/binary/3files.zip"; createDirectories(tempDir); mockTmpLookup(); var cmd = createCmd(testFile, mockDatasetVersion()); - var ctxt = mockCommandContext(mockSysConfig(false, 1000000L, MD5)); + var ctxt = mockCommandContext(mockSysConfig(false, 1000000L, MD5, 10)); try (MockedStatic mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { mockedStatic.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); @@ -115,10 +131,11 @@ public void execute_does_not_unzip() throws Exception { // TODO fix warnings var result = cmd.execute(ctxt); assertThat(result.getErrors()).hasSize(0); - assertThat(result.getDataFiles()).hasSize(1); assertThat(result.getDataFiles().stream().map(DataFile::toString)) .containsExactlyInAnyOrder( - "[DataFile id:null label:example.zip]" + "[DataFile id:null label:file1.txt]", + "[DataFile id:null label:file2.txt]", + "[DataFile id:null label:file3.txt]" ); var ids = result.getDataFiles().stream().map(DataFile::getStorageIdentifier).toList(); assertThat(tempDir.toFile().list()).containsExactlyInAnyOrderElementsOf(ids); @@ -140,7 +157,7 @@ public void execute_with_2_shapes() throws Exception { mockTmpLookup(); var cmd = createCmd(testFile.toString(), mockDatasetVersion()); - var ctxt = mockCommandContext(mockSysConfig(false, 1000000L, MD5)); + var ctxt = mockCommandContext(mockSysConfig(false, 1000000L, MD5, 0)); try (var mockedHandler = Mockito.mockStatic(BagItFileHandler.class); var mockedJHoveFileType = Mockito.mockStatic(JhoveFileType.class) ) { @@ -204,11 +221,12 @@ private File createAndZipFiles(List file_names, Path zipfile) throws IOE return ctxt; } - private static @NotNull SystemConfig mockSysConfig(boolean isStorageQuataEnforced, long maxFileUploadSizeForStore, DataFile.ChecksumType checksumType) { + private static @NotNull SystemConfig mockSysConfig(boolean isStorageQuataEnforced, long maxFileUploadSizeForStore, DataFile.ChecksumType checksumType, int zipUploadFilesLimit) { var sysCfg = Mockito.mock(SystemConfig.class); Mockito.when(sysCfg.isStorageQuotasEnforced()).thenReturn(isStorageQuataEnforced); Mockito.when(sysCfg.getMaxFileUploadSizeForStore(any())).thenReturn(maxFileUploadSizeForStore); Mockito.when(sysCfg.getFileFixityChecksumAlgorithm()).thenReturn(checksumType); + Mockito.when(sysCfg.getZipUploadFilesLimit()).thenReturn(zipUploadFilesLimit); return sysCfg; } From b93561b7bb7f1ed8045b6b572ecebef017b7c80a Mon Sep 17 00:00:00 2001 From: jo-pol Date: Mon, 16 Sep 2024 11:41:31 +0200 Subject: [PATCH 10/20] try around bagItFileHandler --- .../harvard/iq/dataverse/util/FileUtil.java | 11 +-- .../dataverse/util/file/BagItFileHandler.java | 5 -- .../command/impl/CreateNewDataFilesTest.java | 70 ++++++------------- 3 files changed, 28 insertions(+), 58 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 349bd684042..4f61f62f742 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -515,10 +515,13 @@ public static String determineFileType(File f, String fileName) throws IOExcepti // logger.info("------- shapefile FOUND ----------"); fileType = ShapefileHandler.SHAPEFILE_FILE_TYPE; //"application/zipped-shapefile"; } - - Optional bagItFileHandler = BagItFileHandler.getFromCDI(); - if(bagItFileHandler.isPresent() && bagItFileHandler.get().isBagItPackage(fileName, f)) { - fileType = BagItFileHandler.FILE_TYPE; + try { + Optional bagItFileHandler = CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler(); + if (bagItFileHandler.isPresent() && bagItFileHandler.get().isBagItPackage(fileName, f)) { + fileType = BagItFileHandler.FILE_TYPE; + } + } catch (Exception e) { + logger.warning("Error checking for BagIt package: " + e.getMessage()); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandler.java b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandler.java index 6748b8e1083..6162df049f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandler.java @@ -11,7 +11,6 @@ import edu.harvard.iq.dataverse.util.bagit.data.FileDataProvider; import edu.harvard.iq.dataverse.util.bagit.data.FileDataProviderFactory; import edu.harvard.iq.dataverse.util.bagit.data.FileUtilWrapper; -import jakarta.enterprise.inject.spi.CDI; import java.io.File; import java.io.IOException; @@ -45,10 +44,6 @@ public BagItFileHandler(FileUtilWrapper fileUtil, FileDataProviderFactory fileDa this.postProcessor = postProcessor; } - public static Optional getFromCDI() { - return CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler(); - } - public boolean isBagItPackage(String uploadedFilename, File file) throws IOException { try(FileDataProvider fileDataProvider = fileDataProviderFactory.getFileDataProvider(file)) { boolean isBagItPackage = bagValidator.hasBagItPackage(fileDataProvider); diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java index 14f75a57be2..2d9175cb181 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java @@ -8,27 +8,30 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; +import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JhoveFileType; import edu.harvard.iq.dataverse.util.SystemConfig; -import edu.harvard.iq.dataverse.util.file.BagItFileHandler; import edu.harvard.iq.dataverse.util.testing.JvmSetting; import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; import org.jetbrains.annotations.NotNull; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.mockito.MockedStatic; import org.mockito.Mockito; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; +import java.io.PrintStream; import java.nio.file.NoSuchFileException; import java.nio.file.Path; import java.util.Arrays; import java.util.List; -import java.util.Optional; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; @@ -44,13 +47,19 @@ public class CreateNewDataFilesTest { // TODO keep constants for annotations in sync with class name Path testDir = Path.of("target/test/").resolve(getClass().getSimpleName()); + PrintStream original_stderr; @BeforeEach public void cleanTmpDir() throws IOException { + original_stderr = System.err; if(testDir.toFile().exists()) deleteDirectory(testDir); } + @AfterEach void restoreStderr() { + System.setErr(original_stderr); + } + @Test @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") public void execute_fails_to_upload_when_tmp_does_not_exist() throws FileNotFoundException { @@ -69,48 +78,18 @@ public void execute_fails_to_upload_when_tmp_does_not_exist() throws FileNotFoun @Test @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") - public void execute_fails_to_upload_too_big_files() throws FileNotFoundException { - createDirectories(Path.of("target/test/CreateNewDataFilesTest/tmp/temp")); - - mockTmpLookup(); - var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion()); - var ctxt = mockCommandContext(mockSysConfig(true, 1000L, MD5, 10)); - - assertThatThrownBy(() -> cmd.execute(ctxt)) - .isInstanceOf(CommandException.class) - .hasMessage("This file size (56.0 KB) exceeds the size limit of 1000 B."); - } - - @Test - @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") - public void execute_fails_on_too_little_remaining_storage() throws FileNotFoundException { - createDirectories(Path.of("target/test/CreateNewDataFilesTest/tmp/temp")); - - mockTmpLookup(); - var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion()); - var ctxt = mockCommandContext(mockSysConfig(true, 1000000L, MD5, 0)); - try (MockedStatic mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { - mockedStatic.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); - - assertThatThrownBy(() -> cmd.execute(ctxt)) - .isInstanceOf(CommandException.class) - .hasMessage("This file (size 56.0 KB) exceeds the remaining storage quota of 500 B."); - } - } - @Test - @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") - public void execute_fails_on_too_little_remaining_storage_for_unzipped_files() throws FileNotFoundException { + public void execute_fails_on_size_limit() throws Exception { createDirectories(Path.of("target/test/CreateNewDataFilesTest/tmp/temp")); mockTmpLookup(); - var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion()); - var ctxt = mockCommandContext(mockSysConfig(true, 1000000L, MD5, 10)); - try (MockedStatic mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { + var cmd = createCmd("scripts/search/data/binary/3files.zip", mockDatasetVersion()); + var ctxt = mockCommandContext(mockSysConfig(true, 50L, MD5, 0)); + try (var mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { mockedStatic.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); assertThatThrownBy(() -> cmd.execute(ctxt)) .isInstanceOf(CommandException.class) - .hasMessage("Unzipped files exceed the remaining storage quota of 500 B."); + .hasMessage("This file size (462 B) exceeds the size limit of 50 B."); } } @@ -144,7 +123,7 @@ public void execute_without_shape_files() throws Exception { @Test @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") - public void execute_with_2_shapes() throws Exception { + public void execute_with_2_shapes_does_not_check_zipUploadFilesLimit() throws Exception { var tempDir = testDir.resolve("tmp/temp"); createDirectories(tempDir); @@ -158,11 +137,7 @@ public void execute_with_2_shapes() throws Exception { mockTmpLookup(); var cmd = createCmd(testFile.toString(), mockDatasetVersion()); var ctxt = mockCommandContext(mockSysConfig(false, 1000000L, MD5, 0)); - try (var mockedHandler = Mockito.mockStatic(BagItFileHandler.class); - var mockedJHoveFileType = Mockito.mockStatic(JhoveFileType.class) - ) { - var opt = mockOptional(false); - mockedHandler.when(BagItFileHandler::getFromCDI).thenReturn(opt); + try (var mockedJHoveFileType = Mockito.mockStatic(JhoveFileType.class)) { mockedJHoveFileType.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); // the test @@ -185,14 +160,11 @@ public void execute_with_2_shapes() throws Exception { } } - private @NotNull Optional mockOptional(boolean t) { - var mockedOptional = Mockito.mock(Optional.class); - Mockito.when(mockedOptional.isPresent()).thenReturn(t); - return mockedOptional; - } - // simplified version from ShapefileHandlerTest private File createAndZipFiles(List file_names, Path zipfile) throws IOException { + // TODO provoke java.util.zip.ZipException: only DEFLATED entries can have EXT descriptor + // and test target/test/CreateNewDataFilesTest/shapes.zip on VM/demo + // write a test that tries to read it with ZipInputStream try (ZipOutputStream zip_stream = new ZipOutputStream(new FileOutputStream(zipfile.toFile()))) { for (var file_name : file_names) { zip_stream.putNextEntry(new ZipEntry(file_name)); From 2aaae1ed20a9039786bb9f526f926f5b2d533631 Mon Sep 17 00:00:00 2001 From: jo-pol Date: Mon, 16 Sep 2024 15:26:45 +0200 Subject: [PATCH 11/20] zips that reproduce reported behavior manually --- .../iq/dataverse/util/ShapefileHandler.java | 3 +- .../command/impl/CreateNewDataFilesTest.java | 43 ++---------------- .../own-cloud-downloads/greetings.zip | Bin 0 -> 679 bytes .../resources/own-cloud-downloads/shapes.zip | Bin 0 -> 4336 bytes 4 files changed, 6 insertions(+), 40 deletions(-) create mode 100644 src/test/resources/own-cloud-downloads/greetings.zip create mode 100644 src/test/resources/own-cloud-downloads/shapes.zip diff --git a/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java b/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java index ea62776f754..277ecfda494 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java @@ -5,7 +5,6 @@ import java.io.IOException; import java.nio.file.Path; import java.nio.file.StandardCopyOption; -import java.text.MessageFormat; import java.util.Date; import java.util.ArrayList; import java.util.List; @@ -15,7 +14,6 @@ import java.nio.file.Files; import static java.nio.file.StandardCopyOption.REPLACE_EXISTING; -import static java.text.MessageFormat.format; import java.util.logging.Logger; import org.apache.commons.io.FileUtils; @@ -338,6 +336,7 @@ private boolean unzipFilesToDirectory(ZipFile zipfileInput, File target_director } logger.fine("Write zip file: " + outpath); try(var inputStream = zipfileInput.getInputStream(origEntry)) { + Files.createDirectories(new File(outpath).getParentFile().toPath()); Files.copy(inputStream, Path.of(outpath), StandardCopyOption.REPLACE_EXISTING); } } // end outer while diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java index 2d9175cb181..2c85c35371f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java @@ -8,7 +8,6 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; -import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JhoveFileType; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.testing.JvmSetting; @@ -16,24 +15,16 @@ import org.jetbrains.annotations.NotNull; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.mockito.MockedStatic; import org.mockito.Mockito; -import java.io.ByteArrayOutputStream; -import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintStream; import java.nio.file.NoSuchFileException; import java.nio.file.Path; -import java.util.Arrays; -import java.util.List; -import java.util.zip.ZipEntry; -import java.util.zip.ZipOutputStream; import static edu.harvard.iq.dataverse.DataFile.ChecksumType.MD5; import static org.apache.commons.io.file.FilesUncheck.createDirectories; @@ -97,11 +88,10 @@ public void execute_fails_on_size_limit() throws Exception { @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") public void execute_without_shape_files() throws Exception { var tempDir = testDir.resolve("tmp/temp"); - var testFile = "scripts/search/data/binary/3files.zip"; createDirectories(tempDir); mockTmpLookup(); - var cmd = createCmd(testFile, mockDatasetVersion()); + var cmd = createCmd("src/test/resources/own-cloud-downloads/greetings.zip", mockDatasetVersion()); var ctxt = mockCommandContext(mockSysConfig(false, 1000000L, MD5, 10)); try (MockedStatic mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { mockedStatic.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); @@ -112,9 +102,8 @@ public void execute_without_shape_files() throws Exception { assertThat(result.getErrors()).hasSize(0); assertThat(result.getDataFiles().stream().map(DataFile::toString)) .containsExactlyInAnyOrder( - "[DataFile id:null label:file1.txt]", - "[DataFile id:null label:file2.txt]", - "[DataFile id:null label:file3.txt]" + "[DataFile id:null label:hello.txt]", + "[DataFile id:null label:goodbye.txt]" ); var ids = result.getDataFiles().stream().map(DataFile::getStorageIdentifier).toList(); assertThat(tempDir.toFile().list()).containsExactlyInAnyOrderElementsOf(ids); @@ -127,16 +116,9 @@ public void execute_with_2_shapes_does_not_check_zipUploadFilesLimit() throws Ex var tempDir = testDir.resolve("tmp/temp"); createDirectories(tempDir); - File testFile = createAndZipFiles(Arrays.asList( - "shape1.shp", "shape1.shx", "shape1.dbf", "shape1.prj", "shape1.fbn", "shape1.fbx", // 1st shapefile - "shape2.shp", "shape2.shx", "shape2.dbf", "shape2.prj", // 2nd shapefile - "shape2.txt", "shape2.pdf", "shape2", // single files, same basename as 2nd shapefile - "README.MD", "shp_dictionary.xls", "notes" // single files - ), testDir.resolve("shapes.zip")); - mockTmpLookup(); - var cmd = createCmd(testFile.toString(), mockDatasetVersion()); - var ctxt = mockCommandContext(mockSysConfig(false, 1000000L, MD5, 0)); + var cmd = createCmd("src/test/resources/own-cloud-downloads/shapes.zip", mockDatasetVersion()); + var ctxt = mockCommandContext(mockSysConfig(false, 100000000L, MD5, 10)); try (var mockedJHoveFileType = Mockito.mockStatic(JhoveFileType.class)) { mockedJHoveFileType.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); @@ -160,21 +142,6 @@ public void execute_with_2_shapes_does_not_check_zipUploadFilesLimit() throws Ex } } - // simplified version from ShapefileHandlerTest - private File createAndZipFiles(List file_names, Path zipfile) throws IOException { - // TODO provoke java.util.zip.ZipException: only DEFLATED entries can have EXT descriptor - // and test target/test/CreateNewDataFilesTest/shapes.zip on VM/demo - // write a test that tries to read it with ZipInputStream - try (ZipOutputStream zip_stream = new ZipOutputStream(new FileOutputStream(zipfile.toFile()))) { - for (var file_name : file_names) { - zip_stream.putNextEntry(new ZipEntry(file_name)); - zip_stream.write((file_name + " content").getBytes()); - zip_stream.closeEntry(); - } - } - return zipfile.toFile(); - } - private static @NotNull CreateNewDataFilesCommand createCmd(String name, DatasetVersion dsVersion) throws FileNotFoundException { return new CreateNewDataFilesCommand( Mockito.mock(DataverseRequest.class), diff --git a/src/test/resources/own-cloud-downloads/greetings.zip b/src/test/resources/own-cloud-downloads/greetings.zip new file mode 100644 index 0000000000000000000000000000000000000000..6e166d385d18dce3b63eac9b742c40fea5dddae9 GIT binary patch literal 679 zcmWIWW@gc400HJ?!$=VQ9||}a6c}7wbPY|-&GZ==WI$3lK>*kk4xl`1BC09ENT#Ic z=cgo9rs|bclwdQ^4as}OAjq@OSojYL zK!~Xf3`-i3O+^b%Sg62g=vMe94rK2G;+uVc(bwT01W{F uSm=P*%pd{?V6+$`gbz$lU;xy|4q~`6LHHoy0B=?{kkgofx?oOZU;qG5YmhPk literal 0 HcmV?d00001 diff --git a/src/test/resources/own-cloud-downloads/shapes.zip b/src/test/resources/own-cloud-downloads/shapes.zip new file mode 100644 index 0000000000000000000000000000000000000000..99d5f36c8952950869f77b3316e3c9d5b65c72fe GIT binary patch literal 4336 zcmbVOJ%|%Q7~NcQxm=^s5cLj=Ac7!wIg&!fLWtz9Sxy!$L{KETY)Je`Lb65tDHIgJ zLR9cLEd)_38wCXm3vCn|wX(FZ6S1=s(fP9Tk}s2)u$yfj+1+{X&6oN1dyXBAEn0LP zcXKBQ|H&(1WvoicD;4YQrHHkY9vLsk$g+vCxLvPZ(sHdZW}hn|?z}xV>}dN4*U{xd zadkPnTGSc5%Sf@2{({U}v*Ec7&#`9`_a5bM&&ij8W*4au7mcRpc61JQL)K(nu{%vYyY zYPq>=rCik&B|9}*l)(z_;7#t*{dxH^FqDI8xuJ9L-0VQC;11lqpD#ig7}7yk=O7<7 z)}UJMat9B$q@O|=7|KDbeMaZtrP+a4!5z&0{OTnCTqsiy3PmR&WRJ?IT~I z3=HW&`!Bp6=V0hx`1WJ{Jd}aH1KNZ7<=m|HLSFmPJ2*WkfO$LVQL_$0(lNf1Lv|pM#W+P$+H*)#G zmHjXPoXCj2+2&`A6;Yb)!$#~63;-uFV(7WYW+O_^CH7o93j@H(jA&bLf#$oslrz$) z4YgkK`pGXa0Gz~#*Y$KA^U7>Qtl(#)`Tg$!7yvqUWNI;L&QFx4EGudKDp07xMs+?= z48n1&l&AEpLDQA*82O5_4-y~cE1%o4o?PXQeeQmEq75J@0eRZO1|UKa2O<=o#)G*m8}7$8qUDnF%;!2B}f zfEUO!j>=D&A_RZ0fx!#p2}b3o#1NQY<`eJ&c|KA3DGvnZmtR8g0(tsS`DxmR;MdlN zXAAI)gQCp}{`pbA68_u{kQ{~^nua^fQ+610IP*(`B!L_NRDmhU!^+4B+%J|mH+1m{?AY=6f`8k>T&15Wc@OBwLLZv-ABc6mJGYYl?}z}$o{p2n KlvoX`mh~UjzepMY literal 0 HcmV?d00001 From 255159ebd65352588b0448af5673371c1abee858 Mon Sep 17 00:00:00 2001 From: jo-pol Date: Thu, 19 Sep 2024 10:22:48 +0200 Subject: [PATCH 12/20] cleanup, assert directoryLabel+displayName --- .../command/impl/CreateNewDataFilesTest.java | 48 ++++++++++--------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java index 2c85c35371f..a6401b68d30 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java @@ -86,7 +86,7 @@ public void execute_fails_on_size_limit() throws Exception { @Test @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") - public void execute_without_shape_files() throws Exception { + public void execute_loads_individual_files_from_uploaded_zip() throws Exception { var tempDir = testDir.resolve("tmp/temp"); createDirectories(tempDir); @@ -100,19 +100,20 @@ public void execute_without_shape_files() throws Exception { var result = cmd.execute(ctxt); assertThat(result.getErrors()).hasSize(0); - assertThat(result.getDataFiles().stream().map(DataFile::toString)) - .containsExactlyInAnyOrder( - "[DataFile id:null label:hello.txt]", - "[DataFile id:null label:goodbye.txt]" - ); - var ids = result.getDataFiles().stream().map(DataFile::getStorageIdentifier).toList(); - assertThat(tempDir.toFile().list()).containsExactlyInAnyOrderElementsOf(ids); + assertThat(result.getDataFiles().stream().map(dataFile -> + dataFile.getFileMetadata().getDirectoryLabel() + "/" + dataFile.getDisplayName() + )).containsExactlyInAnyOrder( + "DD-1576/goodbye.txt", "DD-1576/hello.txt" + ); + var storageIds = result.getDataFiles().stream().map(DataFile::getStorageIdentifier).toList(); + assertThat(tempDir.toFile().list()) + .containsExactlyInAnyOrderElementsOf(storageIds); } } @Test @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") - public void execute_with_2_shapes_does_not_check_zipUploadFilesLimit() throws Exception { + public void execute_rezips_sets_of_shape_files_from_uploaded_zip() throws Exception { var tempDir = testDir.resolve("tmp/temp"); createDirectories(tempDir); @@ -126,19 +127,22 @@ public void execute_with_2_shapes_does_not_check_zipUploadFilesLimit() throws Ex var result = cmd.execute(ctxt); assertThat(result.getErrors()).hasSize(0); - assertThat(result.getDataFiles().stream().map(DataFile::toString)) - .containsExactlyInAnyOrder( - "[DataFile id:null label:shp_dictionary.xls]", - "[DataFile id:null label:notes]", - "[DataFile id:null label:shape1.zip]", - "[DataFile id:null label:shape2.txt]", - "[DataFile id:null label:shape2.pdf]", - "[DataFile id:null label:shape2]", - "[DataFile id:null label:shape2.zip]", - "[DataFile id:null label:README.MD]" - ); - var ids = result.getDataFiles().stream().map(DataFile::getStorageIdentifier).toList(); - assertThat(tempDir.toFile().list()).containsExactlyInAnyOrderElementsOf(ids); + assertThat(result.getDataFiles().stream().map(dataFile -> + (dataFile.getFileMetadata().getDirectoryLabel() + "/" + dataFile.getDisplayName()) + .replaceAll(".*temp/shp_[-0-9]*/", "") + )).containsExactlyInAnyOrder( + "dataDir/shape1.zip", + "dataDir/shape2/shape2", + "dataDir/shape2/shape2.pdf", + "dataDir/shape2/shape2.txt", + "dataDir/shape2/shape2.zip", + "dataDir/extra/shp_dictionary.xls", + "dataDir/extra/notes", + "dataDir/extra/README.MD" + ); + var storageIds = result.getDataFiles().stream().map(DataFile::getStorageIdentifier).toList(); + assertThat(tempDir.toFile().list()) + .containsExactlyInAnyOrderElementsOf(storageIds); } } From 571cfa7206c0d7e25fcfdec7928f6811129f1a81 Mon Sep 17 00:00:00 2001 From: jo-pol Date: Mon, 30 Sep 2024 13:23:02 +0200 Subject: [PATCH 13/20] restored shortName-regexp and second check on fileNumberLimit --- .../command/impl/CreateNewDataFilesCommand.java | 15 +++++++++++++-- .../command/impl/CreateNewDataFilesTest.java | 12 ++++++------ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index 31b11a4a367..97ff5092196 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -361,13 +361,20 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException try (var zipFile = openZipFile(tempFile, charset)) { for (var entry : filteredZipEntries(zipFile)) { + if (datafiles.size() > fileNumberLimit) { + logger.warning("Zip upload - too many files."); + warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit + + "); please upload a zip archive with fewer files, if you want them to be ingested " + + "as individual DataFiles."; + throw new IOException(); + } String storageIdentifier = FileUtil.generateStorageIdentifier(); File unzippedFile = new File(getFilesTempDirectory() + "/" + storageIdentifier); Files.copy(zipFile.getInputStream(entry), unzippedFile.toPath(), StandardCopyOption.REPLACE_EXISTING); // No need to check the size of this unpacked file against the size limit, // since we've already checked for that in the first pass. var fileEntryName = entry.getName(); - var shortName = new File(fileEntryName).getName(); + var shortName = getShortName(fileEntryName); DataFile datafile = FileUtil.createSingleDataFile(version, null, storageIdentifier, shortName, MIME_TYPE_UNDETERMINED_DEFAULT, ctxt.systemConfig().getFileFixityChecksumAlgorithm(), null, false); @@ -679,10 +686,14 @@ private static boolean isNotFakeFile(String fileName) { // check if it's a "fake" file - a zip archive entry // created for a MacOS X filesystem element: (these // start with "._") - var shortName = new File(fileName).getName(); + var shortName = getShortName(fileName); return !shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName); } + private static String getShortName(String fileName) { + return fileName.replaceFirst("^.*[\\/]", ""); + } + @Override public Map> getRequiredPermissions() { Map> ret = new HashMap<>(); diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java index a6401b68d30..1262984eb27 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java @@ -56,7 +56,7 @@ public void cleanTmpDir() throws IOException { public void execute_fails_to_upload_when_tmp_does_not_exist() throws FileNotFoundException { mockTmpLookup(); - var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion()); + var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion(), 1000L, 500L); var ctxt = mockCommandContext(mockSysConfig(true, 0L, MD5, 10)); assertThatThrownBy(() -> cmd.execute(ctxt)) @@ -73,7 +73,7 @@ public void execute_fails_on_size_limit() throws Exception { createDirectories(Path.of("target/test/CreateNewDataFilesTest/tmp/temp")); mockTmpLookup(); - var cmd = createCmd("scripts/search/data/binary/3files.zip", mockDatasetVersion()); + var cmd = createCmd("scripts/search/data/binary/3files.zip", mockDatasetVersion(), 1000L, 500L); var ctxt = mockCommandContext(mockSysConfig(true, 50L, MD5, 0)); try (var mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { mockedStatic.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); @@ -91,7 +91,7 @@ public void execute_loads_individual_files_from_uploaded_zip() throws Exception createDirectories(tempDir); mockTmpLookup(); - var cmd = createCmd("src/test/resources/own-cloud-downloads/greetings.zip", mockDatasetVersion()); + var cmd = createCmd("src/test/resources/own-cloud-downloads/greetings.zip", mockDatasetVersion(), 1000L, 500L); var ctxt = mockCommandContext(mockSysConfig(false, 1000000L, MD5, 10)); try (MockedStatic mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { mockedStatic.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); @@ -118,7 +118,7 @@ public void execute_rezips_sets_of_shape_files_from_uploaded_zip() throws Except createDirectories(tempDir); mockTmpLookup(); - var cmd = createCmd("src/test/resources/own-cloud-downloads/shapes.zip", mockDatasetVersion()); + var cmd = createCmd("src/test/resources/own-cloud-downloads/shapes.zip", mockDatasetVersion(), 1000L, 500L); var ctxt = mockCommandContext(mockSysConfig(false, 100000000L, MD5, 10)); try (var mockedJHoveFileType = Mockito.mockStatic(JhoveFileType.class)) { mockedJHoveFileType.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); @@ -146,7 +146,7 @@ public void execute_rezips_sets_of_shape_files_from_uploaded_zip() throws Except } } - private static @NotNull CreateNewDataFilesCommand createCmd(String name, DatasetVersion dsVersion) throws FileNotFoundException { + private static @NotNull CreateNewDataFilesCommand createCmd(String name, DatasetVersion dsVersion, long allocatedQuotaLimit, long usedQuotaLimit) throws FileNotFoundException { return new CreateNewDataFilesCommand( Mockito.mock(DataverseRequest.class), dsVersion, @@ -154,7 +154,7 @@ public void execute_rezips_sets_of_shape_files_from_uploaded_zip() throws Except "example.zip", "application/zip", null, - new UploadSessionQuotaLimit(1000L, 500L), + new UploadSessionQuotaLimit(allocatedQuotaLimit, usedQuotaLimit), "sha"); } From 98aca80cafcf86f25a6d79ba6784118ac4c61f75 Mon Sep 17 00:00:00 2001 From: jo-pol Date: Mon, 30 Sep 2024 15:57:55 +0200 Subject: [PATCH 14/20] restored unrelated code reformatting --- .../engine/command/impl/CreateNewDataFilesCommand.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index 97ff5092196..e4091586dd6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -331,10 +331,7 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException // file is above the individual size limit, unzipped, // we give up on unpacking this zip archive as well: if (fileSizeLimit != null && entry.getSize() > fileSizeLimit) { - // TODO why no log and warning message? - throw new FileExceedsMaxSizeException( - MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(entry.getSize()), - bytesToHumanReadable(fileSizeLimit))); + throw new FileExceedsMaxSizeException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(entry.getSize()), bytesToHumanReadable(fileSizeLimit))); } // Similarly, we want to check if saving all these unpacked // files is going to push the disk usage over the @@ -347,8 +344,7 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException // we reject the upload outright, rather than accepting the zip // file as is. // TODO why no log and warning message? - throw new CommandExecutionException( - MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.unzipped.quota_exceeded"), bytesToHumanReadable(storageQuotaLimit)), this); + throw new CommandExecutionException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.unzipped.quota_exceeded"), bytesToHumanReadable(storageQuotaLimit)), this); } } } From c664a88b0074e0d180872b8060206f57a6bfebc9 Mon Sep 17 00:00:00 2001 From: jo-pol Date: Mon, 30 Sep 2024 16:07:24 +0200 Subject: [PATCH 15/20] reduced changes --- .../engine/command/impl/CreateNewDataFilesCommand.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index e4091586dd6..d661ac06578 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -364,13 +364,14 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException + "as individual DataFiles."; throw new IOException(); } + var fileEntryName = entry.getName(); + var shortName = getShortName(fileEntryName); + logger.fine("ZipEntry, file: " + fileEntryName); String storageIdentifier = FileUtil.generateStorageIdentifier(); File unzippedFile = new File(getFilesTempDirectory() + "/" + storageIdentifier); Files.copy(zipFile.getInputStream(entry), unzippedFile.toPath(), StandardCopyOption.REPLACE_EXISTING); // No need to check the size of this unpacked file against the size limit, // since we've already checked for that in the first pass. - var fileEntryName = entry.getName(); - var shortName = getShortName(fileEntryName); DataFile datafile = FileUtil.createSingleDataFile(version, null, storageIdentifier, shortName, MIME_TYPE_UNDETERMINED_DEFAULT, ctxt.systemConfig().getFileFixityChecksumAlgorithm(), null, false); From 8fa79c33dfd04af17f90bb8c193cef168e7440ab Mon Sep 17 00:00:00 2001 From: jo-pol Date: Mon, 30 Sep 2024 16:38:33 +0200 Subject: [PATCH 16/20] reduced changes --- .../impl/CreateNewDataFilesCommand.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index d661ac06578..76968880c57 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -297,8 +297,16 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException * then attempt to save it as is. */ - int numberOfUnpackableFiles = 0; - + int numberOfUnpackableFiles = 0; + + /** + * Perform a quick check for how many individual files are + * inside this zip archive. If it's above the limit, we can + * give up right away, without doing any unpacking. + * This should be a fairly inexpensive operation, we just need + * to read the directory at the end of the file. + */ + /** * Note that we can't just use zipFile.size(), * unfortunately, since that's the total number of entries, @@ -309,13 +317,6 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException try (var zipFile = openZipFile(tempFile, charset)) { for (var entry : filteredZipEntries(zipFile)) { - /** - * Perform a quick check for how many individual files are - * inside this zip archive. If it's above the limit, we can - * give up right away, without doing any unpacking. - * This should be a fairly inexpensive operation, we just need - * to read the directory at the end of the file. - */ logger.fine("inside first zip pass; this entry: " + entry.getName()); numberOfUnpackableFiles++; if (numberOfUnpackableFiles > fileNumberLimit) { From da90717fb4693aca6b9d09d8a46118d80190db28 Mon Sep 17 00:00:00 2001 From: jo-pol Date: Mon, 30 Sep 2024 16:40:37 +0200 Subject: [PATCH 17/20] reduced changes --- .../engine/command/impl/CreateNewDataFilesCommand.java | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index 76968880c57..a1e3a6c2954 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -299,14 +299,6 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException int numberOfUnpackableFiles = 0; - /** - * Perform a quick check for how many individual files are - * inside this zip archive. If it's above the limit, we can - * give up right away, without doing any unpacking. - * This should be a fairly inexpensive operation, we just need - * to read the directory at the end of the file. - */ - /** * Note that we can't just use zipFile.size(), * unfortunately, since that's the total number of entries, From 3c332fee63f4ce5c564e95f0ecb65d9b8e1e425b Mon Sep 17 00:00:00 2001 From: jo-pol Date: Mon, 30 Sep 2024 16:42:36 +0200 Subject: [PATCH 18/20] reduced changes --- .../engine/command/impl/CreateNewDataFilesCommand.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index a1e3a6c2954..90b53e3847e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -288,6 +288,15 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException } */ + /** + * Perform a quick check for how many individual files are + * inside this zip archive. If it's above the limit, we can + * give up right away, without doing any unpacking. + * This should be a fairly inexpensive operation, we just need + * to read the directory at the end of the file. + */ + + /** * The ZipFile constructors in openZipFile will throw ZipException - * a type of IOException - if there's something wrong From d1f1b6fc8bc109ad179294da5d4f72341b7af2c1 Mon Sep 17 00:00:00 2001 From: jo-pol Date: Tue, 1 Oct 2024 09:39:49 +0200 Subject: [PATCH 19/20] reduced changes --- .../util/shapefile/ShapefileHandlerTest.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java b/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java index c5684534335..4fcac9579cc 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java @@ -64,6 +64,19 @@ private File createBlankFile(String filename) throws IOException { return Files.createFile(tempFolder.resolve(filename)).toFile(); } + private FileInputStream createZipReturnFilestream(List file_names, String zipfile_name) throws IOException{ + + File zip_file_obj = this.createAndZipFiles(file_names, zipfile_name); + if (zip_file_obj == null){ + return null; + } + + FileInputStream file_input_stream = new FileInputStream(zip_file_obj); + + return file_input_stream; + + } + /* Convenience method to create .zip file and return a File From b75d25804888ea543e6347b75907c92a97a9093a Mon Sep 17 00:00:00 2001 From: jo-pol Date: Tue, 1 Oct 2024 09:48:02 +0200 Subject: [PATCH 20/20] reduced changes --- .../ingest/IngestServiceShapefileHelper.java | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceShapefileHelper.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceShapefileHelper.java index 958b3636ef7..27a2ab99376 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceShapefileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceShapefileHelper.java @@ -101,6 +101,29 @@ public IngestServiceShapefileHelper(File zippedShapefile, File rezipFolder){ } + private FileInputStream getFileInputStream(File fileObject){ + if (fileObject==null){ + return null; + } + try { + return new FileInputStream(fileObject); + } catch (FileNotFoundException ex) { + logger.severe("Failed to create FileInputStream from File: " + fileObject.getAbsolutePath()); + return null; + } + } + + private void closeFileInputStream(FileInputStream fis){ + if (fis==null){ + return; + } + try { + fis.close(); + } catch (IOException ex) { + logger.info("Failed to close FileInputStream"); + } + } + public boolean processFile() { if ((!isValidFile(this.zippedShapefile))||(!isValidFolder(this.rezipFolder))){