Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/ingest folder #25

Merged
merged 12 commits into from
Jan 28, 2022
246 changes: 245 additions & 1 deletion Module.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
namespace FileSideload;

use FileSideload\Form\ConfigForm;
use Omeka\Module\AbstractModule;
use Laminas\EventManager\Event;
use Laminas\EventManager\SharedEventManagerInterface;
use Laminas\Mvc\Controller\AbstractController;
use Laminas\ServiceManager\ServiceLocatorInterface;
use Laminas\View\Renderer\PhpRenderer;
use Omeka\Module\AbstractModule;
use Omeka\Stdlib\Message;

class Module extends AbstractModule
{
Expand All @@ -22,6 +25,15 @@ public function uninstall(ServiceLocatorInterface $serviceLocator)
$settings->delete('file_sideload_max_files');
}

/**
 * Register the module listeners on the shared event manager.
 *
 * @param SharedEventManagerInterface $sharedEventManager Manager the listener is attached to.
 */
public function attachListeners(SharedEventManagerInterface $sharedEventManager): void
{
    // Run handleItemApiHydratePre() on the item adapter "api.hydrate.pre" event.
    $identifier = \Omeka\Api\Adapter\ItemAdapter::class;
    $listener = [$this, 'handleItemApiHydratePre'];
    $sharedEventManager->attach($identifier, 'api.hydrate.pre', $listener);
}

public function getConfigForm(PhpRenderer $renderer)
{
$settings = $this->getServiceLocator()->get('Omeka\Settings');
Expand Down Expand Up @@ -51,4 +63,236 @@ public function handleConfigForm(AbstractController $controller)
$settings->set('file_sideload_max_files', (int) $formData['filesideload_max_files']);
return true;
}

/**
 * Convert each "sideload_dir" media of the request into one media per file.
 *
 * For every entry of "o:media" whose "o:ingester" is "sideload_dir", the
 * "ingest_folder" is resolved against the configured main directory and
 * verified; the entry is then replaced by one copy of itself per file
 * found, each with "ingest_filename" set to the path of the file relative
 * to the main directory. Other media are kept untouched. Problems are
 * reported on the error store under the key "ingest_folder" and the
 * offending media is left as submitted.
 *
 * @param Event $event Event whose "request" and "errorStore" params are used.
 * @return void
 */
public function handleItemApiHydratePre(Event $event)
{
    // One-time validation of the configured directory:
    // null = not checked yet, false = unusable, true = usable.
    static $isChecked;

    /** @var \Omeka\Api\Request $request */
    $request = $event->getParam('request');
    $data = $request->getContent();
    // Nothing to convert; a non-array payload is left as is rather than iterated.
    if (empty($data['o:media']) || !is_array($data['o:media'])) {
        return;
    }

    if (is_null($isChecked)) {
        $isChecked = false;
        $settings = $this->getServiceLocator()->get('Omeka\Settings');
        $mainDir = (string) $settings->get('file_sideload_directory', '');
        if (!strlen($mainDir)) {
            return;
        }

        $mainDir = realpath($mainDir);
        if ($mainDir === false) {
            return;
        }

        $dir = new \SplFileInfo($mainDir);
        if (!$dir->isDir() || !$dir->isReadable() || !$dir->isExecutable()) {
            return;
        }

        $this->directory = $mainDir;
        $this->deleteFile = $settings->get('file_sideload_delete_file') === 'yes';

        $isChecked = true;
    }

    if (!$isChecked) {
        return;
    }

    $errorStore = $event->getParam('errorStore');

    // Main directory with exactly one trailing separator, so that prefix
    // comparisons stop on a path-component boundary: "/base-other/x" must
    // not be considered as located inside "/base".
    $baseDir = rtrim((string) $this->directory, DIRECTORY_SEPARATOR) . DIRECTORY_SEPARATOR;
    $baseDirLength = strlen($baseDir);

    $newDataMedias = [];
    foreach ($data['o:media'] as $dataMedia) {
        $newDataMedias[] = $dataMedia;

        if (empty($dataMedia['o:ingester']) || $dataMedia['o:ingester'] !== 'sideload_dir') {
            continue;
        }

        if (!array_key_exists('ingest_folder', $dataMedia)) {
            $errorStore->addError('ingest_folder', 'No ingest folder specified.'); // @translate
            continue;
        }

        $ingestFolder = (string) $dataMedia['ingest_folder'];

        // Some quick security checks are done here instead of ingester
        // to simplify conversion into multiple media.

        if (!strlen($ingestFolder)) {
            $errorStore->addError('ingest_folder', 'No ingest folder specified.'); // @translate
            continue;
        }

        if ($ingestFolder === '.' || $ingestFolder === '..' || $ingestFolder === '/') {
            $errorStore->addError('ingest_folder', 'Illegal ingest folder specified.'); // @translate
            continue;
        }

        // The folder may be given as an absolute path inside the main
        // directory or as a path relative to it. The main directory itself
        // is kept as "absolute" so that it is rejected by the verification
        // below with the same message as before.
        $isAbsolutePathInsideDir = $this->directory
            && (
                $ingestFolder === $this->directory
                || strncmp($ingestFolder, $baseDir, $baseDirLength) === 0
            );
        $folder = $isAbsolutePathInsideDir
            ? $ingestFolder
            : $this->directory . DIRECTORY_SEPARATOR . $ingestFolder;
        $fileinfo = new \SplFileInfo($folder);
        $folder = $this->verifyFileOrDir($fileinfo, true);

        if (is_null($folder)) {
            // Set a clearer message in some cases.
            if ($this->deleteFile && !$fileinfo->getPathInfo()->isWritable()) {
                $errorStore->addError('ingest_folder', new Message(
                    'Ingest folder "%s" is not writeable but the config requires deletion after upload.', // @translate
                    $ingestFolder
                ));
            } elseif (!$fileinfo->isDir()) {
                $errorStore->addError('ingest_folder', new Message(
                    'Invalid ingest folder "%s" specified: not a directory', // @translate
                    $ingestFolder
                ));
            } else {
                $errorStore->addError('ingest_folder', new Message(
                    'Invalid ingest folder "%s" specified: incorrect path or insufficient permissions', // @translate
                    $ingestFolder
                ));
            }
            continue;
        }

        $listFiles = $this->listFiles($folder, !empty($dataMedia['ingest_folder_recursively']));
        if (!count($listFiles)) {
            $errorStore->addError('ingest_folder', new Message(
                'Ingest folder "%s" is empty.', // @translate
                $ingestFolder
            ));
            continue;
        }

        // Convert the media to a list of media for the item hydration.
        // Remove the added media folder from list of media.
        array_pop($newDataMedias);
        foreach ($listFiles as $filepath) {
            $dataMedia['ingest_filename'] = $filepath;
            $newDataMedias[] = $dataMedia;
        }
    }
    $data['o:media'] = $newDataMedias;
    $request->setContent($data);
}

/**
 * List the files that can be sideloaded from a folder of the main directory.
 *
 * Files located directly in the folder come first, then the other ones;
 * each group is sorted with a case-insensitive natural order.
 *
 * @param string $directory Folder to scan.
 * @param bool $recursive Whether to scan the sub-folders too.
 * @return array List of filepaths relative to the main directory.
 */
protected function listFiles(string $directory, bool $recursive = false): array
{
    $requested = new \SplFileInfo($directory);
    $isUsable = $requested->isDir() && $requested->isReadable() && $requested->isExecutable();
    if (!$isUsable) {
        return [];
    }

    // The folder must be inside the main directory: root files are not imported.
    $realDirectory = $this->verifyFileOrDir($requested, true);
    if ($realDirectory === null) {
        return [];
    }

    // Paths are stored as keys for a quicker process on big directories.
    $allFiles = [];
    // Files located directly in the folder, kept apart to simplify the sort.
    $rootFiles = [];

    // Length of the main directory and of the separator that follows it.
    $prefixLength = strlen($this->directory) + 1;

    if (!$recursive) {
        foreach (new \DirectoryIterator((string) $requested) as $entry) {
            $realPath = $this->verifyFileOrDir($entry);
            if ($realPath === null) {
                continue;
            }
            // For security, don't display the full path to the user.
            $allFiles[substr($realPath, $prefixLength)] = null;
        }
    } else {
        // Directories that are not executable or readable are excluded to
        // prevent UnexpectedValueException "Permission denied".
        $traversable = new \RecursiveCallbackFilterIterator(
            new \RecursiveDirectoryIterator($realDirectory),
            function ($current, $key, $iterator) {
                return !$iterator->isDir()
                    || ($iterator->isExecutable() && $iterator->isReadable());
            }
        );
        /** @var \SplFileInfo $entry */
        foreach (new \RecursiveIteratorIterator($traversable) as $pathname => $entry) {
            if (!$this->verifyFileOrDir($entry)) {
                continue;
            }
            // For security, don't display the full path to the user.
            $relative = substr($pathname, $prefixLength);
            $allFiles[$relative] = null;
            if (pathinfo($pathname, PATHINFO_DIRNAME) === $realDirectory) {
                $rootFiles[$relative] = null;
            }
        }
    }

    // Don't mix directories and files. List root files, then sub-folders.
    $sortedRoot = array_keys($rootFiles);
    natcasesort($sortedRoot);
    $sortedAll = array_keys($allFiles);
    natcasesort($sortedAll);

    // Root files are also part of the full list: keep their first position only.
    return array_values(array_unique(array_merge($sortedRoot, $sortedAll)));
}

/**
 * Verify the passed file or directory.
 *
 * Working off the "real" base directory and "real" filepath: both must
 * exist and have sufficient permissions; the filepath must be located
 * strictly inside the base directory to avoid problems with symlinks (the
 * comparison is done on a path-component boundary, so "/base-other/file"
 * is not accepted for the base directory "/base"); the parent directory
 * must be server-writable when the file has to be deleted; and the file
 * must be a readable regular file or directory.
 *
 * @param \SplFileInfo $fileinfo File or directory to check.
 * @param bool $isDir Expect an executable directory instead of a regular file.
 * @return string|null The real file path or null if the file is invalid.
 *
 * @todo Factorize with \FileSideload\Media\Ingester\SideloadDir::verifyFileOrDir()
 */
protected function verifyFileOrDir(\SplFileInfo $fileinfo, bool $isDir = false): ?string
{
    // Without a usable base directory (never set, false or empty), nothing
    // can be valid.
    if (empty($this->directory)) {
        return null;
    }
    $realPath = $fileinfo->getRealPath();
    if (false === $realPath) {
        return null;
    }
    // The base directory itself is never a valid target.
    if ($realPath === $this->directory) {
        return null;
    }
    // Compare with a trailing separator: a plain prefix check would accept
    // siblings sharing the same beginning, such as "/base-other".
    $baseDir = rtrim($this->directory, DIRECTORY_SEPARATOR) . DIRECTORY_SEPARATOR;
    if (0 !== strncmp($realPath, $baseDir, strlen($baseDir))) {
        return null;
    }
    // Deleting a file requires write access to the directory that contains it.
    if (!empty($this->deleteFile) && !$fileinfo->getPathInfo()->isWritable()) {
        return null;
    }
    if (!$fileinfo->isReadable()) {
        return null;
    }
    if ($isDir) {
        if (!$fileinfo->isDir() || !$fileinfo->isExecutable()) {
            return null;
        }
    } elseif (!$fileinfo->isFile()) {
        return null;
    }
    return $realPath;
}
}
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# FileSideload

This module provides functionality to "sideload" files (ingesting files that are already on the server). It allows users to batch-add many files at once to their repository, rather than uploading the files individually. It also enables users to circumvent server file size restrictions that limit the capacity of web-form upload.
Files can be selected individually or as a set at the folder level, for example all images of a scanned manuscript.

See the [Omeka S user manual](http://omeka.org/s/docs/user-manual/modules/filesideload/) for user documentation.

Expand Down
1 change: 1 addition & 0 deletions config/module.config.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
'media_ingesters' => [
'factories' => [
'sideload' => Service\MediaIngesterSideloadFactory::class,
'sideload_dir' => Service\MediaIngesterSideloadDirFactory::class,
],
],
'translator' => [
Expand Down
6 changes: 3 additions & 3 deletions config/module.ini
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
[info]
version = "1.4.0"
omeka_version_constraint = "^3.0.0"
name = "File Sideload"
description = "Add files that are already on your server to items."
author = "Omeka Team"
name = "File and Folder Sideload"
description = "Add files and folders of files that are already on your server to items."
author = "Omeka Team and Daniel Berthereau"
author_link = "https://omeka.org/"
configurable = true
module_link = "https://omeka.org/s/docs/user-manual/modules/filesideload/"
Expand Down
2 changes: 1 addition & 1 deletion src/Media/Ingester/Sideload.php
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ public function ingest(Media $media, Request $request, ErrorStore $errorStore)
{
$data = $request->getContent();
if (!isset($data['ingest_filename'])) {
$errorStore->addError('ingest_filename', 'No ingest filename specified'); // @translate;
$errorStore->addError('ingest_filename', 'No ingest filename specified'); // @translate
return;
}

Expand Down
Loading