Skip to content

Commit

Permalink
discoverygarden#69: add option to cache EAD HTML transformations to disk
Browse files Browse the repository at this point in the history
  • Loading branch information
ctgraham committed Nov 2, 2021
1 parent 3b738dd commit 8ba48fa
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 4 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ In `charlie`, we have three logical containers:

Do note that the code tries not to make any assumptions about the numbering of boxes or folders. Folders could either be numbered sequentially across boxes (in which case specifying a range of folders could make sense when specifying a range of boxes) or specific to a box. Additionally, pluralization of types is largely ignored.

### Q. What does the EAD caching feature do?

A. When enabled, EAD HTML caching performs the XSLT transform of the EAD to HTML and then caches the resulting HTML in a file for later reuse. This file is written into Drupal's temporary directory, and its last modification time is used to decide whether the configured cache lifetime has been exceeded. If Islandora Solr Search is enabled, Drupal's cron will pre-cache any EADs.

## Maintainers/Sponsors
Current maintainers:

Expand Down
22 changes: 22 additions & 0 deletions includes/admin.form.inc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ function islandora_manuscript_admin_settings_form(array $form, array &$form_stat
'#default_value' => variable_get('islandora_manuscript_metadata_display', FALSE),
),
),
'islandora_manuscript_ead_caching' => array(
'#type' => 'textfield',
'#title' => t('Cache HTML rendering of EAD'),
'#description' => t("The length of time to cache the HTML rendering of EAD, e.g. \"+90 minutes\"."),
'#default_value' => variable_get('islandora_manuscript_ead_caching', ''),
'#element_validate' => array('islandora_manuscript_validate_strtotime'),
'#size' => 30,
),
);

// Solr field containing the parent book PID.
Expand Down Expand Up @@ -96,6 +104,20 @@ function islandora_manuscript_admin_settings_form(array $form, array &$form_stat
return system_settings_form($form);
}

/**
 * Element validator: the value must be a forward-looking strtotime() offset.
 *
 * The setting is used as a cache lifetime, so a string that strtotime() can
 * parse but that resolves to the present or the past (for example "-1 week"
 * or an absolute date that has already passed) is rejected as well: it would
 * produce a zero or negative lifetime and the cache would never be used.
 * An empty value is accepted.
 *
 * @param array $element
 *   The element to check; its '#value' and '#title' keys are read.
 * @param array $form_state
 *   The Drupal form state.
 */
function islandora_manuscript_validate_strtotime($element, &$form_state) {
  // Empty means "no value entered"; nothing to validate.
  if (empty($element['#value'])) {
    return;
  }

  // Resolve the string against one fixed reference time so the comparison
  // below cannot straddle a clock tick.
  $now = time();
  $expires = strtotime($element['#value'], $now);

  if ($expires === false || $expires <= $now) {
    // '#title' is passed through as-is rather than through t() a second time.
    form_error($element, t('The "!name" option must contain a valid value. You may either leave the text field empty or enter a string like "+30 minutes", "+4 hours", or "+1 week 2 days 4 hours 2 seconds".', array('!name' => $element['#title'])));
  }
}

/**
* Check if the required resources are enabled.
*
Expand Down
82 changes: 79 additions & 3 deletions includes/ead_html.inc
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ function islandora_manuscript_preprocess_ead_display_variables(&$variables) {
* This function populates:
* - processor: The XSLTProcessor instance which was used.
* - markup_doc: A DOMDocument containing the markup to output, after
* this function has run.
* this function has run, iff not cached.
*/
function islandora_manuscript_process_ead_display_variables(&$variables) {
$variables['processor'] = $proc = new XSLTProcessor();
Expand All @@ -61,8 +61,19 @@ function islandora_manuscript_process_ead_display_variables(&$variables) {
$proc->setParameter($namespace_uri, $parameters);
}
$proc->registerPhpFunctions($variables['xslt_functions']);
$variables['markup_doc'] = $proc->transformToDoc($variables['doc']);
$variables['rendered_ead_html'] = $variables['markup_doc']->saveXML($variables['markup_doc']->documentElement);
$variables['rendered_ead_html'] = islandora_manuscript_getcache_eadhtml($variables['object']->id);
if (!$variables['rendered_ead_html']) {
// TODO: handle this failure more elegantly
// Pending that, write a default template out to the cache so that when we die unexpectedly in $proc->transformToDoc($variables['doc'])
// we don't pick up and retry this same object again immediately
$failureContent = theme('islandora_manuscript_ead_display_failure', $variables);
islandora_manuscript_setcache_eadhtml($variables['object']->id, $failureContent);
$variables['markup_doc'] = $proc->transformToDoc($variables['doc']);
$variables['rendered_ead_html'] = $variables['markup_doc']->saveXML($variables['markup_doc']->documentElement);
islandora_manuscript_setcache_eadhtml($variables['object']->id, $variables['rendered_ead_html']);
} else {
$variables['markup_doc'] = null;
}
}


Expand Down Expand Up @@ -287,3 +298,68 @@ function islandora_manuscript_build_flat_subfile_query(array $containers) {
// Merge down to single array.
return empty($parts) ? array() : call_user_func_array('array_merge', $parts);
}

/**
 * Build the directory path under which cached EAD HTML files are stored.
 *
 * @return string
 *   Drupal's temporary directory followed by the
 *   'islandora_manuscript_eadhtml' sub-directory, with a trailing directory
 *   separator.
 */
function islandora_manuscript_cache_eadhtml_path() {
  // The empty last segment yields the trailing separator.
  $segments = array(
    file_directory_temp(),
    'islandora_manuscript_eadhtml',
    '',
  );
  return implode(DIRECTORY_SEPARATOR, $segments);
}

/**
 * Give the file-name suffix shared by all cached EAD HTML files.
 *
 * @return string
 *   The suffix, including its leading dot.
 */
function islandora_manuscript_cache_eadhtml_suffix() {
  $suffix = '.cache';
  return $suffix;
}


/**
 * Look up the cached EAD-to-HTML rendering for an object.
 *
 * The cache file name is the MD5 of the object id plus the cache suffix. A
 * file only counts as a hit while its modification time plus the configured
 * lifetime is still in the future.
 *
 * @param string $objectid
 *   The object's PID.
 *
 * @return string|bool
 *   The cached HTML, or false when caching is disabled, no cache file exists,
 *   the file has expired, or it could not be read.
 */
function islandora_manuscript_getcache_eadhtml($objectid) {
  $lifetime = variable_get('islandora_manuscript_ead_caching', false);
  if (!$lifetime) {
    return false;
  }

  $cachefile = islandora_manuscript_cache_eadhtml_path()
    . md5($objectid)
    . islandora_manuscript_cache_eadhtml_suffix();
  if (!file_exists($cachefile)) {
    return false;
  }

  // strtotime() turns the relative setting into an absolute timestamp;
  // subtracting the current time leaves the lifetime in seconds.
  $written = filemtime($cachefile);
  $expires = $written + strtotime($lifetime) - time();
  if ($expires > time()) {
    // file_get_contents() itself returns false when the read fails.
    return file_get_contents($cachefile);
  }

  return false;
}

/**
 * Cache a rendered EAD to HTML transform, indexed by object id.
 *
 * We are caching into the temp directory instead of using Drupal's native
 * cache function because the HTML will probably exceed size limits for the
 * native cache configuration.
 *
 * The cache directory is created on demand, including any missing parent
 * directories. The file name is the MD5 of the object id plus the cache
 * suffix.
 *
 * @param string $objectid
 *   The object's PID.
 * @param string $html
 *   The HTML result of the XSLT transform.
 *
 * @return bool
 *   True when the file was written (an empty $html still counts as a
 *   successful write); false when caching is disabled, the cache directory
 *   could not be created, or the write failed.
 */
function islandora_manuscript_setcache_eadhtml($objectid, $html) {
  if (!variable_get('islandora_manuscript_ead_caching', false)) {
    return false;
  }

  $cachepath = islandora_manuscript_cache_eadhtml_path();
  // is_dir() rather than file_exists(): a regular file at this path is not a
  // usable cache directory. The trailing is_dir() covers the case where
  // another request created the directory between our check and mkdir().
  if (!is_dir($cachepath) && !mkdir($cachepath, 0777, true) && !is_dir($cachepath)) {
    return false;
  }

  $cachekey = md5($objectid) . islandora_manuscript_cache_eadhtml_suffix();
  // file_put_contents() returns the byte count, which is 0 for empty content;
  // only a strict false signals failure.
  return file_put_contents($cachepath . $cachekey, $html) !== false;
}

69 changes: 69 additions & 0 deletions islandora_manuscript.module
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,14 @@ function islandora_manuscript_theme() {
'file' => 'theme/theme.inc',
'template' => 'theme/islandora-manuscript-ead-display',
),
'islandora_manuscript_ead_display_failure' => array(
'variables' => array(
'object' => NULL,
),
'file' => 'theme/theme.inc',
'template' => 'theme/islandora-manuscript-ead-display-failure',
),

);
}

Expand Down Expand Up @@ -644,3 +652,64 @@ EOQ;
),
);
}

/**
 * Implements hook_cron().
 *
 * When EAD HTML caching is configured this:
 * - deletes cache files whose modification time plus the configured lifetime
 *   lies in the past;
 * - if the islandora_solr module is enabled, queries Solr for finding aid
 *   objects that have an EAD datastream and renders every one that has no
 *   valid cache entry, so the result is cached ahead of the first request.
 */
function islandora_manuscript_cron() {
  $lifetime = variable_get('islandora_manuscript_ead_caching', false);
  if (!$lifetime) {
    return;
  }
  module_load_include('inc', 'islandora_manuscript', 'includes/ead_html');

  // Lifetime in seconds: strtotime() resolves the relative setting against
  // the current time, so subtracting the current time leaves the offset.
  $ttl = strtotime($lifetime) - time();

  // Purge expired cache files. glob() may return false on error, so only
  // iterate when it produced a non-empty list.
  $cachepath = islandora_manuscript_cache_eadhtml_path();
  $files = glob($cachepath . '*' . islandora_manuscript_cache_eadhtml_suffix());
  if ($files) {
    foreach ($files as $file) {
      $updated = filemtime($file);
      // A false mtime means the file could not be stat'ed; leave it alone.
      if ($updated !== false && $updated + $ttl < time()) {
        unlink($file);
      }
    }
  }

  if (!module_exists('islandora_solr')) {
    return;
  }
  module_load_include('inc', 'islandora_solr', 'includes/utilities');

  // Find any findingAidCModel objects with an EAD datastream.
  $qp = new IslandoraSolrQueryProcessor();
  $qp->buildQuery('fedora_datastreams_ms:EAD');
  $qp->solrParams['fq'][] = format_string('!field:("info:fedora/!pid" OR "!pid")', array(
    '!field' => islandora_solr_lesser_escape(variable_get('islandora_solr_content_model_field', 'RELS_EXT_hasModel_uri_ms')),
    '!pid' => 'islandora:findingAidCModel',
  ));
  $qp->solrParams['fl'] = implode(',', array(
    'PID',
  ));
  $qp->executeQuery();

  // Queue every object that does not already have a valid cache file.
  $toCache = array();
  $pageIndex = 0;
  $numFound = isset($qp->islandoraSolrResult['response']['numFound'])
    ? (int) $qp->islandoraSolrResult['response']['numFound']
    : 0;
  // Compare against $numFound itself so that a single result, or the final
  // result of a later page, is not skipped.
  while ($pageIndex < $numFound) {
    $objects = isset($qp->islandoraSolrResult['response']['objects'])
      ? $qp->islandoraSolrResult['response']['objects']
      : array();
    // An empty page would leave $pageIndex unchanged; stop instead of
    // re-querying forever.
    if (empty($objects)) {
      break;
    }
    foreach ($objects as $result) {
      $pageIndex++;
      if (!islandora_manuscript_getcache_eadhtml($result['PID'])) {
        $toCache[$result['PID']] = $result['PID'];
      }
    }
    if ($pageIndex < $numFound) {
      // Fetch the remainder, starting after the results already seen.
      $qp->solrStart = $pageIndex;
      $qp->solrLimit = $numFound;
      $qp->executeQuery();
      $numFound = isset($qp->islandoraSolrResult['response']['numFound'])
        ? (int) $qp->islandoraSolrResult['response']['numFound']
        : 0;
    }
  }
  unset($qp);

  if (empty($toCache)) {
    return;
  }

  // The default theme variables are the same for every object; look them up
  // once rather than on every iteration.
  $themevars = islandora_manuscript_theme();
  $defaults = $themevars['islandora_manuscript_ead_display']['variables'];

  // Render and cache each queued object.
  foreach ($toCache as $pid) {
    $object = islandora_object_load($pid);
    // The PID came from the Solr index; skip it if no object could be loaded
    // (e.g. presumably a stale index entry) rather than dereferencing a
    // non-object below.
    if (!$object) {
      continue;
    }
    $vars = $defaults;
    $vars['object'] = $object;
    islandora_manuscript_preprocess_ead_display_variables($vars);
    islandora_manuscript_process_ead_display_variables($vars);
    islandora_manuscript_setcache_eadhtml($pid, $vars['rendered_ead_html']);
    unset($vars, $object);
  }
}
11 changes: 11 additions & 0 deletions theme/islandora-manuscript-ead-display-failure.tpl.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?php
/**
 * @file
 * Placeholder content for the EAD to HTML cache.
 *
 * The rendered output of this template is written to the cache before the
 * EAD to HTML transform is attempted, and is overwritten with the real HTML
 * when the transform succeeds. Finding this content in the cache directory
 * therefore means that the transform did not complete, for example because of
 * a PHP fatal error such as an exceeded memory limit or time limit.
 *
 * NOTE(review): the marker below always says "out of memory error", whatever
 * the actual cause of the failure was.
 *
 * Available variables:
 * - $object: The object whose EAD was being transformed.
 *
 * If you want to render an error message to the user, do so here.
 */
?>
<!-- out of memory error: <?php echo $object->id; ?> -->
2 changes: 1 addition & 1 deletion theme/islandora-manuscript-ead-display.tpl.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* - $xslt_doc: A DOMDocument containing the parsed XSLT to run.
* - $processor: The XSLTProcessor instance which was used.
* - $markup_doc: A DOMDocument containing the markup to output, after
* this function has run.
* this function has run. If the cache was used, this will be null!
* - $rendered_ead_html: The rendered HTML from the $markup_doc transform
*/
?>
Expand Down

0 comments on commit 8ba48fa

Please sign in to comment.