Skip to content

Commit

Permalink
Attempt to get version of cron that updates and then pulls from cache…
Browse files Browse the repository at this point in the history
… working
  • Loading branch information
kcmcg committed Dec 28, 2023
1 parent 7f5aea0 commit bcfe74b
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 46 deletions.
108 changes: 64 additions & 44 deletions PmiRdrModule.php
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,16 @@ public function resume_cache_or_restart($rdrUrl) {
return $returnValue;
}

/**
 * Fetch the cached data stored for a given RDR URL.
 *
 * Resolves the system-setting key for the URL via getCacheSettingByUrl(),
 * reads that system setting and JSON-decodes it into an associative array.
 *
 * @param string $rdrUrl RDR endpoint URL whose cache should be read
 *                       (presumably the same URL string used when the cache was written)
 * @return array Decoded cached data; an empty array when nothing is stored
 *               or the stored value does not decode to an array
 */
public function getCachedDataByURL($rdrUrl) {
    $cacheSetting = $this->getCacheSettingByUrl($rdrUrl);

    $cachedData = $this->getSystemSetting($cacheSetting);

    ## Nothing cached yet (or a non-string value): there is nothing to decode
    if(!is_string($cachedData) || $cachedData === "") {
        return [];
    }

    $cachedData = json_decode($cachedData, true);

    ## json_decode() returns null for malformed JSON and a scalar for scalar JSON;
    ## callers iterate over the result, so normalise anything else to an empty array
    return is_array($cachedData) ? $cachedData : [];
}

public function getCacheSettingByUrl($rdrUrl) {
$cachedSnapshots = $this->getSystemSetting(self::RDR_CACHE_SNAPSHOTS);
$cachedSnapshots = json_decode($cachedSnapshots, true);
Expand Down Expand Up @@ -377,7 +387,7 @@ public function fetchNextSnapshots($rdrUrl, $currentSnapshots) {
$snapshotSetting = $this->getCacheSettingByUrl($rdrUrl);

$storedSnapshots = $this->getSystemSetting($snapshotSetting);
$storedSnapshots = json_decode($storedSnapshots, true) ?? [];
$storedSnapshots = json_decode($storedSnapshots, true) ?: [];

$latestSnapshot = max($currentSnapshots);

Expand All @@ -393,11 +403,17 @@ public function fetchNextSnapshots($rdrUrl, $currentSnapshots) {
$storedSnapshots = json_encode($storedSnapshots);
$this->setSystemSetting($snapshotSetting, $storedSnapshots);

## TODO Not currently sure how to check if last snapshot, so just always assuming last
## It's possible the API changed and there's no longer a way to limit the count of responses
$isLastSnapshot = true;

if($isLastSnapshot) {
$timeout = ((int)$this->getSystemSetting('timeout_duration')) ?: 24*60*60;
$this->log(self::RDR_CACHE_STATUS,[
self::RDR_CACHE_SNAPSHOTS => $currentSnapshots,
self::RDR_CACHE_STATUS => "done",
"url" => $rdrUrl
"url" => $rdrUrl,
"timeout" => time() + $timeout
]);

## Restore $_GET PID
Expand Down Expand Up @@ -431,36 +447,65 @@ public function rdrPullSnapshotsFromAPI($rdrUrl, $debugApi = false) {
if($debugApi) {
echo "Debug Test<Br />";
echo "Results Details: ".$results->getStatusCode()."<br />";
echo "<pre>".htmlspecialchars(var_export($decodedResults,true))."</pre><br />";
echo "Total Records Pulled: ".count($decodedResults)."<br />";
if(count($decodedResults) > 1000) {
$outputResults = array_slice($decodedResults, 0, 1000);
}
else {
$outputResults = $decodedResults;
}

echo "<pre>".htmlspecialchars(var_export($outputResults,true))."</pre><br />";
}

return $decodedResults;
}

## RDR Cron method to pull data in
public function rdr_pull($debugApi = false,$singleRecord = false) {
error_log("RDR: Ran pull cron");
$this->log("RDR: Ran pull cron");

if(is_array($debugApi)) {
## When run from the cron, an array is passed in here
$debugApi = false;
}

/** @var \Vanderbilt\GSuiteIntegration\GSuiteIntegration $module */
$client = $this->getGoogleClient();

/** @var GuzzleHttp\ClientInterface $httpClient */
$httpClient = $client->authorize();

$cronBeginTime = microtime(true);
$projectList = $this->framework->getProjectsWithModuleEnabled();

## Start by looping through all projects with this module enabled
## Start by looping through all projects with this module enabled to update cache
foreach($projectList as $projectId) {
## If a null or empty project ID gets passed in, skip it
if(!$projectId) {
continue;
}


$allUrlsCached = true;

## Cache the cron results for this project,
## stop if over 90 seconds for single pull or 240 for whole cron
$rdrUrls = $module->getProjectSetting("rdr-urls");
$dataConnectionTypes = $this->getProjectSetting("rdr-connection-type",$projectId);
foreach($rdrUrls as $urlKey => $thisUrl) {
## Only processing pull connections here, also skip empty URLs
if($dataConnectionTypes[$urlKey] != "pull" || empty($thisUrl)) {
continue;
}

$startTime = microtime(true);

$cacheDone = $this->resume_cache_or_restart($thisUrl);
if(!$cacheDone) {
$allUrlsCached = false;
}

$endTime = microtime(true);
if(($endTime - $startTime) > 90 || ($endTime - $cronBeginTime) > 240) {
continue;
}
}

if($allUrlsCached) {
## Pull event ID and Arm ID from the \Project object for this project
$proj = new \Project($projectId);
$proj->loadEvents();
Expand All @@ -471,18 +516,16 @@ public function rdr_pull($debugApi = false,$singleRecord = false) {
$metadata = $this->getMetadata($projectId);

## Pull the module settings needed for import from this project
$rdrUrl = $this->getProjectSetting("rdr-urls",$projectId);
$dataMappingJson = $this->getProjectSetting("rdr-data-mapping-json",$projectId);
$dataMappingFields = $this->getProjectSetting("rdr-redcap-field-name",$projectId);
$dataMappingApiFields = $this->getProjectSetting("rdr-redcap-field-name",$projectId);
$apiRecordFields = $this->getProjectSetting("rdr-endpoint-record",$projectId);
// $redcapRecordFields = $this->getProjectSetting("rdr-record-field",$projectId);
$dataFormats = $this->getProjectSetting("rdr-data-format",$projectId);
$testingOnly = $this->getProjectSetting("rdr-test-only",$projectId);
$dataConnectionTypes = $this->getProjectSetting("rdr-connection-type",$projectId);

## Loop through each of the URLs this project is pointed to
foreach($rdrUrl as $urlKey => $thisUrl) {
foreach($rdrUrls as $urlKey => $thisUrl) {
## Only processing pull connections here, also skip empty URLs
if($dataConnectionTypes[$urlKey] != "pull" || empty($thisUrl)) {
continue;
Expand Down Expand Up @@ -514,38 +557,15 @@ public function rdr_pull($debugApi = false,$singleRecord = false) {

$recordIds = array_keys($recordList);
$maxRecordId = max($recordIds);

## Pull the data from the API and then decode it (assuming its JSON for now)
if($singleRecord) {
$results = $httpClient->get($thisUrl."?snapshot_id=".$singleRecord);
}
else if(count($recordIds) > 0) {
$results = $httpClient->get($thisUrl."?last_snapshot_id=".$maxRecordId);
}
else {
$results = $httpClient->get($thisUrl);
}

$decodedResults = json_decode($results->getBody()->getContents(),true);

## Export full API results if trying to debug
if($debugApi) {
echo "Debug Test<Br />";
echo "Results Details: ".$results->getStatusCode()."<br />";
echo "<pre>".htmlspecialchars(var_export($decodedResults,true))."</pre><br />";
continue;
}

## This value is set if an error is returned from the RDR
if($decodedResults["message"] != "") {
echo "Error getting results: received message \"".$decodedResults["message"]."\"<br />";
continue;
}


## TODO Need to have this pull from cache instead
$decodedResults = $this->getCachedDataByURL($thisUrl);

## Start looping through the data returned from the API (this is the "record" level)
foreach($decodedResults as $dataKey => $dataDetails) {
## This could be because an error message was received or the API data isn't formatted properly
if(!is_array($dataDetails)) {
## Or if not yet at $maxRecordId
if(!is_array($dataDetails) || $dataKey < $maxRecordId) {
continue;
}

Expand Down
5 changes: 5 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,11 @@
"key": "auth-user-email",
"name": "Authorized User to Perform GSuite Actions",
"type": "text"
},
{
"key": "timeout_duration",
"name": "How long to cache results for RDR URLs",
"type": "text"
}
],

Expand Down
4 changes: 2 additions & 2 deletions test_rdr.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@
<tr><td>Run RDR Pull With This Value:</td><td><input type='text' data-lpignore='true' value='".htmlspecialchars($_GET['pull_latest_record'])."' name='pull_latest_record' /></td></tr>
<tr><td>Workspace to pull:</td><td><input type='text' data-lpignore='true' value='".htmlspecialchars($_GET['pull_record'])."' name='pull_record' /></td></tr>
<tr><td>Check to run the cron:</td><td><input type='checkbox' data-lpignore='true' value='1' ".(empty($_GET['run_cron']) ? "" : "checked")." name='run_cron' /></td></tr>
<tr><td>Check to output debug:</td><td><input type='checkbox' data-lpignore='true' value='1' ".(empty($_GET['debug']) ? "" : "checked")." name='debug' /></td></tr>
<tr><td><input type='submit' value='Submit' /></td></tr>
</table></form>";

if($_GET['pull_latest_record']) {
$rdrUrls = $module->getProjectSetting("rdr-urls");
foreach($rdrUrls as $thisUrl) {
$thisUrl .= "?last_snapshot_id=".$_GET['pull_latest_record'];
$module->rdrPullSnapshotsFromAPI($thisUrl, true);
$module->rdrPullSnapshotsFromAPI($thisUrl, $_GET['debug']);
}
}

Expand Down

0 comments on commit bcfe74b

Please sign in to comment.