# Process Documents
#
# Workflow file for run #243 of "Process Documents".

# Workflow: fetch every document from the getAllDocs Netlify function, then
# push the documents through the remaining Netlify functions one batch at a
# time (status-change handling, commit of missing docs/comments, and
# recent-change handling).
name: Process Documents

on:
  workflow_dispatch:
  schedule:
    # GitHub evaluates cron expressions in UTC: this runs daily at 00:10 UTC.
    - cron: '10 0 * * *'

jobs:
  process_docs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          # Only install jq when it is missing, and refresh the package index
          # first so the install does not fail on stale package lists.
          if ! command -v jq >/dev/null 2>&1; then
            sudo apt-get update
            sudo apt-get install -y jq
          fi
          # Install any other necessary dependencies

      - name: Fetch documents and process in batches
        env:
          NETLIFY_FUNCTION_URL: ${{ secrets.NETLIFY_FUNCTION_URL }}
        run: |
          # POST a JSON body to a Netlify function; the response is written
          # to stdout.
          #   $1 - function name appended to NETLIFY_FUNCTION_URL
          #   $2 - JSON request body
          post_json() {
            curl -sS -X POST -H "Content-Type: application/json" -d "$2" "${NETLIFY_FUNCTION_URL}/$1"
          }

          # Make a call to your getAllDocs Netlify function
          response=$(curl -sS "${NETLIFY_FUNCTION_URL}/getAllDocs")
          echo "Response from getAllDocs: $response"

          # Check if the response is valid JSON
          if ! echo "$response" | jq -e . >/dev/null 2>&1; then
            echo "Error: Invalid JSON response from getAllDocs function"
            exit 1
          fi

          # The batching below slices the response, so it must be an array;
          # a JSON string would otherwise be silently sliced into characters.
          if ! echo "$response" | jq -e 'type == "array"' >/dev/null 2>&1; then
            echo "Error: getAllDocs response is not a JSON array"
            exit 1
          fi

          docs=$(echo "$response" | jq -c '.')

          # Break documents into batches (one compact JSON array per line)
          batch_size=1
          batches=$(echo "$docs" | jq -c --argjson bs "$batch_size" 'range(0; length; $bs) as $i | .[$i:$i+$bs]')

          # Process each batch
          while IFS= read -r batch; do
            # A here-string of an empty value still yields one empty line
            # (this happens when getAllDocs returns []); skip it instead of
            # POSTing malformed JSON to every function.
            [ -n "$batch" ] || continue

            # Build request bodies with jq so they are always well-formed JSON.
            docs_only_payload=$(jq -cn --argjson docs "$batch" '{docs: $docs}')
            docs_payload=$(jq -cn --argjson docs "$batch" '{docs: $docs, test: false}')

            # Call checkStatusChanges Netlify function
            # (-c keeps the response as compact, still-valid JSON)
            statusChangeResponse=$(post_json checkStatusChanges "$docs_payload" | jq -c '.')
            # Call checkRecentChanges Netlify function
            recentChangesResponse=$(post_json checkRecentChanges "$docs_only_payload" | jq -c '.')
            echo "Processed results for batch: {\"statusChangeResponse\": $statusChangeResponse, \"recentChangesResponse\": $recentChangesResponse }"

            # Process status changes
            if [ -n "$statusChangeResponse" ] && [ "$statusChangeResponse" != "[]" ]; then
              # Delete first copy from Google Drive for changed docs
              while IFS= read -r changedDocId; do
                [ -n "$changedDocId" ] || continue
                # "// empty" yields no output (instead of the text "null")
                # when no document in the batch has this google_id, so the
                # -n guard below is meaningful.
                changedDoc=$(echo "$batch" | jq -c --arg id "$changedDocId" 'map(select(.google_id == $id)) | .[0] // empty')
                if [ -n "$changedDoc" ]; then
                  # Only delete when the document has more than two copies.
                  if [ "$(echo "$changedDoc" | jq -r '.all_copy_ids | length')" -gt 2 ]; then
                    firstCopyId=$(echo "$changedDoc" | jq -r '.all_copy_ids[0]')
                    post_json deleteFileFromDrive "$(jq -cn --arg fileId "$firstCopyId" '{fileId: $fileId}')"
                  fi
                fi
              done < <(echo "$statusChangeResponse" | jq -r '.[]')

              status_payload=$(jq -cn --argjson docs "$batch" --argjson status "$statusChangeResponse" '{docs: $docs, statusChangeResponse: $status, test: false}')
              # Call copyChangedDocs Netlify function
              post_json copyChangedDocs "$status_payload"
              # Call getDocText Netlify function
              post_json getDocText "$status_payload"
            fi

            # Call commitNonExistingDocs Netlify function
            post_json commitNonExistingDocs "$docs_payload"
            # Call commitNonExistingComments Netlify function
            post_json commitNonExistingComments "$docs_payload"
            # Call commitNonExistingDocsWithComments Netlify function
            post_json commitNonExistingDocsWithComments "$docs_payload"

            # Process recent changes
            if [ -n "$recentChangesResponse" ] && [ "$recentChangesResponse" != "[]" ]; then
              recent_payload=$(jq -cn --argjson docs "$batch" --argjson recent "$recentChangesResponse" '{docs: $docs, recentChangesResponse: $recent, test: false}')
              # Call getDocBodyAndCommitToGitHub Netlify function
              post_json getDocBodyAndCommitToGitHub "$recent_payload"
              # Call getDocCommentsAndCommitToGitHub Netlify function
              post_json getDocCommentsAndCommitToGitHub "$recent_payload"
              # Call getDocBodyAndCommentsAndCommitToGitHub Netlify function
              post_json getDocBodyAndCommentsAndCommitToGitHub "$recent_payload"
            fi
          done <<< "$batches"