Skip to content

Commit

Permalink
Revert "Segment Large CSV Batch Processes (#1475)" (#1491)
Browse files Browse the repository at this point in the history
This reverts commit 2a311eb.
  • Loading branch information
K8Sewell authored Feb 20, 2025
1 parent 0aa6487 commit 823dadc
Show file tree
Hide file tree
Showing 19 changed files with 36 additions and 311 deletions.
5 changes: 2 additions & 3 deletions app/jobs/create_new_parent_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ def default_priority
-50
end

- def perform(batch_process, start_index = 0)
- index = batch_process.create_new_parent_csv(start_index)
- CreateNewParentJob.perform_later(batch_process, index) if !index.nil? && index != -1 && index > BatchProcess::BATCH_LIMIT
+ def perform(batch_process)
+ batch_process.create_new_parent_csv
end
end
5 changes: 2 additions & 3 deletions app/jobs/delete_parent_objects_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ def default_priority
-50
end

- def perform(batch_process, start_index = 0)
- index = batch_process.delete_parent_objects(start_index)
- DeleteParentObjectsJob.perform_later(batch_process, index) if !index.nil? && index != -1 && index > BatchProcess::BATCH_LIMIT
+ def perform(batch_process)
+ batch_process.delete_parent_objects
end
end
5 changes: 2 additions & 3 deletions app/jobs/reassociate_child_oids_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@ def default_priority
50
end

- def perform(batch_process, start_index = 0)
- index = batch_process.reassociate_child_oids(start_index)
- ReassociateChildOidsJob.perform_later(batch_process, index) if !index.nil? && index != -1 && index > BatchProcess::BATCH_LIMIT
+ def perform(batch_process)
+ batch_process.reassociate_child_oids
rescue => e
batch_process.batch_processing_event("ReassociateChildOidsJob failed due to #{e.message}", "failed")
end
Expand Down
5 changes: 2 additions & 3 deletions app/jobs/recreate_child_oid_ptiffs_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ def default_priority
9
end

- def perform(batch_process, start_index = 0)
- index = batch_process.recreate_child_oid_ptiffs(start_index)
- RecreateChildOidPtiffsJob.perform_later(batch_process, index) if !index.nil? && index != -1 && index > BatchProcess::BATCH_LIMIT
+ def perform(batch_process)
+ batch_process.recreate_child_oid_ptiffs
end
end
5 changes: 2 additions & 3 deletions app/jobs/update_parent_objects_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ def default_priority
50
end

- def perform(batch_process, start_index = 0)
- index = batch_process.update_parent_objects(start_index)
- UpdateParentObjectsJob.perform_later(batch_process, index) if !index.nil? && index != -1 && index > BatchProcess::BATCH_LIMIT
+ def perform(batch_process)
+ batch_process.update_parent_objects
end
end
1 change: 0 additions & 1 deletion app/models/batch_process.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ class BatchProcess < ApplicationRecord # rubocop:disable Metrics/ClassLength
has_many :child_objects, through: :batch_connections, source_type: "ChildObject", source: :connectable

CSV_MAXIMUM_ENTRIES = 10_000
- BATCH_LIMIT = 50

# SHARED BY ALL BATCH ACTIONS: ------------------------------------------------------------------- #

Expand Down
5 changes: 1 addition & 4 deletions app/models/concerns/create_parent_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,10 @@ module CreateParentObject
# rubocop:disable Metrics/CyclomaticComplexity
# rubocop:disable Metrics/BlockLength
# rubocop:disable Layout/LineLength
- def create_new_parent_csv(start_index = 0)
+ def create_new_parent_csv
self.admin_set = ''
sets = admin_set
parsed_csv.each_with_index do |row, index|
- next if start_index > index
if row['digital_object_source'].present? && row['preservica_uri'].present? && !row['preservica_uri'].blank?
begin
parent_object = CsvRowParentService.new(row, index, current_ability, user).parent_object
Expand Down Expand Up @@ -117,9 +116,7 @@ def create_new_parent_csv(start_index = 0)
rescue StandardError => e
batch_processing_event("Skipping row [#{index + 2}] Unable to save parent: #{e.message}.", "Skipped Row")
end
- return index + 1 if index + 1 - start_index > BatchProcess::BATCH_LIMIT
end
- -1
end
# rubocop:enable Metrics/AbcSize
# rubocop:enable Metrics/MethodLength
Expand Down
7 changes: 1 addition & 6 deletions app/models/concerns/deletable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@ module Deletable
# DELETE PARENT OBJECTS: ------------------------------------------------------------------------ #

# DELETES PARENT OBJECTS FROM INGESTED CSV
- # rubocop:disable Metrics/MethodLength
- def delete_parent_objects(start_index = 0)
+ def delete_parent_objects
self.admin_set = ''
sets = admin_set
parsed_csv.each_with_index do |row, index|
- next if start_index > index
oid = row['oid']
action = row['action']
metadata_source = row['source']
Expand All @@ -24,11 +22,8 @@ def delete_parent_objects(start_index = 0)
setup_for_background_jobs(parent_object, metadata_source)
parent_object.destroy!
parent_object.processing_event("Parent #{parent_object.oid} has been deleted", 'deleted')
- return index + 1 if index + 1 - start_index > BatchProcess::BATCH_LIMIT
end
- -1
end
- # rubocop:enable Metrics/MethodLength

# CHECKS TO SEE IF USER HAS ABILITY TO DELETE OBJECTS:
def deletable_parent_object(oid, index)
Expand Down
15 changes: 4 additions & 11 deletions app/models/concerns/reassociatable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,16 @@ module Reassociatable
BLANK_VALUE = "_blank_"

# triggers the reassociate process
- def reassociate_child_oids(start_index = 0)
+ def reassociate_child_oids
return unless batch_action == "reassociate child oids"
- parents_needing_update, parent_destination_map, index = update_child_objects(start_index)
+ parents_needing_update, parent_destination_map = update_child_objects
update_related_parent_objects(parents_needing_update, parent_destination_map)
- index
end

# finds which parents are needed to update
# rubocop:disable Metrics/AbcSize
# rubocop:disable Metrics/MethodLength
# rubocop:disable Metrics/CyclomaticComplexity
# rubocop:disable Metrics/PerceivedComplexity
- def update_child_objects(start_index)
+ def update_child_objects
self.admin_set = ''
sets = admin_set
return unless batch_action == "reassociate child oids"
Expand All @@ -28,7 +25,6 @@ def update_child_objects(start_index)
parent_destination_map = {}

parsed_csv.each_with_index do |row, index|
- next if start_index > index
co = load_child(index, row["child_oid"].to_i)
po = load_parent(index, row["parent_oid"].to_i)
next unless co.present? && po.present?
Expand All @@ -49,14 +45,11 @@ def update_child_objects(start_index)

values_to_update = check_headers(child_headers, row)
update_child_values(values_to_update, co, row, index)
- return [parents_needing_update, parent_destination_map, index + 1] if index + 1 - start_index > BatchProcess::BATCH_LIMIT
end
- [parents_needing_update, parent_destination_map, -1]
+ [parents_needing_update, parent_destination_map]
end
# rubocop:enable Metrics/AbcSize
# rubocop:enable Metrics/MethodLength
# rubocop:enable Metrics/CyclomaticComplexity
# rubocop:enable Metrics/PerceivedComplexity

# verifies headers are included. child headers found in csv_exportable:90
def check_headers(headers, row)
Expand Down
5 changes: 1 addition & 4 deletions app/models/concerns/recreate_child_ptiff.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,11 @@ module RecreateChildPtiff
# rubocop:disable Metrics/MethodLength
# rubocop:disable Metrics/CyclomaticComplexity
# rubocop:disable Metrics/PerceivedComplexity
- def recreate_child_oid_ptiffs(start_index = 0)
+ def recreate_child_oid_ptiffs
parents = Set[]
self.admin_set = ''
sets = admin_set
oids.each_with_index do |oid, index|
- next if start_index > index
child_object = ChildObject.find_by_oid(oid.to_i)
unless child_object
batch_processing_event("Skipping row [#{index + 2}] with unknown Child: #{oid}", 'Skipped Row')
Expand All @@ -36,9 +35,7 @@ def recreate_child_oid_ptiffs(start_index = 0)
GeneratePtiffJob.perform_later(child_object, self) if file_size <= SetupMetadataJob::FIVE_HUNDRED_MB
attach_item(child_object)
child_object.processing_event("Ptiff Queued", "ptiff-queued")
- return index + 1 if index + 1 - start_index > BatchProcess::BATCH_LIMIT
end
- -1
end
# rubocop:enable Metrics/AbcSize
# rubocop:enable Metrics/MethodLength
Expand Down
11 changes: 4 additions & 7 deletions app/models/concerns/updatable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,11 @@ def update_child_objects_caption
end

# rubocop:disable Metrics/BlockLength
- def update_parent_objects(start_index = 0)
+ def update_parent_objects
self.admin_set = ''
sets = admin_set
- return unless batch_action == 'update parent objects'
+ return unless batch_action == "update parent objects"
parsed_csv.each_with_index do |row, index|
- next if start_index > index
oid = row['oid'] unless ['oid'].nil?
redirect = row['redirect_to'] unless ['redirect_to'].nil?
parent_object = updatable_parent_object(oid, index)
Expand All @@ -88,10 +87,10 @@ def update_parent_objects(start_index = 0)
setup_for_background_jobs(parent_object, metadata_source)
parent_object.admin_set = admin_set unless admin_set.nil?

- if row['visibility'] == 'Open with Permission' && row['permission_set_key'].blank?
+ if row['visibility'] == "Open with Permission" && row['permission_set_key'].blank?
batch_processing_event("Skipping row [#{index + 2}]. Process failed. Permission Set missing from CSV.", 'Skipped Row')
next
- elsif row['visibility'] == 'Open with Permission' && row['permission_set_key'] != parent_object&.permission_set&.key
+ elsif row['visibility'] == "Open with Permission" && row['permission_set_key'] != parent_object&.permission_set&.key
permission_set = OpenWithPermission::PermissionSet.find_by(key: row['permission_set_key'])
if permission_set.nil?
batch_processing_event("Skipping row [#{index + 2}]. Process failed. Permission Set missing or nonexistent.", 'Skipped Row')
Expand All @@ -114,9 +113,7 @@ def update_parent_objects(start_index = 0)
sync_from_preservica if parent_object.digital_object_source == 'Preservica'

processing_event_for_parent(parent_object)
- return index + 1 if index + 1 - start_index > BatchProcess::BATCH_LIMIT
end
- -1
end
# rubocop:enable Metrics/CyclomaticComplexity
# rubocop:enable Metrics/PerceivedComplexity
Expand Down
5 changes: 0 additions & 5 deletions spec/fixtures/csv/create_many_parent_fixture_ids.csv

This file was deleted.

5 changes: 0 additions & 5 deletions spec/fixtures/csv/delete_many_parent_fixture_ids.csv

This file was deleted.

5 changes: 0 additions & 5 deletions spec/fixtures/csv/reassociate_many_child_objects.csv

This file was deleted.

44 changes: 0 additions & 44 deletions spec/jobs/create_new_parent_job_spec.rb

This file was deleted.

54 changes: 0 additions & 54 deletions spec/jobs/delete_parent_job_spec.rb

This file was deleted.

Loading

0 comments on commit 823dadc

Please sign in to comment.