Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Segment Large CSV Batch Processes" #1491

Merged
merged 1 commit into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions app/jobs/create_new_parent_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ def default_priority
-50
end

- def perform(batch_process, start_index = 0)
- index = batch_process.create_new_parent_csv(start_index)
- CreateNewParentJob.perform_later(batch_process, index) if !index.nil? && index != -1 && index > BatchProcess::BATCH_LIMIT
+ def perform(batch_process)
+ batch_process.create_new_parent_csv
end
end
5 changes: 2 additions & 3 deletions app/jobs/delete_parent_objects_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ def default_priority
-50
end

- def perform(batch_process, start_index = 0)
- index = batch_process.delete_parent_objects(start_index)
- DeleteParentObjectsJob.perform_later(batch_process, index) if !index.nil? && index != -1 && index > BatchProcess::BATCH_LIMIT
+ def perform(batch_process)
+ batch_process.delete_parent_objects
end
end
5 changes: 2 additions & 3 deletions app/jobs/reassociate_child_oids_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@ def default_priority
50
end

- def perform(batch_process, start_index = 0)
- index = batch_process.reassociate_child_oids(start_index)
- ReassociateChildOidsJob.perform_later(batch_process, index) if !index.nil? && index != -1 && index > BatchProcess::BATCH_LIMIT
+ def perform(batch_process)
+ batch_process.reassociate_child_oids
rescue => e
batch_process.batch_processing_event("ReassociateChildOidsJob failed due to #{e.message}", "failed")
end
Expand Down
5 changes: 2 additions & 3 deletions app/jobs/recreate_child_oid_ptiffs_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ def default_priority
9
end

- def perform(batch_process, start_index = 0)
- index = batch_process.recreate_child_oid_ptiffs(start_index)
- RecreateChildOidPtiffsJob.perform_later(batch_process, index) if !index.nil? && index != -1 && index > BatchProcess::BATCH_LIMIT
+ def perform(batch_process)
+ batch_process.recreate_child_oid_ptiffs
end
end
5 changes: 2 additions & 3 deletions app/jobs/update_parent_objects_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ def default_priority
50
end

- def perform(batch_process, start_index = 0)
- index = batch_process.update_parent_objects(start_index)
- UpdateParentObjectsJob.perform_later(batch_process, index) if !index.nil? && index != -1 && index > BatchProcess::BATCH_LIMIT
+ def perform(batch_process)
+ batch_process.update_parent_objects
end
end
1 change: 0 additions & 1 deletion app/models/batch_process.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ class BatchProcess < ApplicationRecord # rubocop:disable Metrics/ClassLength
has_many :child_objects, through: :batch_connections, source_type: "ChildObject", source: :connectable

CSV_MAXIMUM_ENTRIES = 10_000
- BATCH_LIMIT = 50

# SHARED BY ALL BATCH ACTIONS: ------------------------------------------------------------------- #

Expand Down
5 changes: 1 addition & 4 deletions app/models/concerns/create_parent_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,10 @@ module CreateParentObject
# rubocop:disable Metrics/CyclomaticComplexity
# rubocop:disable Metrics/BlockLength
# rubocop:disable Layout/LineLength
- def create_new_parent_csv(start_index = 0)
+ def create_new_parent_csv
self.admin_set = ''
sets = admin_set
parsed_csv.each_with_index do |row, index|
- next if start_index > index
if row['digital_object_source'].present? && row['preservica_uri'].present? && !row['preservica_uri'].blank?
begin
parent_object = CsvRowParentService.new(row, index, current_ability, user).parent_object
Expand Down Expand Up @@ -117,9 +116,7 @@ def create_new_parent_csv(start_index = 0)
rescue StandardError => e
batch_processing_event("Skipping row [#{index + 2}] Unable to save parent: #{e.message}.", "Skipped Row")
end
- return index + 1 if index + 1 - start_index > BatchProcess::BATCH_LIMIT
end
- -1
end
# rubocop:enable Metrics/AbcSize
# rubocop:enable Metrics/MethodLength
Expand Down
7 changes: 1 addition & 6 deletions app/models/concerns/deletable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@ module Deletable
# DELETE PARENT OBJECTS: ------------------------------------------------------------------------ #

# DELETES PARENT OBJECTS FROM INGESTED CSV
- # rubocop:disable Metrics/MethodLength
- def delete_parent_objects(start_index = 0)
+ def delete_parent_objects
self.admin_set = ''
sets = admin_set
parsed_csv.each_with_index do |row, index|
- next if start_index > index
oid = row['oid']
action = row['action']
metadata_source = row['source']
Expand All @@ -24,11 +22,8 @@ def delete_parent_objects(start_index = 0)
setup_for_background_jobs(parent_object, metadata_source)
parent_object.destroy!
parent_object.processing_event("Parent #{parent_object.oid} has been deleted", 'deleted')
- return index + 1 if index + 1 - start_index > BatchProcess::BATCH_LIMIT
end
- -1
end
- # rubocop:enable Metrics/MethodLength

# CHECKS TO SEE IF USER HAS ABILITY TO DELETE OBJECTS:
def deletable_parent_object(oid, index)
Expand Down
15 changes: 4 additions & 11 deletions app/models/concerns/reassociatable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,16 @@ module Reassociatable
BLANK_VALUE = "_blank_"

# triggers the reassociate process
- def reassociate_child_oids(start_index = 0)
+ def reassociate_child_oids
return unless batch_action == "reassociate child oids"
- parents_needing_update, parent_destination_map, index = update_child_objects(start_index)
+ parents_needing_update, parent_destination_map = update_child_objects
update_related_parent_objects(parents_needing_update, parent_destination_map)
- index
end

# finds which parents are needed to update
# rubocop:disable Metrics/AbcSize
# rubocop:disable Metrics/MethodLength
- # rubocop:disable Metrics/CyclomaticComplexity
- # rubocop:disable Metrics/PerceivedComplexity
- def update_child_objects(start_index)
+ def update_child_objects
self.admin_set = ''
sets = admin_set
return unless batch_action == "reassociate child oids"
Expand All @@ -28,7 +25,6 @@ def update_child_objects(start_index)
parent_destination_map = {}

parsed_csv.each_with_index do |row, index|
- next if start_index > index
co = load_child(index, row["child_oid"].to_i)
po = load_parent(index, row["parent_oid"].to_i)
next unless co.present? && po.present?
Expand All @@ -49,14 +45,11 @@ def update_child_objects(start_index)

values_to_update = check_headers(child_headers, row)
update_child_values(values_to_update, co, row, index)
- return [parents_needing_update, parent_destination_map, index + 1] if index + 1 - start_index > BatchProcess::BATCH_LIMIT
end
- [parents_needing_update, parent_destination_map, -1]
+ [parents_needing_update, parent_destination_map]
end
# rubocop:enable Metrics/AbcSize
# rubocop:enable Metrics/MethodLength
- # rubocop:enable Metrics/CyclomaticComplexity
- # rubocop:enable Metrics/PerceivedComplexity

# verifies headers are included. child headers found in csv_exportable:90
def check_headers(headers, row)
Expand Down
5 changes: 1 addition & 4 deletions app/models/concerns/recreate_child_ptiff.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,11 @@ module RecreateChildPtiff
# rubocop:disable Metrics/MethodLength
# rubocop:disable Metrics/CyclomaticComplexity
# rubocop:disable Metrics/PerceivedComplexity
- def recreate_child_oid_ptiffs(start_index = 0)
+ def recreate_child_oid_ptiffs
parents = Set[]
self.admin_set = ''
sets = admin_set
oids.each_with_index do |oid, index|
- next if start_index > index
child_object = ChildObject.find_by_oid(oid.to_i)
unless child_object
batch_processing_event("Skipping row [#{index + 2}] with unknown Child: #{oid}", 'Skipped Row')
Expand All @@ -36,9 +35,7 @@ def recreate_child_oid_ptiffs(start_index = 0)
GeneratePtiffJob.perform_later(child_object, self) if file_size <= SetupMetadataJob::FIVE_HUNDRED_MB
attach_item(child_object)
child_object.processing_event("Ptiff Queued", "ptiff-queued")
- return index + 1 if index + 1 - start_index > BatchProcess::BATCH_LIMIT
end
- -1
end
# rubocop:enable Metrics/AbcSize
# rubocop:enable Metrics/MethodLength
Expand Down
11 changes: 4 additions & 7 deletions app/models/concerns/updatable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,11 @@ def update_child_objects_caption
end

# rubocop:disable Metrics/BlockLength
- def update_parent_objects(start_index = 0)
+ def update_parent_objects
self.admin_set = ''
sets = admin_set
- return unless batch_action == 'update parent objects'
+ return unless batch_action == "update parent objects"
parsed_csv.each_with_index do |row, index|
- next if start_index > index
oid = row['oid'] unless ['oid'].nil?
redirect = row['redirect_to'] unless ['redirect_to'].nil?
parent_object = updatable_parent_object(oid, index)
Expand All @@ -88,10 +87,10 @@ def update_parent_objects(start_index = 0)
setup_for_background_jobs(parent_object, metadata_source)
parent_object.admin_set = admin_set unless admin_set.nil?

- if row['visibility'] == 'Open with Permission' && row['permission_set_key'].blank?
+ if row['visibility'] == "Open with Permission" && row['permission_set_key'].blank?
batch_processing_event("Skipping row [#{index + 2}]. Process failed. Permission Set missing from CSV.", 'Skipped Row')
next
- elsif row['visibility'] == 'Open with Permission' && row['permission_set_key'] != parent_object&.permission_set&.key
+ elsif row['visibility'] == "Open with Permission" && row['permission_set_key'] != parent_object&.permission_set&.key
permission_set = OpenWithPermission::PermissionSet.find_by(key: row['permission_set_key'])
if permission_set.nil?
batch_processing_event("Skipping row [#{index + 2}]. Process failed. Permission Set missing or nonexistent.", 'Skipped Row')
Expand All @@ -114,9 +113,7 @@ def update_parent_objects(start_index = 0)
sync_from_preservica if parent_object.digital_object_source == 'Preservica'

processing_event_for_parent(parent_object)
- return index + 1 if index + 1 - start_index > BatchProcess::BATCH_LIMIT
end
- -1
end
# rubocop:enable Metrics/CyclomaticComplexity
# rubocop:enable Metrics/PerceivedComplexity
Expand Down
5 changes: 0 additions & 5 deletions spec/fixtures/csv/create_many_parent_fixture_ids.csv

This file was deleted.

5 changes: 0 additions & 5 deletions spec/fixtures/csv/delete_many_parent_fixture_ids.csv

This file was deleted.

5 changes: 0 additions & 5 deletions spec/fixtures/csv/reassociate_many_child_objects.csv

This file was deleted.

44 changes: 0 additions & 44 deletions spec/jobs/create_new_parent_job_spec.rb

This file was deleted.

54 changes: 0 additions & 54 deletions spec/jobs/delete_parent_job_spec.rb

This file was deleted.

Loading