Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Outcome Result Groups IDs #1206

Merged
merged 15 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions app/models/calculated_value.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ class CalculatedValue < ActiveRecord::Base
belongs_to :study, :foreign_key => 'nct_id'

def self.populate_for(nct_ids)
Rails.logger.info "Calculating values for #{nct_ids.size} studies"
return if nct_ids.empty?
@nct_ids = nct_ids.freeze
@calculations = initialize_calculations
Expand Down
37 changes: 22 additions & 15 deletions app/models/outcome.rb
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
class Outcome < StudyRelationship
has_many :outcome_counts, inverse_of: :outcome
has_many :outcome_analyses, inverse_of: :outcome
has_many :outcome_measurements, inverse_of: :outcome
has_many :outcome_counts
has_many :outcome_analyses
has_many :outcome_measurements
has_many :result_groups

add_mapping do
{
table: :outcomes,
root: [:resultsSection, :outcomeMeasuresModule, :outcomeMeasures],
requires: :result_groups,
columns: [
{ name: :outcome_type, value: :type },
{ name: :title, value: :title },
Expand All @@ -22,13 +22,23 @@ class Outcome < StudyRelationship
{ name: :param_type, value: :paramType }
],
children: [
{
table: :result_groups,
root: [:groups],
columns: [
{ name: :ctgov_group_code, value: :id },
{ name: :result_type, value: 'Outcome' },
{ name: :title, value: :title },
{ name: :description, value: :description }
]
},
{
table: :outcome_measurements,
root: nil,
# TODO: after removing dup values can update root to [:classes] to reduce object size
root: nil,
flatten: [:classes, :categories, :measurements],
columns: [
{ name: :outcome_id, value: nil },
{ name: :result_group_id, value: reference(:result_groups)[:groupId, 'Outcome'] },
# result_group_id is set by ResultGroup.set_outcome_results_group_ids
{ name: :ctgov_group_code, value: :groupId },
{ name: :classification, value: [:$parent, :$parent, :title] },
{ name: :category, value: [:$parent, :title] },
Expand All @@ -52,12 +62,10 @@ class Outcome < StudyRelationship
},
{
table: :outcome_counts,
root: nil,
requires: :result_groups, # do I need this since the parent has it?
flatten: [:denoms, :counts],
root: [:denoms],
flatten: [:counts],
columns: [
{ name: :outcome_id, value: nil },
{ name: :result_group_id, value: reference(:result_groups)[:groupId, 'Outcome'] },
# result_group_id is set by ResultGroup.handle_outcome_result_groups_ids
{ name: :ctgov_group_code, value: [:groupId] },
{ name: :scope, value: 'Measure' },
{ name: :units, value: [:$parent, :units] },
Expand Down Expand Up @@ -98,9 +106,8 @@ class Outcome < StudyRelationship
table: :outcome_analysis_groups,
root: [:groupIds],
columns: [
{ name: :outcome_analysis_id, value: nil },
{ name: :result_group_id, value: reference(:result_groups)[nil, 'Outcome'] },
{ name: :ctgov_group_code, value: nil }
# result_group_id is set by ResultGroup.handle_outcome_result_groups_ids
{ name: :ctgov_group_code, value: nil } # will be single ctgov_group_code from [:groupIds]
]
}
]
Expand Down
5 changes: 2 additions & 3 deletions app/models/outcome_analysis.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
class OutcomeAnalysis < StudyRelationship
belongs_to :outcome, inverse_of: :outcome_analyses
has_many :outcome_analysis_groups, inverse_of: :outcome_analysis
has_many :result_groups, :through => :outcome_analysis_groups
belongs_to :outcome
has_many :outcome_analysis_groups
end
4 changes: 2 additions & 2 deletions app/models/outcome_analysis_group.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
class OutcomeAnalysisGroup < StudyRelationship
belongs_to :outcome_analysis, inverse_of: :outcome_analysis_groups
belongs_to :result_group, inverse_of: :outcome_analysis_groups
belongs_to :outcome_analysis#, inverse_of: :outcome_analysis_groups
belongs_to :result_group#, inverse_of: :outcome_analysis_groups

end
85 changes: 61 additions & 24 deletions app/models/result_group.rb
Original file line number Diff line number Diff line change
@@ -1,38 +1,16 @@
class ResultGroup < StudyRelationship

has_many :reported_events
has_many :milestones
has_many :drop_withdrawals
has_many :baseline_counts
has_many :baseline_measures
has_many :outcome_counts
has_many :outcome_measurements
has_many :outcome_analysis_groups, inverse_of: :result_group
has_many :outcome_analyses, :through => :outcome_analysis_groups
belongs_to :outcome, optional: true

add_mapping do
[
{
table: :result_groups,
root: [:resultsSection, :baselineCharacteristicsModule, :groups],
index: [:ctgov_group_code, :result_type],
unique: true,
columns: [
{ name: :ctgov_group_code, value: :id },
{ name: :result_type, value: 'Baseline' },
{ name: :title, value: :title },
{ name: :description, value: :description },
]
},
{
table: :result_groups,
root: [:resultsSection, :outcomeMeasuresModule, :outcomeMeasures],
flatten: [:groups],
index: [:ctgov_group_code, :result_type],
unique: true,
columns: [
{ name: :ctgov_group_code, value: :id },
{ name: :result_type, value: 'Outcome' },
{ name: :result_type, value: 'Baseline' },
{ name: :title, value: :title },
{ name: :description, value: :description },
]
Expand All @@ -53,6 +31,8 @@ class ResultGroup < StudyRelationship
table: :result_groups,
root: [:resultsSection, :adverseEventsModule, :eventGroups],
index: [:ctgov_group_code, :result_type],
# outcomes should be loaded first to avoid double loading of reported events
requires: :outcomes, # TODO: review - this shouldn't be necessary
unique: true,
columns: [
{ name: :ctgov_group_code, value: :id },
Expand All @@ -63,4 +43,61 @@ class ResultGroup < StudyRelationship
}
]
end

def self.handle_outcome_result_groups_ids(nct_ids)
Rails.logger.info "Handling Results Group IDs for Outcome Models"
result_groups = fetch_outcome_groups_for(nct_ids)
set_results_group_ids_for(OutcomeCount, nct_ids, result_groups)
set_results_group_ids_for(OutcomeMeasurement, nct_ids, result_groups)
set_outcome_analysis_group_ids(nct_ids, result_groups)
end


private

# TODO: add test for private methods

def self.set_outcome_analysis_group_ids(nct_ids, result_groups)
Rails.logger.info "Setting Outcome Analysis Group IDs"
analyses = OutcomeAnalysis.where(nct_id: nct_ids).pluck(:id, :outcome_id).to_h
groups = OutcomeAnalysisGroup.where(nct_id: nct_ids)

updates = groups.map do | group |
outcome_id = analyses[group.outcome_analysis_id]
next unless outcome_id

key = [group.nct_id, group.ctgov_group_code, outcome_id]
result_group_id = result_groups[key]&.id
{ id: group.id, result_group_id: result_group_id } if result_group_id
end.compact

bulk_update(OutcomeAnalysisGroup, updates)
end

def self.set_results_group_ids_for(model, nct_ids, result_groups)
Rails.logger.info "Setting Result Group IDs for #{model}"

records = model.where(nct_id: nct_ids).select(:id, :nct_id, :ctgov_group_code, :outcome_id)
updates = records.map do |record|
key = [record.nct_id, record.ctgov_group_code, record.outcome_id]
result_group_id = result_groups[key]&.id
{ id: record.id, result_group_id: result_group_id } if result_group_id
end.compact

bulk_update(model, updates)
end


# TODO: consider adding index for nct_id and result_type
def self.fetch_outcome_groups_for(nct_ids)
where(nct_id: nct_ids, result_type: "Outcome")
.select(:id, :nct_id, :ctgov_group_code, :outcome_id)
.index_by do |group|
[group.nct_id, group.ctgov_group_code, group.outcome_id]
end
end

def self.bulk_update(model, updates)
model.import updates, on_duplicate_key_update: { conflict_target: [:id], columns: [:result_group_id] }
end
end
27 changes: 18 additions & 9 deletions app/models/study_json_record/worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def save_children(parents, indent=" ")
klass = parents.first.class
return if klass == Study


klass.reflect_on_all_associations(:has_many).each do |association|
# update the nct_id and parent_id of the children
collection = []
Expand All @@ -59,6 +60,12 @@ def save_children(parents, indent=" ")
next unless inverse_name

parent = child.send(inverse_name)
# keeping this temprorary fix until we find a better solution
if parent.nil?
puts "Skipping child because parent is nil"
puts "Parrent: #{inverse_name}, child: #{association.name}"
next
end
child.nct_id = parent.nct_id
child[association.foreign_key] = parent.id
collection << child
Expand Down Expand Up @@ -90,11 +97,12 @@ def process_study(nct_id)
def process(count = 1, records = nil)
# load records
records = StudyJsonRecord.version_2.needs_processing.limit(count) if records.nil?

Rails.logger.debug { "records: #{records.count}" }
return if records.empty?
nct_ids = records.map(&:nct_id)
Rails.logger.debug { "records: #{nct_ids.count}" }

puts "worker is about to process #{records.count} records".green unless Rails.env.test?
remove_study_data(records.map(&:nct_id))
puts "worker is about to process #{nct_ids.count} records".green unless Rails.env.test?
remove_study_data(nct_ids)

@collections = Hash.new { |h, k| h[k] = [] }
@index = Hash.new { |h, k| h[k] = {} }
Expand All @@ -103,11 +111,11 @@ def process(count = 1, records = nil)
StudyRelationship.sorted_mapping.each do |mapping| # process each mapping instructions
process_mapping(mapping, records)
end

# recalculate values for processed records
CalculatedValue.populate_for(records.pluck(:nct_id))
ResultGroup.handle_outcome_result_groups_ids(nct_ids)
CalculatedValue.populate_for(nct_ids)
# mark study records as saved
StudyJsonRecord.version_2.where(nct_id: records.map(&:nct_id)).update_all(saved_study_at: Time.zone.now) # rubocop:disable Rails/SkipsModelValidations
StudyJsonRecord.version_2.where(nct_id: nct_ids).update_all(saved_study_at: Time.zone.now) # rubocop:disable Rails/SkipsModelValidations
end

# private
Expand All @@ -119,7 +127,6 @@ def prepare_children(parent, content, children)
collection = [] # this array will collect all the models to be imported
model = mapping[:table].to_s.classify.constantize # get the model from the table name
root = mapping[:root].map(&:to_s) if mapping[:root] # normalize root path to array of strings

mapping_root = root ? content.dig(*root) : content
next if mapping_root.nil? # skip if no root found

Expand Down Expand Up @@ -254,13 +261,15 @@ def process_mapping(mapping, records)
print "\r #{mapping[:table]} - #{collection.count}" unless ENV['RAILS_ENV'] == 'test'
model.import(collection)
puts "\r✅ #{mapping[:table]} - #{collection.count}" unless ENV['RAILS_ENV'] == 'test'

if mapping[:index]
index = [:nct_id] + mapping[:index]
collection.each do |row|
row_index = index.map { |i| row.send(i) }
@index[mapping[:table]][row_index] = row.id
end
end

save_children(collection)
rescue
raise "Error processing #{mapping[:table]}"
Expand Down
6 changes: 6 additions & 0 deletions db/foreign_keys.json
Original file line number Diff line number Diff line change
Expand Up @@ -100,5 +100,11 @@
"parent_table": "result_groups",
"child_column": "result_group_id",
"parent_column": "id"
},
{
"child_table": "result_groups",
"parent_table": "outcomes",
"child_column": "outcome_id",
"parent_column": "id"
}
]
5 changes: 5 additions & 0 deletions db/migrate/20240730222441_add_outome_id_to_result_groups.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
class AddOutomeIdToResultGroups < ActiveRecord::Migration[6.0]
def change
add_column :result_groups, :outcome_id, :integer
end
end
3 changes: 3 additions & 0 deletions lib/tasks/load.rake
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@ namespace :db do

desc 'process study json records'
task :import_study, [:nct_id] => :environment do |t, args|
dbmgr = Util::DbManager.new
dbmgr.remove_constraints
worker = StudyJsonRecord::Worker.new
records = StudyJsonRecord.where(nct_id: args[:nct_id], version: '2')
worker.process(1, records)
dbmgr.add_constraints
end

desc 'process study json records'
Expand Down
3 changes: 2 additions & 1 deletion spec/models/baseline_count_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def expected_result_group_data
'ctgov_group_code' => record['id'],
'result_type' => 'Baseline',
'title' => record['title'],
'description' => record['description']
'description' => record['description'],
'outcome_id' => nil
}
end
end
Expand Down
3 changes: 2 additions & 1 deletion spec/models/baseline_measurement_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ def expected_result_group_data
ctgov_group_code: record["id"],
result_type: "Baseline",
title: record["title"],
description: record["description"]
description: record["description"],
outcome_id: nil
}
end
end
Expand Down
35 changes: 18 additions & 17 deletions spec/models/result_group_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,13 @@
StudyJsonRecord.create(nct_id: "NCT000001", version: '2', content: content) # create a brand new json record

# process the json
StudyJsonRecord::Worker.new.process # import the new json record
StudyJsonRecord::Worker.new.process_study("NCT000001")

# load the database entries
imported = ResultGroup.all.map{|x| x.attributes }
imported.each{|x| x.delete("id")}
imported = ResultGroup.all.map { |x| x.attributes.except('id', 'outcome_id') }

# order of expected depends on order of importing different types of result groups
expected = [
{
"nct_id"=>"NCT000001",
"result_type"=>"Participant Flow",
"ctgov_group_code"=>"FG000",
"title"=>"Cohort 1",
"description"=>"Cohort 1 received..."
},
{
"nct_id"=>"NCT000001",
"result_type"=>"Participant Flow",
"ctgov_group_code"=>"FG001",
"title"=>"Cohort 2",
"description"=>"Cohort 2 received..."
},
{
"nct_id"=>"NCT000001",
"result_type"=>"Outcome",
Expand All @@ -48,8 +34,23 @@
"ctgov_group_code" => "OG0000",
"title" => "Outcome 3",
"description" => "Outcome Group 3 description"
},
{
"nct_id"=>"NCT000001",
"result_type"=>"Participant Flow",
"ctgov_group_code"=>"FG000",
"title"=>"Cohort 1",
"description"=>"Cohort 1 received..."
},
{
"nct_id"=>"NCT000001",
"result_type"=>"Participant Flow",
"ctgov_group_code"=>"FG001",
"title"=>"Cohort 2",
"description"=>"Cohort 2 received..."
}
]

expect(imported).to eq(expected)
end
end
Loading