Skip to content

Commit

Permalink
Merge pull request #1206 from ctti-clinicaltrials/fix/outcome-result-…
Browse files Browse the repository at this point in the history
…group-id

Fix Outcome Result Groups IDs
  • Loading branch information
akabishau authored Aug 15, 2024
2 parents 8de5299 + 9a792a5 commit 46fc0a9
Show file tree
Hide file tree
Showing 12 changed files with 142 additions and 72 deletions.
1 change: 1 addition & 0 deletions app/models/calculated_value.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ class CalculatedValue < ActiveRecord::Base
belongs_to :study, :foreign_key => 'nct_id'

def self.populate_for(nct_ids)
Rails.logger.info "Calculating values for #{nct_ids.size} studies"
return if nct_ids.empty?
@nct_ids = nct_ids.freeze
@calculations = initialize_calculations
Expand Down
37 changes: 22 additions & 15 deletions app/models/outcome.rb
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
class Outcome < StudyRelationship
has_many :outcome_counts, inverse_of: :outcome
has_many :outcome_analyses, inverse_of: :outcome
has_many :outcome_measurements, inverse_of: :outcome
has_many :outcome_counts
has_many :outcome_analyses
has_many :outcome_measurements
has_many :result_groups

add_mapping do
{
table: :outcomes,
root: [:resultsSection, :outcomeMeasuresModule, :outcomeMeasures],
requires: :result_groups,
columns: [
{ name: :outcome_type, value: :type },
{ name: :title, value: :title },
Expand All @@ -22,13 +22,23 @@ class Outcome < StudyRelationship
{ name: :param_type, value: :paramType }
],
children: [
{
table: :result_groups,
root: [:groups],
columns: [
{ name: :ctgov_group_code, value: :id },
{ name: :result_type, value: 'Outcome' },
{ name: :title, value: :title },
{ name: :description, value: :description }
]
},
{
table: :outcome_measurements,
root: nil,
# TODO: after removing dup values can update root to [:classes] to reduce object size
root: nil,
flatten: [:classes, :categories, :measurements],
columns: [
{ name: :outcome_id, value: nil },
{ name: :result_group_id, value: reference(:result_groups)[:groupId, 'Outcome'] },
# result_group_id is set by ResultGroup.set_outcome_results_group_ids
{ name: :ctgov_group_code, value: :groupId },
{ name: :classification, value: [:$parent, :$parent, :title] },
{ name: :category, value: [:$parent, :title] },
Expand All @@ -52,12 +62,10 @@ class Outcome < StudyRelationship
},
{
table: :outcome_counts,
root: nil,
requires: :result_groups, # do I need this since the parent has it?
flatten: [:denoms, :counts],
root: [:denoms],
flatten: [:counts],
columns: [
{ name: :outcome_id, value: nil },
{ name: :result_group_id, value: reference(:result_groups)[:groupId, 'Outcome'] },
# result_group_id is set by ResultGroup.handle_outcome_result_groups_ids
{ name: :ctgov_group_code, value: [:groupId] },
{ name: :scope, value: 'Measure' },
{ name: :units, value: [:$parent, :units] },
Expand Down Expand Up @@ -98,9 +106,8 @@ class Outcome < StudyRelationship
table: :outcome_analysis_groups,
root: [:groupIds],
columns: [
{ name: :outcome_analysis_id, value: nil },
{ name: :result_group_id, value: reference(:result_groups)[nil, 'Outcome'] },
{ name: :ctgov_group_code, value: nil }
# result_group_id is set by ResultGroup.handle_outcome_result_groups_ids
{ name: :ctgov_group_code, value: nil } # will be single ctgov_group_code from [:groupIds]
]
}
]
Expand Down
5 changes: 2 additions & 3 deletions app/models/outcome_analysis.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
class OutcomeAnalysis < StudyRelationship
belongs_to :outcome, inverse_of: :outcome_analyses
has_many :outcome_analysis_groups, inverse_of: :outcome_analysis
has_many :result_groups, :through => :outcome_analysis_groups
belongs_to :outcome
has_many :outcome_analysis_groups
end
4 changes: 2 additions & 2 deletions app/models/outcome_analysis_group.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
class OutcomeAnalysisGroup < StudyRelationship
belongs_to :outcome_analysis, inverse_of: :outcome_analysis_groups
belongs_to :result_group, inverse_of: :outcome_analysis_groups
belongs_to :outcome_analysis#, inverse_of: :outcome_analysis_groups
belongs_to :result_group#, inverse_of: :outcome_analysis_groups

end
85 changes: 61 additions & 24 deletions app/models/result_group.rb
Original file line number Diff line number Diff line change
@@ -1,38 +1,16 @@
class ResultGroup < StudyRelationship

has_many :reported_events
has_many :milestones
has_many :drop_withdrawals
has_many :baseline_counts
has_many :baseline_measures
has_many :outcome_counts
has_many :outcome_measurements
has_many :outcome_analysis_groups, inverse_of: :result_group
has_many :outcome_analyses, :through => :outcome_analysis_groups
belongs_to :outcome, optional: true

add_mapping do
[
{
table: :result_groups,
root: [:resultsSection, :baselineCharacteristicsModule, :groups],
index: [:ctgov_group_code, :result_type],
unique: true,
columns: [
{ name: :ctgov_group_code, value: :id },
{ name: :result_type, value: 'Baseline' },
{ name: :title, value: :title },
{ name: :description, value: :description },
]
},
{
table: :result_groups,
root: [:resultsSection, :outcomeMeasuresModule, :outcomeMeasures],
flatten: [:groups],
index: [:ctgov_group_code, :result_type],
unique: true,
columns: [
{ name: :ctgov_group_code, value: :id },
{ name: :result_type, value: 'Outcome' },
{ name: :result_type, value: 'Baseline' },
{ name: :title, value: :title },
{ name: :description, value: :description },
]
Expand All @@ -53,6 +31,8 @@ class ResultGroup < StudyRelationship
table: :result_groups,
root: [:resultsSection, :adverseEventsModule, :eventGroups],
index: [:ctgov_group_code, :result_type],
# outcomes should be loaded first to avoid double loading of reported events
requires: :outcomes, # TODO: review - this shouldn't be necessary
unique: true,
columns: [
{ name: :ctgov_group_code, value: :id },
Expand All @@ -63,4 +43,61 @@ class ResultGroup < StudyRelationship
}
]
end

def self.handle_outcome_result_groups_ids(nct_ids)
Rails.logger.info "Handling Results Group IDs for Outcome Models"
result_groups = fetch_outcome_groups_for(nct_ids)
set_results_group_ids_for(OutcomeCount, nct_ids, result_groups)
set_results_group_ids_for(OutcomeMeasurement, nct_ids, result_groups)
set_outcome_analysis_group_ids(nct_ids, result_groups)
end


private

# TODO: add test for private methods

def self.set_outcome_analysis_group_ids(nct_ids, result_groups)
Rails.logger.info "Setting Outcome Analysis Group IDs"
analyses = OutcomeAnalysis.where(nct_id: nct_ids).pluck(:id, :outcome_id).to_h
groups = OutcomeAnalysisGroup.where(nct_id: nct_ids)

updates = groups.map do | group |
outcome_id = analyses[group.outcome_analysis_id]
next unless outcome_id

key = [group.nct_id, group.ctgov_group_code, outcome_id]
result_group_id = result_groups[key]&.id
{ id: group.id, result_group_id: result_group_id } if result_group_id
end.compact

bulk_update(OutcomeAnalysisGroup, updates)
end

def self.set_results_group_ids_for(model, nct_ids, result_groups)
Rails.logger.info "Setting Result Group IDs for #{model}"

records = model.where(nct_id: nct_ids).select(:id, :nct_id, :ctgov_group_code, :outcome_id)
updates = records.map do |record|
key = [record.nct_id, record.ctgov_group_code, record.outcome_id]
result_group_id = result_groups[key]&.id
{ id: record.id, result_group_id: result_group_id } if result_group_id
end.compact

bulk_update(model, updates)
end


# TODO: consider adding index for nct_id and result_type
def self.fetch_outcome_groups_for(nct_ids)
where(nct_id: nct_ids, result_type: "Outcome")
.select(:id, :nct_id, :ctgov_group_code, :outcome_id)
.index_by do |group|
[group.nct_id, group.ctgov_group_code, group.outcome_id]
end
end

def self.bulk_update(model, updates)
model.import updates, on_duplicate_key_update: { conflict_target: [:id], columns: [:result_group_id] }
end
end
27 changes: 18 additions & 9 deletions app/models/study_json_record/worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def save_children(parents, indent=" ")
klass = parents.first.class
return if klass == Study


klass.reflect_on_all_associations(:has_many).each do |association|
# update the nct_id and parent_id of the children
collection = []
Expand All @@ -59,6 +60,12 @@ def save_children(parents, indent=" ")
next unless inverse_name

parent = child.send(inverse_name)
# keeping this temprorary fix until we find a better solution
if parent.nil?
puts "Skipping child because parent is nil"
puts "Parrent: #{inverse_name}, child: #{association.name}"
next
end
child.nct_id = parent.nct_id
child[association.foreign_key] = parent.id
collection << child
Expand Down Expand Up @@ -90,11 +97,12 @@ def process_study(nct_id)
def process(count = 1, records = nil)
# load records
records = StudyJsonRecord.version_2.needs_processing.limit(count) if records.nil?

Rails.logger.debug { "records: #{records.count}" }
return if records.empty?
nct_ids = records.map(&:nct_id)
Rails.logger.debug { "records: #{nct_ids.count}" }

puts "worker is about to process #{records.count} records".green unless Rails.env.test?
remove_study_data(records.map(&:nct_id))
puts "worker is about to process #{nct_ids.count} records".green unless Rails.env.test?
remove_study_data(nct_ids)

@collections = Hash.new { |h, k| h[k] = [] }
@index = Hash.new { |h, k| h[k] = {} }
Expand All @@ -103,11 +111,11 @@ def process(count = 1, records = nil)
StudyRelationship.sorted_mapping.each do |mapping| # process each mapping instructions
process_mapping(mapping, records)
end

# recalculate values for processed records
CalculatedValue.populate_for(records.pluck(:nct_id))
ResultGroup.handle_outcome_result_groups_ids(nct_ids)
CalculatedValue.populate_for(nct_ids)
# mark study records as saved
StudyJsonRecord.version_2.where(nct_id: records.map(&:nct_id)).update_all(saved_study_at: Time.zone.now) # rubocop:disable Rails/SkipsModelValidations
StudyJsonRecord.version_2.where(nct_id: nct_ids).update_all(saved_study_at: Time.zone.now) # rubocop:disable Rails/SkipsModelValidations
end

# private
Expand All @@ -119,7 +127,6 @@ def prepare_children(parent, content, children)
collection = [] # this array will collect all the models to be imported
model = mapping[:table].to_s.classify.constantize # get the model from the table name
root = mapping[:root].map(&:to_s) if mapping[:root] # normalize root path to array of strings

mapping_root = root ? content.dig(*root) : content
next if mapping_root.nil? # skip if no root found

Expand Down Expand Up @@ -254,13 +261,15 @@ def process_mapping(mapping, records)
print "\r #{mapping[:table]} - #{collection.count}" unless ENV['RAILS_ENV'] == 'test'
model.import(collection)
puts "\r#{mapping[:table]} - #{collection.count}" unless ENV['RAILS_ENV'] == 'test'

if mapping[:index]
index = [:nct_id] + mapping[:index]
collection.each do |row|
row_index = index.map { |i| row.send(i) }
@index[mapping[:table]][row_index] = row.id
end
end

save_children(collection)
rescue
raise "Error processing #{mapping[:table]}"
Expand Down
6 changes: 6 additions & 0 deletions db/foreign_keys.json
Original file line number Diff line number Diff line change
Expand Up @@ -100,5 +100,11 @@
"parent_table": "result_groups",
"child_column": "result_group_id",
"parent_column": "id"
},
{
"child_table": "result_groups",
"parent_table": "outcomes",
"child_column": "outcome_id",
"parent_column": "id"
}
]
5 changes: 5 additions & 0 deletions db/migrate/20240730222441_add_outome_id_to_result_groups.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
class AddOutomeIdToResultGroups < ActiveRecord::Migration[6.0]
def change
add_column :result_groups, :outcome_id, :integer
end
end
3 changes: 3 additions & 0 deletions lib/tasks/load.rake
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@ namespace :db do

desc 'process study json records'
task :import_study, [:nct_id] => :environment do |t, args|
dbmgr = Util::DbManager.new
dbmgr.remove_constraints
worker = StudyJsonRecord::Worker.new
records = StudyJsonRecord.where(nct_id: args[:nct_id], version: '2')
worker.process(1, records)
dbmgr.add_constraints
end

desc 'process study json records'
Expand Down
3 changes: 2 additions & 1 deletion spec/models/baseline_count_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def expected_result_group_data
'ctgov_group_code' => record['id'],
'result_type' => 'Baseline',
'title' => record['title'],
'description' => record['description']
'description' => record['description'],
'outcome_id' => nil
}
end
end
Expand Down
3 changes: 2 additions & 1 deletion spec/models/baseline_measurement_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ def expected_result_group_data
ctgov_group_code: record["id"],
result_type: "Baseline",
title: record["title"],
description: record["description"]
description: record["description"],
outcome_id: nil
}
end
end
Expand Down
35 changes: 18 additions & 17 deletions spec/models/result_group_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,13 @@
StudyJsonRecord.create(nct_id: "NCT000001", version: '2', content: content) # create a brand new json record

# process the json
StudyJsonRecord::Worker.new.process # import the new json record
StudyJsonRecord::Worker.new.process_study("NCT000001")

# load the database entries
imported = ResultGroup.all.map{|x| x.attributes }
imported.each{|x| x.delete("id")}
imported = ResultGroup.all.map { |x| x.attributes.except('id', 'outcome_id') }

# order of expected depends on order of importing different types of result groups
expected = [
{
"nct_id"=>"NCT000001",
"result_type"=>"Participant Flow",
"ctgov_group_code"=>"FG000",
"title"=>"Cohort 1",
"description"=>"Cohort 1 received..."
},
{
"nct_id"=>"NCT000001",
"result_type"=>"Participant Flow",
"ctgov_group_code"=>"FG001",
"title"=>"Cohort 2",
"description"=>"Cohort 2 received..."
},
{
"nct_id"=>"NCT000001",
"result_type"=>"Outcome",
Expand All @@ -48,8 +34,23 @@
"ctgov_group_code" => "OG0000",
"title" => "Outcome 3",
"description" => "Outcome Group 3 description"
},
{
"nct_id"=>"NCT000001",
"result_type"=>"Participant Flow",
"ctgov_group_code"=>"FG000",
"title"=>"Cohort 1",
"description"=>"Cohort 1 received..."
},
{
"nct_id"=>"NCT000001",
"result_type"=>"Participant Flow",
"ctgov_group_code"=>"FG001",
"title"=>"Cohort 2",
"description"=>"Cohort 2 received..."
}
]

expect(imported).to eq(expected)
end
end

0 comments on commit 46fc0a9

Please sign in to comment.