Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: Add new AgroPortal model migration script #15

Merged
merged 5 commits into from
Sep 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,4 @@ group :test do
gem 'test-unit-minitest'
end

gem "binding_of_caller", "~> 1.0"
13 changes: 9 additions & 4 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
GIT
remote: https://github.com/ontoportal-lirmm/goo.git
revision: 1d78bde5a711d05475da0459308c7db074af5e21
revision: ddb95e427950fde3ac715aec340394208c8166fe
branch: development
specs:
goo (0.0.2)
Expand Down Expand Up @@ -77,12 +77,15 @@ GEM
addressable (2.8.5)
public_suffix (>= 2.0.2, < 6.0)
bcrypt (3.1.19)
binding_of_caller (1.0.0)
debug_inspector (>= 0.0.1)
builder (3.2.4)
coderay (1.1.3)
concurrent-ruby (1.2.2)
connection_pool (2.4.1)
cube-ruby (0.0.3)
dante (0.2.0)
debug_inspector (1.1.0)
declarative (0.0.20)
docile (1.4.0)
domain_name (0.5.20190701)
Expand Down Expand Up @@ -183,7 +186,7 @@ GEM
addressable (>= 2.2)
redis (5.0.7)
redis-client (>= 0.9.0)
redis-client (0.16.0)
redis-client (0.17.0)
connection_pool
representable (3.2.0)
declarative (< 0.1.0)
Expand All @@ -206,7 +209,7 @@ GEM
rubyzip (2.3.2)
rufus-scheduler (2.0.24)
tzinfo (>= 0.3.22)
signet (0.17.0)
signet (0.18.0)
addressable (~> 2.8)
faraday (>= 0.17.5, < 3.a)
jwt (>= 1.5, < 3.0)
Expand Down Expand Up @@ -237,9 +240,11 @@ GEM
webrick (1.8.1)

PLATFORMS
x86_64-darwin-21
x86_64-linux

DEPENDENCIES
binding_of_caller (~> 1.0)
cube-ruby
email_spec
faraday (~> 1.9)
Expand Down Expand Up @@ -267,4 +272,4 @@ DEPENDENCIES
test-unit-minitest

BUNDLED WITH
2.3.14
2.3.15
232 changes: 232 additions & 0 deletions bin/agroportal_model_migration
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
#!/usr/bin/env ruby

# Leave with a non-zero status if the user interrupts before the work begins
Signal.trap('INT') { exit(1) }

# Make the bundled gems available to this process
require 'bundler/setup'
require 'binding_of_caller'
# Load the cron library first, then the local configuration (which must be present)
require_relative '../lib/ncbo_cron'
config_path = File.expand_path('../../config/config.rb', __FILE__)
unless File.exist?(config_path)
  abort("Please create a config/config.rb file using the config/config.rb.sample as a template")
end
require_relative '../config/config'

require 'optparse'

# Default log destination. Kept in one variable so the --logfile help text
# always advertises the value that is actually used.
default_logfile = "ecoportal_migration.log"
options = { logfile: default_logfile }
opt_parser = OptionParser.new do |opts|
  # Set a banner, displayed at the top of the help screen.
  # opts.banner = "Usage: ncbo_ontology_process [options]"
  opts.separator 'A script that migrate data from old EcoPortal model to the new (AgroPortal) model'

  # Comma-separated acronyms are stored as an Array under :ontologies.
  opts.on('-o', '--ontologies ACRONYM[,ACRONYM,...]', 'Migrate submission model of the of this ontology acronym(s).') do |acronym|
    options[:ontologies] = acronym.split(',')
  end

  opts.on('--migrate-all', 'Migrate all submission models') do
    options[:migrate_all] = true
  end

  opts.on('-l', '--logfile FILE', "Write log to FILE (default is '#{default_logfile}')") do |filename|
    options[:logfile] = filename
  end

  # Display the help screen, all programs are assumed to have this option.
  opts.on('-h', '--help', 'Display this screen') do
    puts opts
    exit
  end
end
# Parse the command-line. The 'parse' method simply parses ARGV, while the 'parse!' method parses ARGV and removes
# any options found there, as well as any parameters for the options.
opt_parser.parse!

# Migrates one ontology submission to the new (AgroPortal) model.
#
# The migration relaxes a few class-level validation rules, repairs the
# attributes reported as invalid, replaces textual agent values by Agent
# records, re-saves attributes flagged with a :uri error as RDF::URI values
# and finally saves the submission. Whenever a step cannot be completed
# automatically the process drops into an interactive Pry session
# (see #stop_to_fix) so an operator can repair the data by hand.
class AgroPortalMigrator

  OMV_NAMESPACE = 'http://omv.ontoware.org/2005/05/ontology#'.freeze
  LEXVO_PREFIX = 'http://lexvo.org/id/iso'.freeze

  # @param submission the submission to migrate (must respond to the
  #   attribute accessors used below, plus #valid?, #errors and #save)
  # @param logger an object responding to #info and #error
  def initialize(submission:, logger:)
    @submission = submission
    @logger = logger
  end

  # Runs the whole migration for the submission given to the constructor.
  # Validation rules removed by #relax_validations are removed from the
  # submission *class* settings, so they stay relaxed for the whole process.
  def migrate_submission
    sub = @submission
    sub.bring_remaining

    log_info "Start submission #{sub.id} migration"
    relax_validations(sub)
    repair_invalid_attributes(sub) if not_valid?(sub, "submission not valid")
    migrate_agents(sub)
    repair_uri_attributes(sub)

    if sub.valid?
      save_or_stop(sub)
      log_info ">> #{sub.id} migrated successfully"
    else
      stop_to_fix "#{sub.id} migration failed submission not valid"
    end

    log_info "> Submission #{sub.id} migration ended"
  end

  private

  attr_reader :logger

  # Removes the validation rules that the data being migrated cannot satisfy yet.
  def relax_validations(sub)
    attributes = sub.class.model_settings[:attributes]
    attributes[:keyClasses][:enforce].delete(:class)
    LinkedData::Models::OntologySubmission.agents_attrs.each do |key|
      attributes[key][:enforce].delete(:is_person)
      attributes[key][:enforce].delete(:is_organization)
    end
  end

  # Applies a default/repair value to every attribute reported in sub.errors.
  # The error list is the one produced by the validation done in #not_valid?;
  # it is not refreshed between the individual repairs.
  def repair_invalid_attributes(sub)
    sub.valid = nil if attribute_error?(sub, :valid)
    sub.curatedOn = sub.creationDate if attribute_error?(sub, :curatedOn)

    if attribute_error?(sub, :hasPart)
      # Array() guards against an ontology that has no views loaded
      view_ids = Array(sub.ontology.views).map(&:id)
      sub.hasPart = Array(sub.hasPart) + view_ids
    end

    sub.URI = sub.id if attribute_error?(sub, :URI, :existence)

    if attribute_error?(sub, :URI, :distinct_of_identifier)
      sub.identifier = sub.identifier.reject { |x| x.to_s.eql?(sub.URI.to_s) }
    end

    if attribute_error?(sub, :designedForOntologyTask)
      # "annotation task" -> <OMV namespace>AnnotationTask
      sub.designedForOntologyTask = sub.designedForOntologyTask.map do |task|
        RDF::URI.new("#{OMV_NAMESPACE}#{task.to_s.split(' ').map(&:capitalize).join}")
      end
    end

    if attribute_error?(sub, :modificationDate, :superior_equal_to_creationDate)
      sub.modificationDate = sub.creationDate
    end

    if attribute_error?(sub, :naturalLanguage, :lexvo_language)
      sub.naturalLanguage = Array(sub.naturalLanguage).map do |lang|
        next lang if lang.to_s.start_with?(LEXVO_PREFIX)

        RDF::URI.new("http://lexvo.org/id/iso639-1/#{lang}")
      end
    end

    if attribute_error?(sub, :isOfType, :uri)
      sub.isOfType = RDF::URI.new("#{OMV_NAMESPACE}#{sub.isOfType.to_s.capitalize}")
    end

    if attribute_error?(sub, :description)
      sub.description = "#{sub.ontology.id.to_s.split('/').last} description"
    end
  end

  # Replaces the textual values of every agent attribute by Agent records.
  def migrate_agents(sub)
    LinkedData::Models::OntologySubmission.agents_attrs.each do |key|
      values = sub.send(key)
      next if values.nil? || values.empty?

      is_array = values.is_a?(Array)
      agents = Array(values).map { |entry| agents_for(entry) }.flatten.compact
      sub.send("#{key}=", is_array ? agents : agents.first)
    end
  end

  # Converts one attribute entry into agents: nil and empty strings are
  # dropped, non-String entries are kept untouched, and a String is split on
  # commas with each (trimmed, non-blank) name resolved to an Agent.
  def agents_for(entry)
    return nil if entry.nil?
    return entry unless entry.is_a?(String)
    return nil if entry.empty?

    entry.split(',').map(&:strip).reject(&:empty?).map { |name| find_or_create_agent(name) }
  end

  # Returns the Agent named +name+, creating it as a person when it is missing.
  def find_or_create_agent(name)
    creator = LinkedData::Models::Agent.where(name: name).first
    return creator if creator

    creator = LinkedData::Models::Agent.new(name: name, agentType: 'person', creator: admin_user)
    stop_to_fix('creator not valid') unless creator.valid?
    creator.save
    creator
  end

  # For each attribute reported with a :uri error: clears the attribute and
  # saves with the :uri rule disabled, then restores the rule and assigns the
  # previous values wrapped in RDF::URI. The final save happens in the caller.
  def repair_uri_attributes(sub)
    (sub.errors || {}).each do |key, errors|
      next unless errors.key?(:uri)

      log_info "Fix submission #{sub.id} #{key} URL values"
      values = sub.send(key)
      next if values.nil? || values.empty?

      is_array = values.is_a?(Array)
      uris = Array(values).map { |x| RDF::URI.new(x) }
      enforced = sub.class.model_settings[:attributes][key.to_sym][:enforce]

      enforced.delete(:uri)
      sub.send("#{key}=", is_array ? [] : nil)
      save_or_stop(sub)
      enforced.push(:uri)
      sub.send("#{key}=", is_array ? uris : uris.first)
    end
  end

  # Saves the submission; on failure logs the error and opens the debugger
  # instead of aborting the migration.
  def save_or_stop(sub)
    sub.save
  rescue StandardError => e
    log_error "Saving #{sub.id} failed: #{e.class}: #{e.message}"
    stop_to_fix('not valid submission')
  end

  # True when +attr+ has a validation error (of +error_type+ when given).
  def attribute_error?(sub, attr, error_type = nil)
    errors = sub.errors || {}
    errors.key?(attr) && (error_type.nil? || errors[attr].key?(error_type))
  end

  # Looks up the user recorded as creator of newly created agents.
  # @raise [RuntimeError] when the user does not exist
  def admin_user(username = 'admin')
    user = LinkedData::Models::User.find(username).first
    raise "The user #{username} does not exist" if user.nil?

    user.bring_remaining
    # return the user itself rather than relying on bring_remaining's return value
    user
  end

  # Appends +new_val+ (when present) to the values of +attr+, without duplicates.
  def array_migrate(sub, attr, new_val)
    old_val = sub.send(attr.to_s)
    sub.send("#{attr}=", (Array(old_val) + [new_val]).uniq) if new_val
  end

  # Validates the submission; logs +msg+ and the errors when it is invalid.
  # @return [Boolean] true when the submission is NOT valid
  def not_valid?(sub, msg)
    return false if sub.valid?

    log_error msg
    log_error "Submission #{sub.id} errors: #{sub.errors}"
    true
  end

  # Opens a Pry session on the binding of the code that called this method so
  # the problem can be fixed interactively; execution resumes afterwards.
  def stop_to_fix(msg)
    log_error "Stop to fix #{msg}"
    caller_binding = binding.of_caller(1)
    binding.pry(caller_binding)
    log_error "End stop to fix"
  end

  def log_error(msg)
    logger.error "> #{msg}"
  end

  def log_info(msg)
    logger.info "> #{msg}"
  end

end

logger = Logger.new(options[:logfile])
# A formatter that writes every record to stdout as well as to the log file
logger.formatter = proc do |severity, datetime, _progname, msg|
  out = "#{severity} [#{datetime}] #{msg} \n"
  puts out
  out
end

# Without a selection there is nothing to iterate over; fail with a usage hint
# instead of a NoMethodError on nil.
unless options[:migrate_all] || options[:ontologies]
  abort('Nothing to migrate: pass --ontologies ACRONYM[,ACRONYM,...] or --migrate-all (see --help)')
end

begin
  puts "AgroPortal migration details are logged to #{options[:logfile]}"

  submissions =
    if options[:migrate_all]
      # map (not each) so that we collect the submissions, not the ontologies.
      # NOTE(review): unlike the --ontologies branch this does not pass
      # status: :any — confirm whether that difference is intended.
      LinkedData::Models::Ontology.all.map { |o| o.latest_submission }
    else
      options[:ontologies].map do |acronym|
        ont = LinkedData::Models::Ontology.find(acronym).first
        if ont.nil?
          logger.error "> Ontology #{acronym} not found, skipped"
          next
        end
        ont.latest_submission(status: :any)
      end
    end

  # Ontologies without a submission (and unknown acronyms) yield nil; skip them.
  submissions.compact.each do |sub|
    AgroPortalMigrator.new(submission: sub, logger: logger).migrate_submission
  end

rescue StandardError => e
  # StandardError only: SystemExit and signals must still be able to end the process.
  logger.error "Failed, exception: #{e.class}: #{e.message}."
  binding.pry
  exit(1)
end