diff --git a/Gemfile b/Gemfile index b771aaf8..ebccbac6 100644 --- a/Gemfile +++ b/Gemfile @@ -33,3 +33,4 @@ group :test do gem 'test-unit-minitest' end +gem "binding_of_caller", "~> 1.0" diff --git a/Gemfile.lock b/Gemfile.lock index f171ab2a..c126a84d 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/ontoportal-lirmm/goo.git - revision: 1d78bde5a711d05475da0459308c7db074af5e21 + revision: ddb95e427950fde3ac715aec340394208c8166fe branch: development specs: goo (0.0.2) @@ -77,12 +77,15 @@ GEM addressable (2.8.5) public_suffix (>= 2.0.2, < 6.0) bcrypt (3.1.19) + binding_of_caller (1.0.0) + debug_inspector (>= 0.0.1) builder (3.2.4) coderay (1.1.3) concurrent-ruby (1.2.2) connection_pool (2.4.1) cube-ruby (0.0.3) dante (0.2.0) + debug_inspector (1.1.0) declarative (0.0.20) docile (1.4.0) domain_name (0.5.20190701) @@ -183,7 +186,7 @@ GEM addressable (>= 2.2) redis (5.0.7) redis-client (>= 0.9.0) - redis-client (0.16.0) + redis-client (0.17.0) connection_pool representable (3.2.0) declarative (< 0.1.0) @@ -206,7 +209,7 @@ GEM rubyzip (2.3.2) rufus-scheduler (2.0.24) tzinfo (>= 0.3.22) - signet (0.17.0) + signet (0.18.0) addressable (~> 2.8) faraday (>= 0.17.5, < 3.a) jwt (>= 1.5, < 3.0) @@ -237,9 +240,11 @@ GEM webrick (1.8.1) PLATFORMS + x86_64-darwin-21 x86_64-linux DEPENDENCIES + binding_of_caller (~> 1.0) cube-ruby email_spec faraday (~> 1.9) @@ -267,4 +272,4 @@ DEPENDENCIES test-unit-minitest BUNDLED WITH - 2.3.14 + 2.3.15 diff --git a/bin/agroportal_model_migration b/bin/agroportal_model_migration new file mode 100755 index 00000000..ad695b7e --- /dev/null +++ b/bin/agroportal_model_migration @@ -0,0 +1,232 @@ +#!/usr/bin/env ruby + +# Exit cleanly from an early interrupt +Signal.trap("INT") { exit 1 } + +# Setup the bundled gems in our environment +require 'bundler/setup' +require 'binding_of_caller' +# Configure the process for the current cron configuration. 
+require_relative '../lib/ncbo_cron'
+config_exists = File.exist?(File.expand_path('../../config/config.rb', __FILE__))
+abort("Please create a config/config.rb file using the config/config.rb.sample as a template") unless config_exists
+require_relative '../config/config'
+
+require 'optparse'
+
+# Log file used when -l/--logfile is not given. It is also interpolated into the
+# help text so the advertised default cannot drift from the real one again.
+default_logfile = 'ecoportal_migration.log'
+
+# Options collected from the command line:
+#   :ontologies  => Array of acronyms given with -o/--ontologies
+#   :migrate_all => true when --migrate-all is given
+#   :logfile     => path of the log file
+options = { logfile: default_logfile }
+opt_parser = OptionParser.new do |opts|
+  # Set a banner, displayed at the top of the help screen.
+  # opts.banner = "Usage: ncbo_ontology_process [options]"
+  opts.separator 'A script that migrate data from old EcoPortal model to the new (AgroPortal) model'
+  opts.on('-o', '--ontologies ACRONYM[,ACRONYM,...]', 'Migrate submission model of the of this ontology acronym(s).') do |acronym|
+    options[:ontologies] = acronym.split(',')
+  end
+  opts.on('--migrate-all', 'Migrate all submission models') do
+    options[:migrate_all] = true
+  end
+
+  opts.on('-l', '--logfile FILE', "Write log to FILE (default is '#{default_logfile}')") do |filename|
+    options[:logfile] = filename
+  end
+
+  # Display the help screen, all programs are assumed to have this option.
+  opts.on('-h', '--help', 'Display this screen') do
+    puts opts
+    exit
+  end
+end
+# Parse the command-line. The 'parse' method simply parses ARGV, while the 'parse!' method parses ARGV and removes
+# any options found there, as well as any parameters for the options.
+opt_parser.parse!
+
+# Without -o or --migrate-all there is nothing to migrate: print the usage and
+# stop here instead of failing later on a missing acronym list.
+abort(opt_parser.help) unless options[:migrate_all] || options[:ontologies]
+
+# Migrator class
+#
+# Migrates a single ontology submission to the AgroPortal model. The work is
+# done by #migrate_submission; every other method is a private helper.
+#
+# The tool is interactive: whenever something cannot be repaired automatically
+# it opens a Pry session (see #stop_to_fix) so the operator can fix the data by
+# hand and then let the migration continue.
+class AgroPortalMigrator
+
+  # @param submission the submission to migrate; the driver below passes the
+  #   value returned by Ontology#latest_submission
+  # @param logger an object responding to #info and #error
+  def initialize(submission:, logger:)
+    @submission = submission
+    @logger = logger
+  end
+
+  # Runs the whole migration for the submission given to the constructor:
+  #   1. relaxes some model constraints (process-wide, on the model settings),
+  #   2. repairs the attributes that validation reports as invalid,
+  #   3. replaces agent attributes given as strings by Agent records,
+  #   4. re-types the attributes flagged with a :uri error as RDF::URI,
+  #   5. saves the submission when it is finally valid.
+  # Any step that cannot complete calls #stop_to_fix.
+  def migrate_submission
+    sub = @submission
+    sub.bring_remaining
+
+    log_info "Start submission #{sub.id} migration"
+    # These deletions mutate the shared model settings, so they stay in effect
+    # for every submission processed afterwards in the same run.
+    sub.class.model_settings[:attributes][:keyClasses][:enforce].delete(:class)
+    LinkedData::Models::OntologySubmission.agents_attrs.each do |key|
+      sub.class.model_settings[:attributes][key][:enforce].delete(:is_person)
+      sub.class.model_settings[:attributes][key][:enforce].delete(:is_organization)
+    end
+
+    if not_valid?(sub, "submission not valid")
+      sub.valid = nil if attribute_error?(sub, :valid)
+      sub.curatedOn = sub.creationDate if attribute_error?(sub, :curatedOn)
+
+      sub.hasPart = Array(sub.hasPart) + Array(sub.ontology.views.map { |x| x.id }) if attribute_error?(sub, :hasPart)
+
+      if attribute_error?(sub, :URI, :existence)
+        sub.URI = sub.id
+      end
+
+      if attribute_error?(sub, :URI, :distinct_of_identifier)
+        sub.identifier = sub.identifier.reject { |x| x.to_s.eql?(sub.URI.to_s) }
+      end
+
+      if attribute_error?(sub, :designedForOntologyTask)
+        # "annotation task" becomes <...ontology#AnnotationTask>. The original
+        # `x..split(' ')` built a Range and called `split` on the migrator itself.
+        sub.designedForOntologyTask = Array(sub.designedForOntologyTask).map do |x|
+          RDF::URI.new("http://omv.ontoware.org/2005/05/ontology##{x.to_s.split(' ').map(&:capitalize).join}")
+        end
+      end
+
+      if attribute_error?(sub, :modificationDate, :superior_equal_to_creationDate)
+        sub.modificationDate = sub.creationDate
+      end
+
+      if attribute_error?(sub, :naturalLanguage, :lexvo_language)
+        sub.naturalLanguage = Array(sub.naturalLanguage).map do |lang|
+          # Values that already are lexvo URIs are kept as they are.
+          next lang if lang.to_s.start_with?('http://lexvo.org/id/iso')
+          RDF::URI.new("http://lexvo.org/id/iso639-1/#{lang}")
+        end
+      end
+
+      if attribute_error?(sub, :isOfType, :uri)
+        sub.isOfType = RDF::URI.new("http://omv.ontoware.org/2005/05/ontology##{sub.isOfType.to_s.capitalize}")
+      end
+
+      if attribute_error?(sub, :description)
+        sub.description = "#{sub.ontology.id.to_s.split('/').last} description"
+      end
+    end
+
+    # Agent attributes: every comma-separated name found in a string value is
+    # replaced by the Agent with that name, created on the fly when missing.
+    LinkedData::Models::OntologySubmission.agents_attrs.each do |key|
+      values = sub.send(key)
+      is_array = values.is_a?(Array)
+      next if values.nil? || (values.respond_to?(:empty?) && values.empty?)
+
+      values = Array(values).map do |string_values|
+        # Anything that is not a string is kept untouched.
+        next string_values unless string_values.nil? || string_values.is_a?(String)
+        # A nil entry used to reach `.empty?` and raise NoMethodError.
+        next nil if string_values.nil? || string_values.empty?
+
+        # Names are stripped so that "A, B" does not create an agent called " B".
+        string_values.to_s.split(',').map(&:strip).reject(&:empty?).map do |value|
+          creator = LinkedData::Models::Agent.where(name: value).first
+          unless creator
+            creator = LinkedData::Models::Agent.new(name: value, agentType: 'person', creator: admin_user)
+            stop_to_fix('creator not valid') unless creator.valid?
+            creator.save
+          end
+          creator
+        end
+      end.flatten.compact
+
+      sub.send("#{key}=", is_array ? values : values.first)
+    end
+
+    # NOTE(review): sub.errors is whatever the last validation stored; it is not
+    # recomputed after the repairs above - confirm this is intended.
+    sub.errors.each do |key, errors|
+      next unless errors.keys.include?(:uri)
+
+      log_info "Fix submission #{sub.id} #{key} URL values"
+      values = sub.send(key)
+      next if values.nil? || values.empty?
+
+      is_array = values.is_a?(Array)
+
+      values = Array(values).map { |x| RDF::URI.new(x) }
+
+      # The attribute is first saved empty with the :uri check switched off,
+      # then the check is restored and the re-typed values are assigned.
+      sub.class.model_settings[:attributes][key.to_sym][:enforce].delete(:uri)
+      sub.send("#{key}=", is_array ? [] : nil)
+      sub.save rescue stop_to_fix('not valid submission')
+      sub.class.model_settings[:attributes][key.to_sym][:enforce].push(:uri)
+      sub.send("#{key}=", is_array ? values : values.first)
+    end
+
+    if sub.valid?
+      sub.save rescue stop_to_fix('not valid submission')
+      log_info ">> #{sub.id} migrated successfully"
+    else
+      stop_to_fix "#{sub.id} migration failed submission not valid"
+    end
+
+    log_info "> Submission #{sub.id} migration ended"
+  end
+
+  private
+
+  # True when validation reported an error on +attr+; when +error_type+ is
+  # given, the error must be of that type.
+  def attribute_error?(sub, attr, error_type = nil)
+    sub.errors.keys.include?(attr) && (error_type.nil? || sub.errors[attr].keys.include?(error_type))
+  end
+
+  def logger
+    @logger
+  end
+
+  # Looks up the user recorded as creator of the agents created by the
+  # migration. The result is cached so the lookup is not repeated for each agent.
+  #
+  # @param username [String] name of the user to load
+  # @return the user with its attributes loaded
+  # @raise [RuntimeError] when no user has that name
+  def admin_user(username = 'admin')
+    @admin_users ||= {}
+    @admin_users[username] ||= begin
+      user = LinkedData::Models::User.find(username).first
+      raise "The user #{username} does not exist" if user.nil?
+
+      user.bring_remaining
+      # Return the user itself, not whatever bring_remaining happens to return.
+      user
+    end
+  end
+
+  # Appends +new_val+ to the values of +attr+ without duplicates; does nothing
+  # when +new_val+ is nil or false.
+  def array_migrate(sub, attr, new_val)
+    old_val = sub.send(attr.to_s)
+    sub.send("#{attr}=", (Array(old_val) + [new_val]).uniq) if new_val
+  end
+
+  # Logs +msg+ and the validation errors when +sub+ is invalid.
+  #
+  # @return [Boolean] true when the submission is NOT valid
+  def not_valid?(sub, msg)
+    return false if sub.valid?
+
+    log_error msg
+    log_error "Submission #{sub.id} errors: #{sub.errors}"
+    true
+  end
+
+  # Opens a Pry session in the frame that called this method so the operator
+  # can repair the data by hand; execution resumes when the session is left.
+  # NOTE(review): relies on Pry being loaded by the project - confirm.
+  def stop_to_fix(msg)
+    log_error "Stop to fix #{msg}"
+    caller_binding = binding.of_caller(1)
+    binding.pry(caller_binding)
+    log_error "End stop to fix"
+  end
+
+  def log_error(msg)
+    logger.error "> #{msg}"
+  end
+
+  def log_info(msg)
+    logger.info "> #{msg}"
+  end
+
+end
+
+logger = Logger.new(options[:logfile])
+# a formatter to write simultaneously into a file and stdout
+logger.formatter = proc do |severity, datetime, _progname, msg|
+  out = "#{severity} [#{datetime}] #{msg} \n"
+  puts out
+  out
+end
+
+begin
+  puts "AgroPortal migration details are logged to #{options[:logfile]}"
+
+  if options[:migrate_all]
+    # `map`, not `each`: `each` returned the ontologies themselves, so the
+    # migrator was handed Ontology objects instead of submissions.
+    submissions = LinkedData::Models::Ontology.all.map { |o| o.latest_submission }
+  else
+    submissions = Array(options[:ontologies]).map do |acronym|
+      ont = LinkedData::Models::Ontology.find(acronym).first
+      if ont.nil?
+        logger.error "> Ontology #{acronym} not found, skipped"
+        next
+      end
+      ont.latest_submission(status: :any)
+    end
+  end
+
+  # Unknown acronyms and ontologies without a submission yield nil; skip them.
+  submissions.compact.each do |sub|
+    AgroPortalMigrator.new(submission: sub, logger: logger).migrate_submission
+  end
+
+# StandardError, not Exception: the INT trap calls `exit`, and the resulting
+# SystemExit must end the script instead of opening a debugging session.
+rescue StandardError => e
+  logger.error "Failed, exception: #{e.class}: #{e.message}"
+  logger.error e.backtrace.join("\n") if e.backtrace
+  binding.pry
+  exit(1)
+end
+