diff --git a/Gemfile.lock b/Gemfile.lock index cb652588..570ae5bd 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/ontoportal-lirmm/goo.git - revision: f9b73ce6b6fac92bffd0a2a1b9dc9ba9266acf79 + revision: c3456c45c12ed92d4a3ae43cac7c1d4cdbf290b6 branch: development specs: goo (0.0.2) @@ -32,7 +32,7 @@ GEM multi_json (~> 1.3) thread_safe (~> 0.1) tzinfo (~> 0.3.37) - addressable (2.8.1) + addressable (2.8.4) public_suffix (>= 2.0.2, < 6.0) ansi (1.5.0) ast (2.4.2) @@ -40,7 +40,7 @@ GEM builder (3.2.4) coderay (1.1.3) concurrent-ruby (1.2.2) - connection_pool (2.3.0) + connection_pool (2.4.0) cube-ruby (0.0.3) daemons (1.4.1) date (3.3.3) @@ -123,8 +123,8 @@ GEM oj (2.18.5) omni_logger (0.1.4) logger - parallel (1.22.1) - parser (3.2.1.1) + parallel (1.23.0) + parser (3.2.2.1) ast (~> 2.4.1) pony (1.13.1) mail (>= 2.0) @@ -143,9 +143,9 @@ GEM addressable (>= 2.2) redis (5.0.6) redis-client (>= 0.9.0) - redis-client (0.14.0) + redis-client (0.14.1) connection_pool - regexp_parser (2.7.0) + regexp_parser (2.8.0) request_store (1.5.1) rack (>= 1.4) rest-client (2.1.0) @@ -156,17 +156,17 @@ GEM rexml (3.2.5) rsolr (1.1.2) builder (>= 2.1.2) - rubocop (1.48.1) + rubocop (1.50.2) json (~> 2.3) parallel (~> 1.10) parser (>= 3.2.0.0) rainbow (>= 2.2.2, < 4.0) regexp_parser (>= 1.8, < 3.0) rexml (>= 3.2.5, < 4.0) - rubocop-ast (>= 1.26.0, < 2.0) + rubocop-ast (>= 1.28.0, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 3.0) - rubocop-ast (1.27.0) + rubocop-ast (1.28.0) parser (>= 3.2.1.0) ruby-progressbar (1.13.0) ruby2_keywords (0.0.5) @@ -183,13 +183,13 @@ GEM systemu (2.6.5) test-unit-minitest (0.9.1) minitest (~> 4.7) - thin (1.8.1) + thin (1.8.2) daemons (~> 1.0, >= 1.0.9) eventmachine (~> 1.0, >= 1.0.4) rack (>= 1, < 3) thread_safe (0.3.6) timeout (0.3.2) - tzinfo (0.3.61) + tzinfo (0.3.62) unf (0.1.4) unf_ext unf_ext (0.0.8.2) @@ -199,7 +199,6 @@ GEM PLATFORMS x86_64-darwin-21 - x86_64-linux DEPENDENCIES activesupport (~> 4) @@ -233,4 +232,4 @@ DEPENDENCIES thin BUNDLED WITH - 2.3.15 + 2.3.23 diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb index 31333c0f..4fa3f351 100644 --- a/lib/ontologies_linked_data/models/ontology_submission.rb +++ b/lib/ontologies_linked_data/models/ontology_submission.rb @@ -21,7 +21,9 @@ class OntologySubmission < LinkedData::Models::Base include SKOS::RootsFetcher FILES_TO_DELETE = ['labels.ttl', 'mappings.ttl', 'obsolete.ttl', 'owlapi.xrdf', 'errors.log'] + FOLDERS_TO_DELETE = ['unzipped'] FLAT_ROOTS_LIMIT = 1000 + FILE_SIZE_ZIPPING_THRESHOLD = 100 * 1024 * 1024 # 100MB model :ontology_submission, scheme: File.join(__dir__, '../../../config/schemes/ontology_submission.yml'), name_with: ->(s) { submission_id_generator(s) } @@ -178,8 +180,8 @@ class OntologySubmission < LinkedData::Models::Base attribute :ontology, type: :ontology, enforce: [:existence] # Hypermedia settings - embed :contact, :ontology, :metrics - embed_values :submissionStatus => [:code], :hasOntologyLanguage => [:acronym] + embed :contact, :ontology + embed_values :submissionStatus => [:code], :hasOntologyLanguage => [:acronym], :metrics => [:classes, :individuals, :properties] serialize_default :contact, :ontology, :hasOntologyLanguage, :released, :creationDate, :homepage, :publication, :documentation, :version, :description, :status, :submissionId @@ -467,6 +469,25 @@ def delete_old_submission_files submission_files.push(csv_path) submission_files.push(parsing_log_path) unless parsing_log_path.nil? FileUtils.rm(submission_files, force: true) + + submission_folders = FOLDERS_TO_DELETE.map { |f| File.join(path_to_repo, f) } + submission_folders.each {|d| FileUtils.remove_dir(d) if File.directory?(d)} + end + + def zip_submission_uploaded_file + self.bring(:uploadFilePath) if self.bring?(:uploadFilePath) + + return self.uploadFilePath if zipped? + return self.uploadFilePath if self.uploadFilePath.nil? || self.uploadFilePath.empty? + + + return self.uploadFilePath if File.size(self.uploadFilePath) < FILE_SIZE_ZIPPING_THRESHOLD + + + old_path = self.uploadFilePath + new_path = Utils::FileHelpers.zip_file(old_path) + FileUtils.rm(old_path, force: true) + new_path end # accepts another submission in 'older' (it should be an 'older' ontology version) @@ -1004,6 +1025,25 @@ def archived? return ready?(status: [:archived]) end + def archive_submission + self.submissionStatus = nil + status = LinkedData::Models::SubmissionStatus.find("ARCHIVED").first + add_submission_status(status) + + # Delete everything except for original ontology file. + ontology.bring(:submissions) + submissions = ontology.submissions + unless submissions.nil? + submissions.each { |s| s.bring(:submissionId) } + submission = submissions.sort { |a, b| b.submissionId <=> a.submissionId }[0] + # Don't perform deletion if this is the most recent submission. + if self.submissionId < submission.submissionId + delete_old_submission_files + self.uploadFilePath = zip_submission_uploaded_file + end + end + end + ################################################################ # Possible options with their defaults: # process_rdf = false @@ -1072,21 +1112,7 @@ def process_submission(logger, options = {}) status = nil if archive - self.submissionStatus = nil - status = LinkedData::Models::SubmissionStatus.find("ARCHIVED").first - add_submission_status(status) - - # Delete everything except for original ontology file. - ontology.bring(:submissions) - submissions = ontology.submissions - unless submissions.nil? - submissions.each { |s| s.bring(:submissionId) } - submission = submissions.sort { |a, b| b.submissionId <=> a.submissionId }[0] - # Don't perform deletion if this is the most recent submission. - if (self.submissionId < submission.submissionId) - delete_old_submission_files - end - end + archive_submission else if process_rdf # Remove processing status types before starting RDF parsing etc. diff --git a/lib/ontologies_linked_data/monkeypatches/object.rb b/lib/ontologies_linked_data/monkeypatches/object.rb index deadf71c..b9b97fd2 100644 --- a/lib/ontologies_linked_data/monkeypatches/object.rb +++ b/lib/ontologies_linked_data/monkeypatches/object.rb @@ -337,6 +337,8 @@ def embed_goo_objects_just_values(hash, attribute, value, options, &block) end def add_goo_values(goo_object, embedded_values, attributes_to_embed, options, &block) + return if goo_object.nil? + if attributes_to_embed.length > 1 embedded_values_hash = {} attributes_to_embed.each do |a| diff --git a/lib/ontologies_linked_data/security/access_control.rb b/lib/ontologies_linked_data/security/access_control.rb index 16cf30e8..bbfa8cd9 100644 --- a/lib/ontologies_linked_data/security/access_control.rb +++ b/lib/ontologies_linked_data/security/access_control.rb @@ -57,12 +57,12 @@ def access_for_all? def read_restricted_based_on?(based_on) if based_on.is_a?(Proc) instance_to_base_on = based_on.call(self) - restricted = instance_to_base_on.read_restricted? + restricted = instance_to_base_on ? instance_to_base_on.read_restricted? : false elsif based_on.is_a?(LinkedData::Models::Base) restricted = based_on.read_restricted? elsif based_on.is_a?(Symbol) instance_to_base_on = based_on.send(based_on) - restricted = instance_to_base_on.read_restricted? + restricted = instance_to_base_on ? instance_to_base_on.read_restricted? : false else restricted = false end diff --git a/lib/ontologies_linked_data/serializer.rb b/lib/ontologies_linked_data/serializer.rb index 0ca9c359..c1d9f630 100644 --- a/lib/ontologies_linked_data/serializer.rb +++ b/lib/ontologies_linked_data/serializer.rb @@ -84,7 +84,9 @@ def self.response(options = {}) end def self.serialize(type, obj, params, request) - lang = params['lang'] || params['language']|| Goo.main_languages.first + + lang = self.get_language(params) + only = params['display'] || [] only = only.split(',') unless only.is_a?(Array) all = only[0] == 'all' @@ -106,5 +108,11 @@ def self.print_stacktrace? end end + def self.get_language(params) + lang = params['lang'] || params['language'] || Goo.main_languages&.first.to_s || 'en' + lang = lang.split(',').map {|l| l.downcase.to_sym} + return lang.length == 1 ? lang.first : lang + end + end end \ No newline at end of file diff --git a/lib/ontologies_linked_data/serializers/json.rb b/lib/ontologies_linked_data/serializers/json.rb index 890f651e..55782cee 100644 --- a/lib/ontologies_linked_data/serializers/json.rb +++ b/lib/ontologies_linked_data/serializers/json.rb @@ -6,8 +6,11 @@ class JSON CONTEXTS = {} def self.serialize(obj, options = {}) + + hash = obj.to_flex_hash(options) do |hash, hashed_obj| current_cls = hashed_obj.respond_to?(:klass) ? hashed_obj.klass : hashed_obj.class + result_lang = self.get_languages(get_object_submission(hashed_obj), options[:lang]) if result_lang.nil? # Add the id to json-ld attribute if current_cls.ancestors.include?(LinkedData::Hypermedia::Resource) && !current_cls.embedded? && hashed_obj.respond_to?(:id) @@ -27,7 +30,7 @@ def self.serialize(obj, options = {}) hash["links"].merge!(generate_links_context(hashed_obj)) if generate_context?(options) end end - + # Generate context if current_cls.ancestors.include?(Goo::Base::Resource) && !current_cls.embedded? if generate_context?(options) @@ -40,13 +43,35 @@ def self.serialize(obj, options = {}) context = {"@context" => context_hash} hash.merge!(context) end - hash['@context']['@language'] = options[:lang] if hash['@context'] + hash['@context']['@language'] = result_lang if hash['@context'] end MultiJson.dump(hash) end private + def self.get_object_submission(obj) + obj.class.attributes.include?(:submission) ? obj.submission : nil + end + + def self.get_languages(submission, user_languages) + result_lang = user_languages + + if submission + submission.bring :naturalLanguage + languages = get_submission_languages(submission.naturalLanguage) + # intersection of the two arrays , if the requested language is not :all + result_lang = user_languages == :all ? languages : Array(user_languages) & languages + result_lang = result_lang.first if result_lang.length == 1 + end + + result_lang + end + + def self.get_submission_languages(submission_natural_language = []) + submission_natural_language.map { |natural_language| natural_language["iso639"] && natural_language.split('/').last[0..1].to_sym }.compact + end + def self.type(current_cls, hashed_obj) if current_cls.respond_to?(:type_uri) # For internal class diff --git a/lib/ontologies_linked_data/utils/file.rb b/lib/ontologies_linked_data/utils/file.rb index e0866df3..dd517877 100644 --- a/lib/ontologies_linked_data/utils/file.rb +++ b/lib/ontologies_linked_data/utils/file.rb @@ -14,15 +14,7 @@ def initialize(gz) self.name = gz.orig_name end end - - def self.gzip?(file_path) - file_path = file_path.to_s - unless File.exist? file_path - raise ArgumentError, "File path #{file_path} not found" - end - file_type = `file --mime -b #{Shellwords.escape(file_path)}` - return file_type.split(";")[0] == "application/x-gzip" - end + def self.zip?(file_path) file_path = file_path.to_s @@ -88,6 +80,21 @@ def self.unzip(file_path, dst_folder) extracted_files end + def self.zip_file(file_path) + return file_path if self.zip?(file_path) + + zip_file_path = "#{file_path}.zip" + Zip::File.open(zip_file_path, Zip::File::CREATE) do |zipfile| + # Add the file to the zip + begin + zipfile.add(File.basename(file_path), file_path) + rescue Zip::EntryExistsError + end + + end + zip_file_path + end + def self.automaster?(path, format) self.automaster(path, format) != nil end diff --git a/test/models/test_ontology_submission.rb b/test/models/test_ontology_submission.rb index 9db71580..462959a3 100644 --- a/test/models/test_ontology_submission.rb +++ b/test/models/test_ontology_submission.rb @@ -342,6 +342,9 @@ def test_process_submission_archive parse_options = { process_rdf: false, index_search: false, index_commit: false, run_metrics: false, reasoning: false, archive: true } + old_threshold = LinkedData::Models::OntologySubmission::FILE_SIZE_ZIPPING_THRESHOLD + LinkedData::Models::OntologySubmission.const_set(:FILE_SIZE_ZIPPING_THRESHOLD, 0) + ont_count, ont_acronyms, ontologies = create_ontologies_and_submissions(ont_count: 1, submission_count: 2, process_submission: true, acronym: 'NCBO-545') @@ -371,6 +374,7 @@ def test_process_submission_archive # Process one prior to latest submission. Some files should be deleted. old_sub = sorted_submissions.last + old_file_path = old_sub.uploadFilePath old_sub.process_submission(Logger.new(old_sub.parsing_log_path), parse_options) assert old_sub.archived? @@ -391,6 +395,13 @@ def test_process_submission_archive assert_equal false, File.file?(old_sub.parsing_log_path), %-File deletion failed for '#{old_sub.parsing_log_path}'- + + assert_equal false, File.file?(old_file_path), + %-File deletion failed for '#{old_file_path}'- + + assert old_sub.zipped? + assert File.file?(old_sub.uploadFilePath) + LinkedData::Models::OntologySubmission.const_set(:FILE_SIZE_ZIPPING_THRESHOLD, old_threshold) end def test_submission_diff_across_ontologies @@ -455,6 +466,45 @@ def test_index_properties assert_equal 0, res["response"]["numFound"] end + def test_zipped_submission_process + acronym = "PIZZA" + name = "PIZZA Ontology" + ontologyFile = "./test/data/ontology_files/pizza.owl.zip" + archived_submission = nil + 2.times do |i| + id = 20 + i + ont_submision = LinkedData::Models::OntologySubmission.new({ :submissionId => id}) + assert (not ont_submision.valid?) + assert_equal 4, ont_submision.errors.length + uploadFilePath = LinkedData::Models::OntologySubmission.copy_file_repository(acronym, id,ontologyFile) + ont_submision.uploadFilePath = uploadFilePath + owl, bro, user, contact = submission_dependent_objects("OWL", acronym, "test_linked_models", name) + ont_submision.released = DateTime.now - 4 + ont_submision.hasOntologyLanguage = owl + ont_submision.ontology = bro + ont_submision.contact = [contact] + assert ont_submision.valid? + ont_submision.save + parse_options = {process_rdf: true, reasoning: true, index_search: false, run_metrics: false, diff: true} + begin + tmp_log = Logger.new(TestLogFile.new) + ont_submision.process_submission(tmp_log, parse_options) + rescue Exception => e + puts "Error, logged in #{tmp_log.instance_variable_get("@logdev").dev.path}" + raise e + end + archived_submission = ont_submision if i.zero? + end + parse_options = { process_rdf: false, index_search: false, index_commit: false, + run_metrics: false, reasoning: false, archive: true } + archived_submission.process_submission(Logger.new(TestLogFile.new), parse_options) + + assert_equal false, File.file?(archived_submission.zip_folder), + %-File deletion failed for '#{archived_submission.zip_folder}'- + + + + end def test_submission_parse_zip skip if ENV["BP_SKIP_HEAVY_TESTS"] == "1"