-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcsv_export_service.rb
84 lines (72 loc) · 3.13 KB
/
csv_export_service.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
module CdmMigrator
  # A service for exporting work and file set metadata to a csv.
  #
  # The exported columns are 'type' followed by #included_fields. Customize the
  # headers/fields by overriding #included_fields (or #excluded_fields). The work
  # types whose attributes provide the columns are supplied to the constructor.
  class CsvExportService
    # @param work_types [Array<Class>] the available work types, passed in from the
    #   controller, such as GenericWork. Each must support `new.attributes`.
    def initialize(work_types)
      @work_types = work_types
    end

    # Writes the csv for the given works (and their file sets) to a file,
    # replacing any existing content.
    #
    # @param work_ids [Array<String>] the work ids (for GenericWork or other work type)
    #   to export metadata for
    # @param filepath [String] where to save the csv file to
    def write_to_csv(work_ids, filepath)
      File.open(filepath, 'w') { |file| file.write(csv_for(work_ids)) }
    end

    # Builds the csv document as a single string.
    #
    # @param work_ids [Array<String>] the work ids to export metadata for
    # @return [String] the header line followed by one line per work / file set
    def csv_for(work_ids)
      rows_for(work_ids).map(&:to_csv).join
    end

    # @param work_ids [Array<String>] the work ids (for GenericWork or other work type)
    #   to export metadata for
    # @return [Array<Array>] an array of arrays where the first nested array holds the
    #   headers and each following one holds the metadata for a work or file set,
    #   i.e. one csv row. Each work's row is followed by the rows of its file sets.
    def rows_for(work_ids)
      work_ids.each_with_object([csv_headers]) do |work_id, rows|
        doc = ::SolrDocument.find(work_id)
        rows << row_for(doc)
        # Array() guards against works without any file sets, where the key is absent.
        Array(doc._source[:file_set_ids_ssim]).each do |file_id|
          rows << row_for(::SolrDocument.find(file_id))
        end
      end
    end

    # @param document [SolrDocument] any document that has the properties listed in
    #   #included_fields (e.g. for a GenericWork or FileSet)
    # @return [Array] the csv row for the given document, ordered like #csv_headers,
    #   with blank cells replaced by an empty string
    def row_for(document)
      # Array() keeps a document without a has_model_ssim value from raising; its
      # 'type' cell is simply left empty.
      cells = { 'type' => Array(document._source[:has_model_ssim]).first }
      included_fields.each { |field| cells[field] = create_cell(document, field) }
      cells.values_at(*csv_headers).map { |cell| cell.blank? ? '' : cell }
    end

    private

    # The attribute names of all work types, minus #excluded_fields. Memoized because
    # computing it instantiates every work type and it is needed for every row.
    #
    # @return [Array<String>]
    def included_fields
      @included_fields ||= @work_types.flat_map { |work| work.new.attributes.keys }.uniq - excluded_fields
    end

    # @return [Array<String>] attribute names that are left out of the export
    def excluded_fields
      %w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label
         relative_path import_url part_of resource_type access_control_id
         representative_id thumbnail_id rendering_ids admin_set_id embargo_id
         lease_id]
    end

    # NOTE: any fields you want to include must also be added to the SolrDocument model
    # as methods because of the check for respond_to?
    #
    # @param document [SolrDocument] the document to create a cell for
    # @param field [String, Symbol] the name of the field
    # @return [Object, nil] the field's value; values of properties declared as multiple
    #   (except doi) are joined with '|'. nil when the document does not respond to the field.
    def create_cell(document, field)
      return unless document.respond_to?(field.to_sym)

      value = document.public_send(field)
      properties = document.hydra_model.properties
      multi_valued = properties.key?(field) && properties[field].multiple? && field.to_sym != :doi
      # Array() tolerates a nil or single value coming back for a multi-valued property.
      multi_valued ? Array(value).join('|') : value
    end

    # @return [Array<String>] the headers for the csv
    def csv_headers
      ['type'] + included_fields
    end
  end
end