-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathbrapi_to_isa.py
executable file
·490 lines (407 loc) · 23.4 KB
/
brapi_to_isa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
import datetime
import argparse
import datetime
import errno
import logging
import os
import sys
import json
import re
from collections import defaultdict
from isatools.convert import isatab2json
from isatools import isatab
from isatools.model import *
from brapi_client import BrapiClient
from brapi_to_isa_converter import BrapiToIsaConverter, att_test, PAR_NAinData, PAR_NAinBrAPI, PAR_defaultObsLvl, PAR_suppObsLvl
# All contributors are kept in one value: consecutive `__author__ = ...`
# assignments overwrite each other, leaving only the last name at runtime.
__author__ = ', '.join((
    'proccaserra (Philippe Rocca-Serra)',
    'cpommier (Cyril Pommier)',
    'bedroesb (Bert Droesbeke)',
    'gcornut (Guillaume Cornut)',
    'terazus (Dominique Batista)',
))

# ---------------------------------------------------------------------------
# Logging: a dedicated 'brapi_converter' logger writing to `log_file`.
# ---------------------------------------------------------------------------
log_file = "brapilog.log"
# logging.basicConfig(filename=log_file,
#                     filemode='a',
#                     level=logging.DEBUG)
logger = logging.getLogger('brapi_converter')
# NOTE(review): these three calls run before the level is set and before the
# file handler below is attached, so they are not written to `log_file`.
logger.debug('This message should go to the log file')
logger.info('Starting now...')
logger.warning('And this, too')
# logger.setLevel(logging.INFO)
logger.setLevel(logging.DEBUG)
file4log = logging.FileHandler(log_file)
# The handler filters at INFO: DEBUG records pass the logger but are not
# written to the file.
file4log.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file4log.setFormatter(formatter)
logger.addHandler(file4log)

# ---------------------------------------------------------------------------
# Command line interface
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument('-e', '--endpoint', help="a BrAPi server endpoint", type=str)
# `action='append'` means -t / -s may be repeated; each occurrence adds one
# string to the resulting list (None when the option is never given).
parser.add_argument('-t', '--trials', help="comma separated list of trial Ids. 'all' to get all trials (not recommended)", type=str, action='append')
parser.add_argument('-s', '--studies', help="comma separated list of study Ids", type=str, action='append')
# -J and -V are "store_false" switches: the feature is on by default and the
# flag turns it off.
parser.add_argument('-J', '--json', help="flag to deactivate json dump", action="store_false")
parser.add_argument('-V', '--validator', help="flag to deactivate validation", action="store_false")
parser.add_argument('-F', '--flatten', help="flag to generate flattened data file", action="store_true")

# Default endpoint, replaced below when -e/--endpoint is supplied.
SERVER = 'https://test-server.brapi.org/brapi/v1/'
logger.debug('Argument List:' + str(sys.argv))
args = parser.parse_args()
TRIAL_IDS = args.trials
STUDY_IDS = args.studies
JSON_boolean = args.json
VALIDATOR_boolean = args.validator
FLATTEN_boolean = args.flatten
if args.endpoint:
    SERVER = args.endpoint
logger.info("\n----------------\ntrials IDs to be exported : "
            + str(TRIAL_IDS) + "\nstudy IDs to be exported : "
            + str(STUDY_IDS) + "\nTarget endpoint : "
            + str(SERVER) + "\n----------------")
def create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_protocol, phenotyping_protocol, data_transformation_protocol, OBSERVATIONUNITLIST):
    """Build ISA samples, processes and data files from BrAPI observation units.

    For each observation unit whose germplasm is a source of `isa_study` and
    whose name has not been converted yet, this adds to `isa_study`:

    * one sample (with "Observation Unit Type", optional "Spatial
      Distribution" characteristics and treatment factor values),
    * one growth process (source -> sample) in the study process sequence,
    * one phenotyping process and one data transformation process in the
      process sequence of the assay matching the unit's observation level.

    Every treatment factor met is finally appended to `isa_study.factors`.

    :param client: not used by this function; kept for call compatibility.
    :param brapi_study_id: study identifier, used in the derived data file name.
    :param isa_study: ISA study object, modified in place.
    :param growth_protocol: protocol executed by the growth processes.
    :param phenotyping_protocol: protocol executed by the phenotyping processes.
    :param data_transformation_protocol: protocol executed by the data
        transformation processes.
    :param OBSERVATIONUNITLIST: iterable of BrAPI observation unit mappings.
    :return: None.
    """
    spat_dist_mapping_dictionary = {
        "X": "X",
        "Y": "Y",
        "blockNumber": "block",
        "plotNumber": "plot",
        "plantNumber": "plant",
        "replicate": "replicate"
    }

    # connecting the correct observation level to the correct assay object
    # NOTE observation level is temporarily stored inside
    # isa_study.assays[i].characteristic_categories[0] better field available?
    obs_level_to_assay = {
        assay.characteristic_categories[0]: k
        for k, assay in enumerate(isa_study.assays)
    }

    treatments = defaultdict(list)
    # Allow to handle multiyear observation units NOTE (INRA specific).
    # A set keeps the "already converted" check O(1) per observation unit.
    allready_converted_obs_unit = set()

    for obs_unit in OBSERVATIONUNITLIST:
        if 'observationLevel' in obs_unit and obs_unit['observationLevel']:
            obslvl = obs_unit['observationLevel'].lower()
            i = obs_level_to_assay[obslvl]
        else:
            # No level on the unit: fall back to the first assay.
            i = 0
            obslvl = PAR_defaultObsLvl

        # Getting the relevant germplasm used for that observation event:
        # ---------------------------------------------------------------
        this_source = isa_study.get_source(obs_unit['germplasmName'])
        if not this_source or obs_unit['observationUnitName'] in allready_converted_obs_unit:
            continue

        this_isa_sample = Sample(
            name=obs_unit['observationUnitName'],
            derives_from=[this_source])
        allready_converted_obs_unit.add(obs_unit['observationUnitName'])

        c = Characteristic(category=OntologyAnnotation(term="Observation Unit Type"),
                           value=OntologyAnnotation(term=obslvl,
                                                    term_source="",
                                                    term_accession=""))
        this_isa_sample.characteristics.append(c)

        # str() guards against servers returning numeric positions, which
        # would otherwise make the string concatenation raise TypeError.
        spat_dist = [
            spat_dist_mapping_dictionary[key] + ':' + str(obs_unit[key])
            for key in spat_dist_mapping_dictionary
            if att_test(obs_unit, key)
        ]
        if att_test(obs_unit, 'observationLevels'):
            for lvl in obs_unit['observationLevels'].split(", "):
                # Entries of the form "name" or "name:value" are kept verbatim;
                # anything with more than one ':' is ignored.
                if len(lvl.split(":")) in (1, 2):
                    spat_dist.append(lvl)
        spat_dist_str = ';'.join(spat_dist)
        if spat_dist:
            c = Characteristic(category=OntologyAnnotation(term="Spatial Distribution"),
                               value=OntologyAnnotation(term=spat_dist_str,
                                                        term_source="",
                                                        term_accession=""))
            this_isa_sample.characteristics.append(c)

        # Looking for treatment in BRAPI and mapping to ISA samples
        # ---------------------------------------------------------
        if att_test(obs_unit, 'treatments'):
            # factor -> distinct modalities of this observation unit
            treatmentbuffer = defaultdict(list)
            for treatment in obs_unit['treatments']:
                if att_test(treatment, 'factor') and att_test(treatment, 'modality'):
                    if str(treatment['modality']) not in treatmentbuffer[treatment['factor']]:
                        treatmentbuffer[treatment['factor']].append(str(treatment['modality']))

            for factor, modality in treatmentbuffer.items():
                modalities = ','.join(modality)
                if modalities not in treatments[factor]:
                    treatments[factor].append(modalities)
                # NOTE(review): the factor value is assumed to be attached to
                # every sample, not only the first one showing a new modality
                # combination - confirm.
                f = StudyFactor(name=factor, factor_type=OntologyAnnotation(term=factor))
                fv = FactorValue(factor_name=f,
                                 value=OntologyAnnotation(term=modalities,
                                                          term_source="",
                                                          term_accession=""))
                this_isa_sample.factor_values.append(fv)
        isa_study.samples.append(this_isa_sample)

        # Creating the corresponding ISA sample entity for structure the document:
        # ------------------------------------------------------------------------
        growth_process = Process(executes_protocol=growth_protocol)
        growth_process.inputs.append(this_source)
        growth_process.outputs.append(this_isa_sample)
        isa_study.process_sequence.append(growth_process)

        # Assays at observation unit level
        # --------------------------------
        # !!!: fix isatab.py to access other protocol_type values to enable Assay Tab serialization
        isa_study.assays[i].samples.append(this_isa_sample)
        # Lower-cased level (default included); used for the process name and
        # the derived data file name.
        level_name = att_test(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower()
        phenotyping_process = Process(executes_protocol=phenotyping_protocol)
        phenotyping_process.inputs.append(this_isa_sample)
        phenotyping_process.name = level_name

        # Data Transformation
        data_transformation_process = Process(executes_protocol=data_transformation_protocol)

        # Adding Raw Data File column
        RAW_datafile = DataFile(filename=PAR_NAinData,
                                label="Raw Data File",
                                generated_from=[this_isa_sample])
        phenotyping_process.outputs.append(RAW_datafile)
        data_transformation_process.inputs.append(RAW_datafile)

        # Adding Derived Data File column
        datafilename = 'd_' + str(brapi_study_id) + '_' + level_name + '.txt'
        DER_datafile = DataFile(filename=datafilename,
                                label="Derived Data File")
        data_transformation_process.outputs.append(DER_datafile)

        # Chain: growth -> phenotyping -> data transformation
        isa_study.assays[i].process_sequence.append(phenotyping_process)
        plink(growth_process, phenotyping_process)
        isa_study.assays[i].process_sequence.append(data_transformation_process)
        plink(phenotyping_process, data_transformation_process)

    # Mapping treatments to ISA study Factor Value:
    # ---------------------------------------------
    for factor, modalities in treatments.items():
        f = StudyFactor(name=factor, factor_type=OntologyAnnotation(term=factor))
        modality = ";".join(modalities)
        f.comments.append(Comment(name="Study Factor Values", value=modality))
        f.comments.append(Comment(name="Study Factor Description", value=PAR_NAinBrAPI))
        isa_study.factors.append(f)
def write_records_to_file(this_study_id, records, this_directory, filetype, ObservationLevel=''):
    """Write `records` to a UTF-8 text file, one record per line.

    The file is ``this_directory + filetype + this_study_id`` followed by
    ``"_" + ObservationLevel`` when a level is given, and ``".txt"``.
    `this_directory` is concatenated as is, so it must already end with a
    path separator.

    :param this_study_id: study identifier (string) used in the file name.
    :param records: iterable of strings; each gets a trailing newline.
    :param this_directory: output directory prefix.
    :param filetype: file name prefix, e.g. ``"t_"`` or ``"d_"``.
    :param ObservationLevel: optional observation level appended to the name.
    :return: None.
    """
    logger.info('Writing to file')
    level_suffix = "_" + ObservationLevel if ObservationLevel else ""
    target = this_directory + filetype + this_study_id + level_suffix + '.txt'
    # The context manager closes the file; no explicit close() is needed.
    with open(target, 'w', encoding="utf-8") as fh:
        fh.writelines(this_element + '\n' for this_element in records)
def filenameFormat(trialName):
    """Return `trialName` with each run of whitespace replaced by one underscore.

    :param trialName: trial name as a string.
    :return: the name, usable as a directory or file name component.
    """
    # Raw string: '\s' in a normal literal is an invalid escape sequence.
    return re.sub(r'\s+', '_', trialName)
def get_output_path(path):
    """Return ``"outputdir/<path>/"``, creating that directory when missing.

    An ``OSError`` raised while creating the directory is logged; it is
    re-raised unless it only reports that the directory already exists.

    :param path: sub-directory name (string) below ``outputdir``.
    :return: the directory path, with a trailing slash.
    """
    target = "".join(("outputdir/", path, "/"))
    try:
        if os.path.exists(target):
            return target
        os.makedirs(target)
    except OSError as err:
        logger.exception(err)
        if err.errno == errno.EEXIST:
            # Somebody else created it in the meantime: nothing left to do.
            return target
        raise
    return target
def get_trials(brapi_client: BrapiClient):
    """Return the trials to export, from the trial or the study ids given.

    * When the module-level ``TRIAL_IDS`` is set, those trials are fetched.
    * Otherwise each study of ``STUDY_IDS`` is fetched and the trial ids it
      references (``trialDbId`` or ``trialDbIds``) are collected, stored in the
      global ``TRIAL_IDS`` and fetched. When no study references a trial, the
      placeholder produced by ``get_empty_trial()`` is returned instead.
    * With neither trial nor study ids the program exits with status 1.

    :param brapi_client: client used to query the BrAPI endpoint.
    :return: an iterable of trial mappings.
    """
    global TRIAL_IDS
    global STUDY_IDS

    if TRIAL_IDS:
        return brapi_client.get_trials(TRIAL_IDS)

    if not STUDY_IDS:
        logger.info("Not enough parameters, provide TRIAL or STUDY IDs")
        sys.exit(1)

    logger.debug("Got Study IDS : " + ','.join(STUDY_IDS))
    TRIAL_IDS = []
    for my_study_id in STUDY_IDS:
        my_study = brapi_client.get_study(my_study_id)
        if "trialDbId" in my_study.keys() and my_study["trialDbId"]:
            found = my_study["trialDbId"]
        elif "trialDbIds" in my_study.keys() and my_study["trialDbIds"]:
            found = my_study["trialDbIds"]
        else:
            continue
        # A single id must be added as ONE element: `list += "abc"` would
        # extend the list with the characters 'a', 'b', 'c'.
        if not isinstance(found, (list, tuple)):
            found = [found]
        for trial_id in found:
            # Several studies may share a trial; request it only once.
            if trial_id not in TRIAL_IDS:
                TRIAL_IDS.append(trial_id)

    logger.debug("Got the Following trial ids for the Study IDS : " + str(TRIAL_IDS))
    if len(TRIAL_IDS) > 0:
        return brapi_client.get_trials(TRIAL_IDS)
    return get_empty_trial()
def get_empty_trial():
    """Yield one placeholder trial wrapping the studies of ``STUDY_IDS``.

    The placeholder is identified by ``"trial_less_study_"`` followed by the
    first study id, carries empty dates and authorship, and lists one
    ``{"studyDbId": ...}`` entry per requested study.
    """
    study_entries = [{"studyDbId": my_study_id} for my_study_id in STUDY_IDS]
    yield {
        "trialDbId": "trial_less_study_" + STUDY_IDS[0],
        "trialName": PAR_NAinData,
        "trialType": "Project",
        "endDate": "",
        "startDate": "",
        "datasetAuthorship": {},
        "studies": study_entries,
    }
def main(arg=SERVER):
    """Export the selected BrAPI trials from the endpoint `arg` as ISA-Tab.

    For every trial returned by ``get_trials`` an ISA investigation is built
    and written below the directory given by ``get_output_path``; each study
    of the trial contributes an ISA study, a trait definition file (``t_``)
    and one data file per observation level (``d_``). Depending on the
    ``JSON_boolean`` and ``VALIDATOR_boolean`` switches the ISA-Tab output is
    then converted to ISA-JSON and validated.

    Failures of the individual writing/conversion steps are logged and do not
    stop the export.

    :param arg: BrAPI endpoint URL; defaults to the module-level ``SERVER``.
    :return: None.
    """
    # `arg` (not the global) is used so that a caller-supplied endpoint is
    # honoured; with the default value both are the same.
    client = BrapiClient(arg, logger)
    converter = BrapiToIsaConverter(logger, arg)

    # iterating through the trials held in a BRAPI server:
    for trial in get_trials(client):
        logger.info('we start from a set of Trials')
        investigation = Investigation()

        output_directory = get_output_path(filenameFormat(trial['trialName']))
        logger.info("Generating output in : " + output_directory)

        # FILL IN TRIAL INFORMATION
        investigation.identifier = trial['trialDbId']
        investigation.title = trial['trialName']
        # Investigation fields unavailable in BrAPI
        investigation.description = att_test(trial, "trialDescription", PAR_NAinData)
        investigation.submission_date = PAR_NAinBrAPI
        investigation.public_release_date = PAR_NAinBrAPI
        investigation.comments.append(Comment(name="License", value=PAR_NAinBrAPI))

        if att_test(trial, 'contacts'):
            for brapicontact in trial['contacts']:
                # NOTE: brapi has just name attribute -> no separate first/last name;
                # the first word is taken as first name, the rest as last name.
                ContactName = brapicontact['name'].split(' ')
                role = OntologyAnnotation(term=att_test(brapicontact, 'type', PAR_NAinData))
                contact = Person(first_name=ContactName[0], last_name=' '.join(ContactName[1:]),
                                 affiliation=att_test(brapicontact, 'institutionName', PAR_NAinData),
                                 email=att_test(brapicontact, 'email'), address=PAR_NAinBrAPI, roles=[role])
                investigation.contacts.append(contact)
        else:
            # No contact in BrAPI: add a placeholder contact.
            role = OntologyAnnotation(term=PAR_NAinData)
            contact = Person(first_name=PAR_NAinData, last_name=PAR_NAinData,
                             affiliation=PAR_NAinData, email=PAR_NAinData, address=PAR_NAinData, roles=[role])
            investigation.contacts.append(contact)

        investigation.comments.append(Comment(name="MIAPPE version", value="1.1"))

        if att_test(trial, 'publications'):
            for brapipublic in trial['publications']:
                # This is BrAPI v1.3 specific (when older, skipped)
                publication = Publication(doi=att_test(brapipublic, 'publicationPUI', PAR_NAinData))
                publication.status = OntologyAnnotation(term="published")
                investigation.publications.append(publication)
        else:
            # No publication in BrAPI: add a placeholder publication.
            publication = Publication(doi=PAR_NAinData)
            publication.status = OntologyAnnotation(term=PAR_NAinData)
            investigation.publications.append(publication)

        # iterating through the BRAPI studies associated to a given BRAPI trial:
        for brapi_study in trial['studies']:
            # germplasmDbId -> [accessionNumber], handed to the data file writer
            germplasminfo = {}
            brapi_study_id = str(brapi_study['studyDbId'])
            try:
                brapi_study_id.encode('ascii')
            except UnicodeEncodeError:
                logger.debug("Study " + brapi_study_id + " contains a non ascii character and will be skipped.")
                continue

            # NOTE NEW: holding observationUnits in OBSERVATIONUNITLIST
            OBSERVATIONUNITLIST = list(client.get_study_observation_units(brapi_study_id))

            obs_level, obs_levels = converter.get_obs_levels(brapi_study_id, OBSERVATIONUNITLIST)
            # NB: this method always create an ISA Assay Type
            isa_study, investigation = converter.create_isa_study(brapi_study_id, investigation, obs_level.keys())
            investigation.studies.append(isa_study)

            # creating the main ISA protocols:
            # !!!: fix isatab.py to access other protocol_type values to enable Assay Tab serialization
            # TODO: see https://github.com/ISA-tools/isa-api/blob/master/isatools/isatab.py#L886
            phenotyping_protocol = Protocol(name="Phenotyping",
                                            protocol_type=OntologyAnnotation(term="Phenotyping"))
            isa_study.protocols.append(phenotyping_protocol)

            growth_protocol = Protocol(name="Growth",
                                       protocol_type=OntologyAnnotation(term="Growth"))
            isa_study.protocols.append(growth_protocol)

            data_transformation_protocol = Protocol(name="Data Transformation",
                                                    protocol_type=OntologyAnnotation(term="Data Transformation"))
            isa_study.protocols.append(data_transformation_protocol)

            # Getting the list of all germplasms used in the BRAPI isa_study:
            germplasms = client.get_study_germplasms(brapi_study_id)

            # Each germplasm is a biosource: its attributes become the
            # characteristics of an ISA source attached to the study.
            for germ in germplasms:
                source = Source(name=germ['germplasmName'], characteristics=converter.create_germplasm_chars(germ))
                if germ['germplasmDbId'] not in germplasminfo:
                    germplasminfo[germ['germplasmDbId']] = [germ['accessionNumber']]
                # Associating ISA sources to ISA isa_study object
                isa_study.sources.append(source)

            # Now dealing with BRAPI observation units and attempting to create ISA samples
            create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_protocol, phenotyping_protocol,
                                          data_transformation_protocol, OBSERVATIONUNITLIST)

            # Writing isa_study to ISA-Tab format:
            # ------------------------------------
            try:
                # !!!: fix isatab.py to access other protocol_type values to enable Assay Tab serialization
                # !!!: if Assay Table is missing the 'Assay Name' field, remember to check protocol_type used !!!
                isatab.dump(isa_obj=investigation, output_path=output_directory)
                logger.info('ISA-TAB DUMP DONE!...')
            except IOError as ioe:
                logger.info('CONVERSION FAILED!...')
                logger.info(str(ioe))

            # Writing Trait Definition File:
            # ------------------------------
            try:
                variable_records = converter.create_isa_tdf_from_obsvars(
                    client.get_study_observed_variables(brapi_study_id))
                write_records_to_file(this_study_id=str(brapi_study_id),
                                      this_directory=output_directory,
                                      records=variable_records,
                                      filetype="t_")
            except Exception as ioe:
                logger.info('Trait definition file fails to generate!...')
                logger.info(str(ioe))

            # Getting Variable Data and writing Data File
            # -------------------------------------------
            for level, variables in obs_level.items():
                try:
                    data_readings, data_readings_flat = converter.create_isa_obs_data_from_obsvars(
                        OBSERVATIONUNITLIST, list(variables), level, germplasminfo, obs_levels, FLATTEN_boolean)
                    logger.info("Generating data files")
                    write_records_to_file(this_study_id=str(brapi_study_id), this_directory=output_directory,
                                          records=data_readings, filetype="d_", ObservationLevel=level)
                    if FLATTEN_boolean:
                        write_records_to_file(this_study_id=str(brapi_study_id), this_directory=output_directory,
                                              records=data_readings_flat, filetype="d_",
                                              ObservationLevel=level + '_flat')
                except Exception as ioe:
                    logger.info('Data file fails to generate!...')
                    logger.info(str(ioe))

        # NOTE(review): the JSON conversion and the validation below are
        # assumed to run once per trial, after all its studies - confirm.

        # Converting ISA-TAB to ISA-JSON format:
        # --------------------------------------
        if JSON_boolean:
            try:
                logger.info('Converting ISA-TAB to ISA-JSON format')
                input_file_path = output_directory
                output_file_path = output_directory + filenameFormat(trial['trialName']) + '.json'
                isa_json = isatab2json.convert(
                    input_file_path, use_new_parser=True, validate_first=False)
                with open(output_file_path, 'w') as out_fp:
                    json.dump(isa_json, out_fp, indent=4)
            except Exception as ioe:
                logger.info('Conversion to JSON failed!...')
                logger.info(str(ioe))

        # Validating ISA-TAB with configuration files
        # -------------------------------------------
        if VALIDATOR_boolean:
            try:
                isa_config_dir = "./isaconfig-phenotyping-basic"
                isa_tab_dir = output_directory
                logger.info('Validating isa-tab files against configuration files found in ' + isa_config_dir)
                validation_log_path = output_directory + filenameFormat(trial['trialName']) + '_validation_log.json'
                # `with` guarantees the investigation file is closed again.
                with open(os.path.join(isa_tab_dir, 'i_investigation.txt')) as investigation_fp:
                    report = isatab.validate(investigation_fp, isa_config_dir)
                with open(validation_log_path, 'w') as out_fp2:
                    json.dump(report, out_fp2, indent=4)
                logger.info('VALIDATION FINISHED')
                logger.info('The ISA-TAB validation log file can be found at: ' + validation_log_path)
            except Exception as ioe:
                logger.info('ISA-TAB validation failed!...')
                logger.info(str(ioe))

        logger.info('CONVERSION AND VALIDATION FINISHED')
#############################################
# MAIN METHOD TO START THE CONVERSION PROCESS
#############################################
# starting up
if __name__ == '__main__':
    try:
        main()
    except Exception as e:
        # Report through the module logger (not the root `logging` module) so
        # that the fatal error and its traceback go through the configured
        # file handler like the other messages of this script.
        logger.exception(e)
        sys.exit(1)