Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CIDC-1067 some perfecting tweaks #568

Merged
merged 1 commit into from
Sep 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 36 additions & 16 deletions cidc_api/models/templates/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,9 @@ def get_column_mapping(self, value, context: dict = {}) -> Dict[Column, Any]:
] = self.gcs_uri_format.format(**format_dict)

# Finally, handle process_as
context.update(column_mapping)
for column, process in self.process_as.items():
column_mapping[column] = process(value, column_mapping)
context.update(column_mapping)
column_mapping[column] = process(value, context)

except Exception as e:
raise Exception(
Expand Down Expand Up @@ -276,12 +276,17 @@ def __init__(
self.worksheet_configs = worksheet_configs
self.constants = constants

def read_and_insert(self, filename: Union[str, BinaryIO]):
def read_and_insert(self, filename: Union[str, BinaryIO]) -> List[Exception]:
"""
Extract the models from a populated instance of this template and try to
insert them, rolling back and returning a list of errors if any are encountered.
"""
return insert_record_batch(self.read(filename))
try:
records = self.read(filename)
except Exception as e:
return [e]
else:
return insert_record_batch(records)

def read(
self, filename: Union[str, BinaryIO]
Expand All @@ -305,41 +310,56 @@ def read(
# split the preamble and data rows
preamble_rows = []
data_rows = []
data_header = None
for row in workbook[config.name].iter_rows():
row_type = row_type_from_string(row[0].value)
row_type = row_type_from_string(row[0].value) # row[0]is the type
if row_type == RowType.PREAMBLE:
preamble_rows.append(row)
preamble_rows.append(row[1:])
elif row_type == RowType.DATA:
data_rows.append(row)
# if no entries, skip it
if not all(cell.value is None for cell in row[1:]):
data_rows.append(row[1:])
elif row_type == RowType.HEADER:
data_header = row[1:]

if len(preamble_rows) != len(config.preamble):
raise Exception(
f"Expected {len(config.preamble)} preamble rows but saw {len(preamble_rows)}"
f"Expected {len(config.preamble)} preamble rows but saw {len(preamble_rows)} on {config.name}"
)

# {<column instance>: <processed value, ...}
for entry, row in zip(config.preamble, preamble_rows):
cell = row[2] # row[0] is the type, row[1] is the title
preamble_values = {row[0].value: row[1].value for row in preamble_rows}
for entry in config.preamble:
preamble_dict.update(
entry.get_column_mapping(cell.value)
entry.get_column_mapping(preamble_values[entry.name])
) # process the value
model_dicts.append(preamble_dict)

# combine and flatten the lists of configs across all data_sections
data_configs = [
entry for entries in config.data_sections.values() for entry in entries
]
if len(data_rows) and data_header is None:
raise Exception(f"Received #data rows without a #header")
elif len(data_rows) and len(data_header) != len(data_configs):
raise Exception(
f"Expected {len(data_configs)} data columns but saw {len(data_header)} on {config.name}"
)

for row in data_rows:
# if no entries, skip it
if all(cell.value is None for cell in row[1:]):
continue
data_values = {
title_cell.value: data_cell.value
for title_cell, data_cell in zip(data_header, row)
}

# context will be updated by every cell for each row,
# but preamble_dict needs to persist unchanged
context = preamble_dict.copy()
data_dict = {}
for cell, entry in zip(row[1:], data_configs): # row[0] is the type
data_dict.update(entry.get_column_mapping(cell.value, context))
for entry in data_configs:
data_dict.update(
entry.get_column_mapping(data_values[entry.name], context)
)
context.update(data_dict)

model_dicts.append(data_dict)
Expand Down
119 changes: 63 additions & 56 deletions cidc_api/models/templates/manifest_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,45 +69,68 @@ def _BaseManifestTemplate(
upload_type: str,
filled_by_biorepository: List[Entry],
filled_by_cimac_lab: List[Entry],
essential_patient_data: bool = False,
constants: Dict[Column, Any] = {},
):
return MetadataTemplate(
upload_type=upload_type,
purpose="manifest",
worksheet_configs=[
WorksheetConfig(
"Shipment",
[
worksheets = [
WorksheetConfig(
"Shipment",
[
Entry(
Shipment.trial_id,
name="protocol identifier",
process_as={
Participant.trial_id: identity,
Sample.trial_id: identity,
},
),
Entry(
Shipment.manifest_id,
process_as={Sample.shipment_manifest_id: identity},
),
Entry(Shipment.assay_priority),
Entry(
Shipment.assay_type, process_as={Sample.intended_assay: identity},
),
Entry(Shipment.receiving_party),
Entry(Shipment.courier),
Entry(Shipment.tracking_number),
Entry(Shipment.account_number),
Entry(Shipment.shipping_condition),
Entry(Shipment.date_shipped),
Entry(Shipment.date_received),
Entry(Shipment.quality_of_shipment),
Entry(Shipment.ship_from),
Entry(Shipment.ship_to),
],
{},
),
WorksheetConfig(
"Samples",
[],
{
"IDs": [
Entry(Sample.shipping_entry_number),
Entry(Sample.collection_event_name),
Entry(Participant.cohort_name),
Entry(Participant.trial_participant_id),
Entry(Sample.parent_sample_id),
Entry(Sample.processed_sample_id),
Entry(
Shipment.trial_id,
name="protocol identifier",
Sample.cimac_id,
process_as={
Participant.trial_id: identity,
Sample.trial_id: identity,
Sample.cimac_participant_id: cimac_id_to_cimac_participant_id,
Participant.cimac_participant_id: cimac_id_to_cimac_participant_id,
},
),
Entry(
Shipment.manifest_id,
process_as={Sample.shipment_manifest_id: identity},
),
Entry(Shipment.assay_priority),
Entry(
Shipment.assay_type,
process_as={Sample.intended_assay: identity},
),
Entry(Shipment.receiving_party),
Entry(Shipment.courier),
Entry(Shipment.tracking_number),
Entry(Shipment.account_number),
Entry(Shipment.shipping_condition),
Entry(Shipment.date_shipped),
Entry(Shipment.date_received),
Entry(Shipment.quality_of_shipment),
Entry(Shipment.ship_from),
Entry(Shipment.ship_to),
],
{},
),
"Filled by Biorepository": filled_by_biorepository,
"Filled by CIMAC Lab": filled_by_cimac_lab,
},
),
]
if essential_patient_data:
worksheets.append(
WorksheetConfig(
"Essential Patient Data",
[],
Expand Down Expand Up @@ -139,30 +162,12 @@ def _BaseManifestTemplate(
Entry(Participant.ethnicity),
],
},
),
WorksheetConfig(
"Samples",
[],
{
"IDs": [
Entry(Sample.shipping_entry_number),
Entry(Sample.collection_event_name),
Entry(Participant.cohort_name),
Entry(Participant.trial_participant_id),
Entry(Sample.parent_sample_id),
Entry(Sample.processed_sample_id),
Entry(
Sample.cimac_id,
process_as={
Participant.cimac_participant_id: cimac_id_to_cimac_participant_id
},
),
],
"Filled by Biorepository": filled_by_biorepository,
"Filled by CIMAC Lab": filled_by_cimac_lab,
},
),
],
)
)
return MetadataTemplate(
upload_type=upload_type,
purpose="manifest",
worksheet_configs=worksheets,
constants=constants,
)

Expand Down Expand Up @@ -193,6 +198,7 @@ def _BaseManifestTemplate(
Entry(Sample.residual_sample_use),
Entry(Sample.comments),
],
essential_patient_data=True,
)

TissueSlideManifest = _BaseManifestTemplate(
Expand All @@ -219,4 +225,5 @@ def _BaseManifestTemplate(
Entry(Sample.residual_sample_use),
Entry(Sample.comments),
],
essential_patient_data=False,
)
2 changes: 1 addition & 1 deletion cidc_api/models/templates/model_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def merge(self, other):
setattr(self, column.name, incoming)
elif incoming is not None and current != incoming:
raise Exception(
f"found conflicting values for {self.__tablename__}.{column.name}: {current}!={other}"
f"found conflicting values for {self.__tablename__}.{column.name} for {self.primary_key_values()}: {current}!={incoming}"
)

@classmethod
Expand Down
22 changes: 10 additions & 12 deletions cidc_api/models/templates/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,13 +155,12 @@ def insert_record_batch(
# in case they're needed for later fk's
session.flush()

if not hold_commit:
if dry_run or len(errors):
session.rollback()
else:
session.commit()
else:
if hold_commit:
session.flush()
elif dry_run or len(errors):
session.rollback()
else:
session.commit()

return errors

Expand Down Expand Up @@ -192,13 +191,12 @@ def remove_record_batch(
except Exception as e:
errors.append(e)

if not hold_commit:
if dry_run or len(errors):
session.rollback()
else:
session.commit()
else:
if hold_commit:
session.flush()
elif dry_run or len(errors):
session.rollback()
else:
session.commit()

return errors

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@
"cidc_api.models.files",
],
url="https://github.com/CIMAC-CIDC/cidc_api-gae",
version="0.25.3",
version="0.25.4",
zip_safe=False,
)
Binary file modified tests/models/templates/examples/pbmc_manifest.xlsx
Binary file not shown.
Binary file modified tests/models/templates/examples/tissue_slide_manifest.xlsx
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/models/templates/test_assay_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def mock_get_current_user(monkeypatch):

def test_hande_assay(clean_db, cidc_api, monkeypatch, tmp_path):
# test write and empty read
f = tmp_path / "pbmc_template.xlsx"
f = tmp_path / "hande_assay.xlsx"
with cidc_api.app_context():
HandeAssay.write(f)

Expand Down
20 changes: 0 additions & 20 deletions tests/models/templates/test_manifest_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,14 +214,8 @@ def test_tissue_slide_template(clean_db, cidc_api, tmp_path):
assert partic.trial_participant_id == f"TTTP0{i+1}"
if i == 0:
assert partic.cohort_name == "Arm_Z"
assert partic.gender == "Female"
assert partic.race == "Asian"
assert partic.ethnicity == "Hispanic or Latino"
else: # i == 1
assert partic.cohort_name == "Arm_A"
assert partic.gender == "Male"
assert partic.race == "Native Hawaiian/Pacific Islander"
assert partic.ethnicity == "Unknown"

assert len(samples) == 4
for i, sample in enumerate(samples):
Expand All @@ -238,18 +232,8 @@ def test_tissue_slide_template(clean_db, cidc_api, tmp_path):
assert sample.shipment_manifest_id == shipment.manifest_id
assert sample.shipping_entry_number == i + 1
assert sample.box_number == "2"
assert (
sample.surgical_pathology_report_id
== f"Surgical pathology report {i+1}"
)
assert sample.clinical_report_id == f"clinical report {i+1}"
assert sample.parent_sample_id == f"TRIALGROUP {i+1}"
assert sample.processed_sample_id == "BIOBANK 1"
assert sample.site_description == "ANAL CANAL & ANUS"
assert sample.topography_code == "C00.1"
assert sample.topography_description == "LIP"
assert sample.histology_behavior == "8004/3"
assert sample.histology_behavior_description == "Neoplasm, malignant"
assert sample.sample_location == f"A{i+1}"
assert sample.type_of_sample == "Tumor Tissue"
assert (
Expand All @@ -271,10 +255,6 @@ def test_tissue_slide_template(clean_db, cidc_api, tmp_path):
assert sample.quality_of_sample == "Pass"
assert sample.sample_replacement == "Replacement Not Requested"
assert sample.residual_sample_use == "Not Reported"
assert (
sample.diagnosis_verification
== "Local review consistent with diagnostic pathology report"
)
assert sample.intended_assay == shipment.assay_type

if i == 0:
Expand Down