Skip to content

Commit

Permalink
Fix clinical data counts query/filter using new derived table (#10833)
Browse files Browse the repository at this point in the history
  • Loading branch information
alisman authored and haynescd committed Nov 24, 2024
1 parent a03ec54 commit 1051dea
Show file tree
Hide file tree
Showing 6 changed files with 215 additions and 84 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,14 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Repository;

import java.util.Collections;
import java.util.List;
import java.util.Map;

@Repository
public class StudyViewMyBatisRepository implements StudyViewRepository {

private static final List<String> FILTERED_CLINICAL_ATTR_VALUES = List.of("NA", "NAN", "N/A");
private static final List<String> FILTERED_CLINICAL_ATTR_VALUES = Collections.emptyList();
private final StudyViewMapper mapper;

@Autowired
Expand Down Expand Up @@ -59,7 +60,7 @@ public List<ClinicalDataCount> getPatientClinicalDataCounts(StudyViewFilter stud

@Override
public List<String> getClinicalDataAttributeNames(ClinicalAttributeDataSource clinicalAttributeDataSource, ClinicalAttributeDataType dataType) {
String tableName = clinicalAttributeDataSource.getValue().toLowerCase() + "_clinical_attribute_" + dataType.getValue().toLowerCase() + "_view";
String tableName = clinicalAttributeDataSource.getValue().toLowerCase() + "_clinical_attribute_" + dataType.getValue().toLowerCase() + "_mv";
return mapper.getClinicalAttributeNames(tableName);
}

Expand Down
46 changes: 46 additions & 0 deletions src/main/resources/db-scripts/clickhouse/derived_tables.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
-- clinical_data_derived
DROP TABLE IF EXISTS clinical_data_derived
CREATE TABLE IF NOT EXISTS clinical_data_derived
(
sample_unique_id String,
patient_unique_id String,
attribute_name String,
attribute_value String,
cancer_study_identifier String,
type String
)
ENGINE=MergeTree
ORDER BY sample_unique_id

-- Insert sample attribute data
INSERT INTO TABLE clinical_data_derived
SELECT sm.sample_unique_id AS sample_unique_id,
sm.patient_unique_id AS patient_unique_id,
cam.attr_id AS attribute_name,
csamp.attr_value AS attribute_value,
cs.cancer_study_identifier AS cancer_study_identifier,
'sample' AS type
FROM sling_db_2024_05_23_original.sample_mv AS sm
INNER JOIN sling_db_2024_05_23_original.cancer_study AS cs
ON sm.cancer_study_identifier = cs.cancer_study_identifier
FULL OUTER JOIN sling_db_2024_05_23_original.clinical_attribute_meta AS cam
ON cs.cancer_study_id = cam.cancer_study_id
FULL OUTER JOIN sling_db_2024_05_23_original.clinical_sample AS csamp
ON (sm.internal_id = csamp.internal_id) AND (csamp.attr_id = cam.attr_id)
WHERE cam.patient_attribute = 0

-- INSERT patient attribute data
INSERT INTO TABLE clinical_data_derived
SELECT NULL AS sample_unique_id,
concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id,
cam.attr_id AS attribute_name,
clinpat.attr_value AS attribute_value,
cs.cancer_study_identifier AS cancer_study_identifier,
'patient' AS type
FROM sling_db_2024_05_23_original.patient AS p
INNER JOIN sling_db_2024_05_23_original.cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id
FULL OUTER JOIN sling_db_2024_05_23_original.clinical_attribute_meta AS cam
ON cs.cancer_study_id = cam.cancer_study_id
FULL OUTER JOIN sling_db_2024_05_23_original.clinical_patient AS clinpat
ON (p.internal_id = clinpat.internal_id) AND (clinpat.attr_id = cam.attr_id)
WHERE cam.patient_attribute = 1
100 changes: 83 additions & 17 deletions src/main/resources/db-scripts/clickhouse/materialized_views.sql
Original file line number Diff line number Diff line change
@@ -1,14 +1,6 @@
DROP TABLE IF EXISTS sample_clinical_attribute_numeric;
DROP TABLE IF EXISTS sample_clinical_attribute_categorical;
DROP TABLE IF EXISTS patient_clinical_attribute_numeric;
DROP TABLE IF EXISTS patient_clinical_attribute_categorical;
DROP TABLE IF EXISTS sample_columnstore;
DROP TABLE IF EXISTS sample_list_columnstore;
DROP TABLE IF EXISTS genomic_event;
DROP VIEW IF EXISTS sample_clinical_attribute_numeric_mv;
DROP VIEW IF EXISTS sample_clinical_attribute_categorical_mv;
DROP VIEW IF EXISTS patient_clinical_attribute_numeric_mv;
DROP VIEW IF EXISTS patient_clinical_attribute_categorical_mv;
DROP VIEW IF EXISTS sample_columnstore_mv;
DROP VIEW IF EXISTS sample_list_columnstore_mv;

Expand Down Expand Up @@ -135,17 +127,91 @@ FROM cancer_study cs
INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id
WHERE NOT match(cp.attr_value, '^[\d\.]+$');

--patient_clinical_attribute_categorical_mv
DROP VIEW IF EXISTS patient_clinical_attribute_categorical_mv;
CREATE MATERIALIZED VIEW patient_clinical_attribute_categorical_mv
TO patient_clinical_attribute_categorical AS
SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
cp.attr_id as attribute_name,
cp.attr_value as attribute_value,
cs.cancer_study_identifier as cancer_study_identifier
FROM cancer_study cs
INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id
INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id
WHERE NOT match(cp.attr_value, '^[\d\.]+$');
ENGINE = MergeTree()
ORDER BY cancer_study_identifier
SETTINGS allow_nullable_key = 1
POPULATE
AS
SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id,
cp.attr_id AS attribute_name,
cp.attr_value AS attribute_value,
cs.cancer_study_identifier AS cancer_study_identifier
FROM clinical_patient AS cp
INNER JOIN patient AS p ON cp.internal_id = p.internal_id
INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id
INNER JOIN clinical_attribute_meta AS cam
ON (cp.attr_id = cam.attr_id) AND (cs.cancer_study_id = cam.cancer_study_id)
WHERE cam.datatype = 'STRING'


--patient_clinical_attribute_numeric_mv
DROP VIEW IF EXISTS patient_clinical_attribute_numeric_mv;
CREATE MATERIALIZED VIEW patient_clinical_attribute_numeric_mv
ENGINE = MergeTree()
ORDER BY cancer_study_identifier
SETTINGS allow_nullable_key = 1
POPULATE
AS
SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id,
cp.attr_id AS attribute_name,
cp.attr_value AS attribute_value,
cs.cancer_study_identifier AS cancer_study_identifier
FROM sling_db_2024_05_23_original.clinical_patient AS cp
INNER JOIN sling_db_2024_05_23_original.patient AS p ON cp.internal_id = p.internal_id
INNER JOIN sling_db_2024_05_23_original.cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id
INNER JOIN sling_db_2024_05_23_original.clinical_attribute_meta AS cam
ON (cp.attr_id = cam.attr_id) AND (cs.cancer_study_id = cam.cancer_study_id)
WHERE cam.datatype = 'NUMBER'

-- sample_clinical_attribute_categorical_mv
DROP VIEW IF EXISTS sample_clinical_attribute_categorical_mv;
CREATE MATERIALIZED VIEW sample_clinical_attribute_categorical_mv
ENGINE = MergeTree()
ORDER BY cancer_study_identifier
SETTINGS allow_nullable_key = 1
POPULATE
AS
SELECT s.sample_unique_id,
s.patient_unique_id,
csamp.attr_id AS attribute_name,
csamp.attr_value AS attribute_value,
s.cancer_study_identifier AS cancer_study_identifier
FROM sling_db_2024_05_23_original.clinical_sample AS csamp
INNER JOIN sling_db_2024_05_23_original.sample_mv AS s ON csamp.internal_id = s.internal_id
INNER JOIN sling_db_2024_05_23_original.cancer_study AS cs
ON s.cancer_study_identifier = cs.cancer_study_identifier
INNER JOIN sling_db_2024_05_23_original.clinical_attribute_meta AS cam
ON (csamp.attr_id = cam.attr_id) AND (cs.cancer_study_id = cam.cancer_study_id)
WHERE cam.datatype = 'STRING'

-- sample_clinical_attribute_numeric_mv
DROP VIEW IF EXISTS sample_clinical_attribute_numeric_mv;
CREATE MATERIALIZED VIEW sample_clinical_attribute_numeric_mv
ENGINE = MergeTree()
ORDER BY cancer_study_identifier
SETTINGS allow_nullable_key = 1
POPULATE
AS
SELECT s.sample_unique_id,
s.patient_unique_id,
csamp.attr_id AS attribute_name,
csamp.attr_value AS attribute_value,
s.cancer_study_identifier AS cancer_study_identifier
FROM clinical_sample AS csamp
INNER JOIN sample_mv AS s ON csamp.internal_id = s.internal_id
INNER JOIN cancer_study AS cs
ON s.cancer_study_identifier = cs.cancer_study_identifier
INNER JOIN clinical_attribute_meta AS cam
ON (csamp.attr_id = cam.attr_id) AND (cs.cancer_study_id = cam.cancer_study_id)
WHERE cam.datatype = 'NUMBER'




-- sample_columnstore
CREATE TABLE IF NOT EXISTS sample_columnstore
(
sample_unique_id VARCHAR(45),
Expand Down
51 changes: 0 additions & 51 deletions src/main/resources/db-scripts/clickhouse/views.sql
Original file line number Diff line number Diff line change
@@ -1,57 +1,6 @@
DROP VIEW IF EXISTS sample_clinical_attribute_numeric_view;
DROP VIEW IF EXISTS sample_clinical_attribute_categorical_view;
DROP VIEW IF EXISTS patient_clinical_attribute_numeric_view;
DROP VIEW IF EXISTS patient_clinical_attribute_categorical_view;
DROP VIEW IF EXISTS sample_view;
DROP VIEW IF EXISTS sample_list_view;

CREATE VIEW sample_clinical_attribute_numeric_view
AS
SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id,
concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
clinical_sample.attr_id as attribute_name,
cast(clinical_sample.attr_value as float) as attribute_value,
cs.cancer_study_identifier as cancer_study_identifier
FROM cancer_study cs
INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id
INNER JOIN sample s on p.internal_id = s.patient_id
INNER JOIN clinical_sample ON s.internal_id = clinical_sample.internal_id
WHERE match(clinical_sample.attr_value, '^[\d\.]+$');

CREATE VIEW sample_clinical_attribute_categorical_view
AS
SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id,
concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
cl.attr_id as attribute_name,
cl.attr_value as attribute_value,
cs.cancer_study_identifier as cancer_study_identifier
FROM cancer_study cs
INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id
INNER JOIN sample s on p.internal_id = s.patient_id
INNER JOIN clinical_sample cl on s.internal_id = cl.internal_id
WHERE NOT match(cl.attr_value, '^[\d\.]+$');

CREATE VIEW patient_clinical_attribute_numeric_view
AS
SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
cp.attr_id as attribute_name,
cast(cp.attr_value as float) as attribute_value,
cs.cancer_study_identifier as cancer_study_identifier
FROM cancer_study cs
INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id
INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id
WHERE match(cp.attr_value, '^[\d\.]+$');

CREATE VIEW patient_clinical_attribute_categorical_view
AS
SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
cp.attr_id as attribute_name,
cp.attr_value as attribute_value,
cs.cancer_study_identifier as cancer_study_identifier
FROM cancer_study cs
INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id
INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id
WHERE NOT match(cp.attr_value, '^[\d\.]+$');

CREATE VIEW sample_view
AS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,17 @@
<foreach item="clinicalDataFilter" collection="categorizedClinicalDataCountFilter.getSampleNumericalClinicalDataFilters()" open="INTERSECT" separator="INTERSECT">
<include refid="numericalClinicalDataCountFilter">
<property name="unique_id" value="sample_unique_id"/>
<property name="table_name" value="sample_clinical_attribute_numeric_view"/>
<property name="table_name" value="sample_clinical_attribute_numeric_mv"/>
<property name="type" value="sample"/>
</include>
</foreach>
</if>
<if test="categorizedClinicalDataCountFilter.getSampleCategoricalClinicalDataFilters() != null and !categorizedClinicalDataCountFilter.getSampleCategoricalClinicalDataFilters().isEmpty()">
<foreach item="clinicalDataFilter" collection="categorizedClinicalDataCountFilter.getSampleCategoricalClinicalDataFilters()" open="INTERSECT" separator="INTERSECT">
<include refid="categoricalClinicalDataCountFilter">
<property name="unique_id" value="sample_unique_id"/>
<property name="table_name" value="sample_clinical_attribute_categorical_view"/>
<property name="table_name" value="clinical_data_derived"/>
<property name="type" value="sample"/>
</include>
</foreach>
</if>
Expand All @@ -68,15 +70,17 @@
<foreach item="clinicalDataFilter" collection="categorizedClinicalDataCountFilter.getPatientNumericalClinicalDataFilters()" open="INTERSECT" separator="INTERSECT">
<include refid="numericalClinicalDataCountFilter">
<property name="unique_id" value="patient_unique_id"/>
<property name="table_name" value="patient_clinical_attribute_numeric_view"/>
<property name="table_name" value="patient_clinical_attribute_numeric_mv"/>
<property name="type" value="patient"/>
</include>
</foreach>
</if>
<if test="categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters() != null and !categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters().isEmpty()">
<foreach item="clinicalDataFilter" collection="categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters()" open="INTERSECT" separator="INTERSECT">
<include refid="categoricalClinicalDataCountFilter">
<property name="unique_id" value="patient_unique_id"/>
<property name="table_name" value="patient_clinical_attribute_categorical_view"/>
<property name="table_name" value="clinical_data_derived"/>
<property name="type" value="patient"/>
</include>
</foreach>
</if>
Expand Down Expand Up @@ -112,7 +116,8 @@
<sql id="categoricalClinicalDataCountFilter">
SELECT ${unique_id}
FROM ${table_name}
WHERE attribute_name = '${clinicalDataFilter.attributeId}'
WHERE attribute_name = '${clinicalDataFilter.attributeId}' AND
type='${type}'
<foreach item="dataFilterValue" collection="clinicalDataFilter.values" open=" AND ((" separator=") OR (" close="))">
<trim prefix="" prefixOverrides="AND">
AND attribute_value = '${dataFilterValue.value}'
Expand Down
Loading

0 comments on commit 1051dea

Please sign in to comment.