Skip to content

Commit

Permalink
Input validation
Browse files Browse the repository at this point in the history
  • Loading branch information
fuzhaoyuan committed Dec 23, 2024
1 parent ca03fa7 commit e0da855
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,12 @@ public boolean isCategoricalClinicalDataFilter(ClinicalDataFilter clinicalDataFi
* Merge the range of numerical bins in DataFilters to reduce the number of scans that runs on the database when filtering.
*/
public static <T extends DataFilter> List<T> mergeDataFilters(List<T> filters) {
boolean isNonNumericalOnly = true;
// this should throw error or move to all binning endpoints in the future for input validation
if (!areValidFilters(filters)) {
return filters;
}

boolean hasNumericalValue = false;
List<T> mergedDataFilters = new ArrayList<>();

for (T filter : filters) {
Expand All @@ -151,7 +156,7 @@ public static <T extends DataFilter> List<T> mergeDataFilters(List<T> filters) {
}
// merge adjacent numerical bins
else {
isNonNumericalOnly = false;
hasNumericalValue = true;
BigDecimal start = dataFilterValue.getStart();
BigDecimal end = dataFilterValue.getEnd();

Expand All @@ -161,16 +166,17 @@ public static <T extends DataFilter> List<T> mergeDataFilters(List<T> filters) {
}
else if (mergedEnd.equals(start)) {
mergedEnd = end;
} else {
mergedValues.add(new DataFilterValue(mergedStart, mergedEnd, null));
}
else {
mergedValues.add(new DataFilterValue(mergedStart, mergedEnd));
mergedStart = start;
mergedEnd = end;
}
}
}

if (!isNonNumericalOnly) {
mergedValues.add(new DataFilterValue(mergedStart, mergedEnd, null));
if (hasNumericalValue) {
mergedValues.add(new DataFilterValue(mergedStart, mergedEnd));
}
mergedValues.addAll(nonNumericalValues);
filter.setValues(mergedValues);
Expand All @@ -179,4 +185,55 @@ else if (mergedEnd.equals(start)) {

return mergedDataFilters;
}

public static <T extends DataFilter> boolean areValidFilters(List<T> filters) {
// check if filters are empty
if (filters == null || filters.isEmpty()) {
return false;
}

for (T filter : filters) {
// check if each filter has value
if (filter.getValues() == null || filter.getValues().isEmpty()) {
return false;
}

BigDecimal start = null;
BigDecimal end = null;
for (DataFilterValue dataFilterValue : filter.getValues()) {
// non-numerical value should not have numerical value
if (dataFilterValue.getValue() != null) {
if (dataFilterValue.getStart() != null || dataFilterValue.getEnd() != null) {
return false;
}
continue;
}
// check if start < end
if (dataFilterValue.getStart() != null && dataFilterValue.getEnd() != null) {
if (dataFilterValue.getStart().compareTo(dataFilterValue.getEnd()) >= 0) {
return false;
}
}
// check if start stays increasing
if (dataFilterValue.getStart() != null) {
if (start != null && start.compareTo(dataFilterValue.getStart()) >= 0) {
return false;
}
start = dataFilterValue.getStart();
// no overlapping is allowed
if (end != null && start.compareTo(end) < 0) {
return false;
}
}
// check if end stays increasing
if (dataFilterValue.getEnd() != null) {
if (end != null && end.compareTo(dataFilterValue.getEnd()) >= 0) {
return false;
}
end = dataFilterValue.getEnd();
}
}
}
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@ public class DataFilterValue implements Serializable {

public DataFilterValue() {}

public DataFilterValue(BigDecimal start, BigDecimal end, String value) {
public DataFilterValue(BigDecimal start, BigDecimal end) {
this.start = start;
this.end = end;
}

public DataFilterValue(String value) {
this.value = value;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ public class StudyViewFilterHelperTest {
public void testMergeDataFilterNumericalContinuousValues() {
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> values = new ArrayList<>();
values.add(new DataFilterValue(BigDecimal.valueOf(-5), BigDecimal.valueOf(-1), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-1), BigDecimal.valueOf(3), null));
values.add(new DataFilterValue(BigDecimal.valueOf(3), BigDecimal.valueOf(7), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-5), BigDecimal.valueOf(-1)));
values.add(new DataFilterValue(BigDecimal.valueOf(-1), BigDecimal.valueOf(3)));
values.add(new DataFilterValue(BigDecimal.valueOf(3), BigDecimal.valueOf(7)));
genomicDataFilters.add(new GenomicDataFilter(null, null, values));

List<GenomicDataFilter> mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters);
Expand All @@ -36,10 +36,10 @@ public void testMergeDataFilterNumericalContinuousValues() {
public void testMergeDataFilterNumericalDiscontinuousValues() {
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> values = new ArrayList<>();
values.add(new DataFilterValue(BigDecimal.valueOf(-2.5), BigDecimal.valueOf(-2.25), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-1.75), BigDecimal.valueOf(-1.5), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-1.5), BigDecimal.valueOf(-1.25), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-2.5), BigDecimal.valueOf(-2.25)));
values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2)));
values.add(new DataFilterValue(BigDecimal.valueOf(-1.75), BigDecimal.valueOf(-1.5)));
values.add(new DataFilterValue(BigDecimal.valueOf(-1.5), BigDecimal.valueOf(-1.25)));
genomicDataFilters.add(new GenomicDataFilter(null, null, values));

List<GenomicDataFilter> mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters);
Expand All @@ -60,9 +60,9 @@ public void testMergeDataFilterNumericalDiscontinuousValues() {
public void testMergeDataFilterNumericalInfiniteValues() {
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> values = new ArrayList<>();
values.add(new DataFilterValue(null, BigDecimal.valueOf(-2.25), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-2), null, null));
values.add(new DataFilterValue(null, BigDecimal.valueOf(-2.25)));
values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2)));
values.add(new DataFilterValue(BigDecimal.valueOf(-2), null));
genomicDataFilters.add(new GenomicDataFilter(null, null, values));

List<GenomicDataFilter> mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters);
Expand All @@ -79,9 +79,9 @@ public void testMergeDataFilterNumericalInfiniteValues() {
public void testMergeDataFilterNumericalNonNumericalValues() {
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> values = new ArrayList<>();
values.add(new DataFilterValue(BigDecimal.valueOf(-2.5), BigDecimal.valueOf(-2.25), null));
values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2), null));
values.add(new DataFilterValue(null, null, "NA"));
values.add(new DataFilterValue(BigDecimal.valueOf(-2.5), BigDecimal.valueOf(-2.25)));
values.add(new DataFilterValue(BigDecimal.valueOf(-2.25), BigDecimal.valueOf(-2)));
values.add(new DataFilterValue("NA"));
genomicDataFilters.add(new GenomicDataFilter(null, null, values));

List<GenomicDataFilter> mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters);
Expand All @@ -99,12 +99,91 @@ public void testMergeDataFilterNumericalNonNumericalValues() {
public void testMergeDataFilterNonNumericalOnlyValues() {
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> values = new ArrayList<>();
values.add(new DataFilterValue(null, null, "NA"));
values.add(new DataFilterValue("NA"));
genomicDataFilters.add(new GenomicDataFilter(null, null, values));

List<GenomicDataFilter> mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters);
List<DataFilterValue> mergedDataFilterValues = mergedGenomicDataFilters.getFirst().getValues();
String value = mergedDataFilterValues.getFirst().getValue();
assertEquals("NA", value);
}

// invalid (2, 3, "NA"] -> unprocessed (2, 3, "NA"]
@Test
public void testMergeDataFilterNonNumericalValidation() {
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> invalidValues = new ArrayList<>();
DataFilterValue invalidValue = new DataFilterValue("NA");
invalidValue.setStart(BigDecimal.valueOf(2));
invalidValue.setEnd(BigDecimal.valueOf(3));
invalidValues.add(invalidValue);
genomicDataFilters.add(new GenomicDataFilter(null, null, invalidValues));

List<GenomicDataFilter> mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters);
List<DataFilterValue> mergedDataFilterValues = mergedGenomicDataFilters.getFirst().getValues();
BigDecimal start = mergedDataFilterValues.getFirst().getStart();
BigDecimal end = mergedDataFilterValues.getFirst().getEnd();
String value = mergedDataFilterValues.getFirst().getValue();
assertEquals(0, BigDecimal.valueOf(2).compareTo(start));
assertEquals(0, BigDecimal.valueOf(3).compareTo(end));
assertEquals("NA", value);
}

// invalid (42, 6] -> unprocessed (42, 6]
@Test
public void testMergeDataFilterRangeValidation() {
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> invalidValues = new ArrayList<>();
invalidValues.add(new DataFilterValue(BigDecimal.valueOf(42), BigDecimal.valueOf(6)));
genomicDataFilters.add(new GenomicDataFilter(null, null, invalidValues));

List<GenomicDataFilter> mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters);
List<DataFilterValue> mergedDataFilterValues = mergedGenomicDataFilters.getFirst().getValues();
BigDecimal start = mergedDataFilterValues.getFirst().getStart();
BigDecimal end = mergedDataFilterValues.getFirst().getEnd();
assertEquals(0, BigDecimal.valueOf(42).compareTo(start));
assertEquals(0, BigDecimal.valueOf(6).compareTo(end));
}

// invalid (3, 5], (1, 3] -> unprocessed (3, 5], (1, 3]
@Test
public void testMergeDataFilterContinuousValidation() {
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> values = new ArrayList<>();
values.add(new DataFilterValue(BigDecimal.valueOf(3), BigDecimal.valueOf(5)));
values.add(new DataFilterValue(BigDecimal.valueOf(1), BigDecimal.valueOf(3)));
genomicDataFilters.add(new GenomicDataFilter(null, null, values));

List<GenomicDataFilter> mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters);
List<DataFilterValue> mergedDataFilterValues = mergedGenomicDataFilters.getFirst().getValues();
BigDecimal firstStart = mergedDataFilterValues.getFirst().getStart();
BigDecimal firstEnd = mergedDataFilterValues.getFirst().getEnd();
assertEquals(0, BigDecimal.valueOf(3).compareTo(firstStart));
assertEquals(0, BigDecimal.valueOf(5).compareTo(firstEnd));
BigDecimal secondStart = mergedDataFilterValues.get(1).getStart();
BigDecimal secondEnd = mergedDataFilterValues.get(1).getEnd();
assertEquals(0, BigDecimal.valueOf(1).compareTo(secondStart));
assertEquals(0, BigDecimal.valueOf(3).compareTo(secondEnd));
}

// invalid (3, 5], (2, 4] -> unprocessed (3, 5], (2, 4]
@Test
public void testMergeDataFilterNoOverlapValidation() {
List<GenomicDataFilter> genomicDataFilters = new ArrayList<>();
List<DataFilterValue> values = new ArrayList<>();
values.add(new DataFilterValue(BigDecimal.valueOf(3), BigDecimal.valueOf(5)));
values.add(new DataFilterValue(BigDecimal.valueOf(2), BigDecimal.valueOf(4)));
genomicDataFilters.add(new GenomicDataFilter(null, null, values));

List<GenomicDataFilter> mergedGenomicDataFilters = StudyViewFilterHelper.mergeDataFilters(genomicDataFilters);
List<DataFilterValue> mergedDataFilterValues = mergedGenomicDataFilters.getFirst().getValues();
BigDecimal firstStart = mergedDataFilterValues.getFirst().getStart();
BigDecimal firstEnd = mergedDataFilterValues.getFirst().getEnd();
assertEquals(0, BigDecimal.valueOf(3).compareTo(firstStart));
assertEquals(0, BigDecimal.valueOf(5).compareTo(firstEnd));
BigDecimal secondStart = mergedDataFilterValues.get(1).getStart();
BigDecimal secondEnd = mergedDataFilterValues.get(1).getEnd();
assertEquals(0, BigDecimal.valueOf(2).compareTo(secondStart));
assertEquals(0, BigDecimal.valueOf(4).compareTo(secondEnd));
}
}
1 change: 1 addition & 0 deletions src/test/resources/clickhouse_data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) val
insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values (4,'1,2,3,4,5,6,7,8,9,10,11,12,13,14,');
insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values (5,'2,');
insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values (11,'1,2,3,4,5,6,7,8,9,10,11,12,13,14,');
insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values (12,'1,2,3,4,5,6,7,8,9,10,11,12,13,14,');
insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values (14,'1,2,3,4,5,6,7,8,9,10,11,12,');
insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values (15,'1,2,3,4,');
insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values(10,'1,2,3,4,5,6,7,8,9,10,11,');
Expand Down

0 comments on commit e0da855

Please sign in to comment.