Skip to content

Commit

Permalink
Add integration test for classification in Snowflake
Browse files Browse the repository at this point in the history
  • Loading branch information
mayurinehate committed Nov 8, 2022
1 parent df14717 commit 93a64ca
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -109,23 +109,36 @@ def classify_schema_fields(
) -> None:

assert self.config.classification
column_infos: List[ColumnInfo] = [
ColumnInfo(
metadata=Metadata(
{
"Name": field.fieldPath,
"Description": field.description,
"DataType": field.nativeDataType,
"Dataset_Name": dataset_name,
}
),
values=sample_data[field.fieldPath].values,
column_infos: List[ColumnInfo] = []

for field in schema_metadata.fields:
if not self.is_classification_enabled_for_column(
dataset_name, field.fieldPath
):
self.logger.debug(
f"Skipping column {dataset_name}.{field.fieldPath} from classification"
)
continue
column_infos.append(
ColumnInfo(
metadata=Metadata(
{
"Name": field.fieldPath,
"Description": field.description,
"DataType": field.nativeDataType,
"Dataset_Name": dataset_name,
}
),
values=sample_data[field.fieldPath].values
if field.fieldPath in sample_data.columns
else [],
)
)
for field in schema_metadata.fields
if self.is_classification_enabled_for_column(dataset_name, field.fieldPath)
]

if not column_infos:
self.logger.debug(
f"No columns in {dataset_name} considered for classification"
)
return None

field_terms = {}
Expand Down
Loading

0 comments on commit 93a64ca

Please sign in to comment.