Skip to content

Commit

Permalink
Merge branch 'develop' into feature/gcp
Browse files Browse the repository at this point in the history
  • Loading branch information
jcampbell authored Nov 6, 2019
2 parents 94b456d + 2844644 commit 8bf50a0
Show file tree
Hide file tree
Showing 6 changed files with 228 additions and 28 deletions.
52 changes: 28 additions & 24 deletions great_expectations/data_context/store/store_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,11 +151,15 @@ def __init__(
filepath_template,
key_length,
root_directory,
forbidden_substrings=["/", "\\"],
forbidden_substrings=None,
platform_specific_separator=True
):
assert isinstance(key_length, int)
self.key_length = key_length
if forbidden_substrings is None:
forbidden_substrings = ["/", "\\"]
self.forbidden_substrings = forbidden_substrings
self.platform_specific_separator = platform_specific_separator

self.filepath_template = filepath_template
self.verify_that_key_to_filepath_operation_is_reversible()
Expand All @@ -182,28 +186,34 @@ def _validate_value(self, value):
type(value),
))


def _convert_key_to_filepath(self, key):
# NOTE: At some point in the future, it might be better to replace this logic with os.path.join.
# That seems more correct, but the configs will be a lot less intuitive.
# In the meantime, there is some chance that configs will not be cross-OS compatible.

# NOTE : These methods support fixed-length keys, but not variable.
self._validate_key(key)

converted_string = self.filepath_template.format(*list(key))

if self.platform_specific_separator:
converted_string = os.path.join(*converted_string.split('/'))
return converted_string

def _convert_filepath_to_key(self, filepath):
# filepath_template (for now) is always specified with forward slashes, but it is then
# used to (1) dynamically construct and evaluate a regex, and (2) split the provided (observed) filepath
if self.platform_specific_separator:
filepath_template = os.path.join(*self.filepath_template.split('/'))
filepath_template = filepath_template.replace('\\', '\\\\')
else:
filepath_template = self.filepath_template

# Convert the template to a regex
indexed_string_substitutions = re.findall("\{\d+\}", self.filepath_template)
indexed_string_substitutions = re.findall(r"{\d+}", filepath_template)
tuple_index_list = ["(?P<tuple_index_{0}>.*)".format(i, ) for i in range(len(indexed_string_substitutions))]
intermediate_filepath_regex = re.sub(
"\{\d+\}",
r"{\d+}",
lambda m, r=iter(tuple_index_list): next(r),
self.filepath_template
filepath_template
)
filepath_regex = intermediate_filepath_regex.format(*tuple_index_list)

Expand All @@ -212,8 +222,7 @@ def _convert_filepath_to_key(self, filepath):
if matches is None:
return None

#Map key elements into the appropriate parts of the tuple
# TODO: A common configuration error is for the key length to not match the number of elements in the filepath_template. We should trap this error and add a more informative message.
# Map key elements into the appropriate parts of the tuple
new_key = list([None for element in range(self.key_length)])
for i in range(len(tuple_index_list)):
tuple_index = int(re.search('\d+', indexed_string_substitutions[i]).group(0))
Expand All @@ -224,12 +233,6 @@ def _convert_filepath_to_key(self, filepath):
return new_key

def verify_that_key_to_filepath_operation_is_reversible(self):
# NOTE: There's actually a fairly complicated problem here, similar to magic autocomplete for dataAssetNames.
# "Under what conditions does an incomplete key tuple fully specify an object within the GE namespace?"
# This doesn't just depend on the structure of keys.
# It also depends on uniqueness of combinations of named elements within the namespace tree.
# For now, I do the blunt thing and force filepaths to fully specify keys.

def get_random_hex(len=4):
return "".join([random.choice(list("ABCDEF0123456789")) for i in range(len)])

Expand All @@ -243,10 +246,6 @@ def get_random_hex(len=4):
self.__class__.__name__,
self.key_length,
))
# raise AssertionError("Cannot reverse key conversion in {}\nThis is most likely a problem with your filepath_template:\n\t{}".format(
# self.__class__.__name__,
# self.filepath_template
# ))


class FixedLengthTupleFilesystemStoreBackend(FixedLengthTupleStoreBackend):
Expand All @@ -264,13 +263,15 @@ def __init__(
filepath_template,
key_length,
root_directory,
forbidden_substrings=["/", "\\"],
forbidden_substrings=None,
platform_specific_separator=True
):
super(FixedLengthTupleFilesystemStoreBackend, self).__init__(
root_directory=root_directory,
filepath_template=filepath_template,
key_length=key_length,
forbidden_substrings=forbidden_substrings,
platform_specific_separator=platform_specific_separator
)

self.base_directory = base_directory
Expand Down Expand Up @@ -353,18 +354,19 @@ def __init__(
key_length,
bucket,
prefix="",
forbidden_substrings=["/", "\\"],
forbidden_substrings=None,
platform_specific_separator=False
):
super(FixedLengthTupleS3StoreBackend, self).__init__(
root_directory=root_directory,
filepath_template=filepath_template,
key_length=key_length,
forbidden_substrings=forbidden_substrings,
platform_specific_separator=platform_specific_separator
)
self.bucket = bucket
self.prefix = prefix


def _get(self, key):
s3_object_key = os.path.join(
self.prefix,
Expand Down Expand Up @@ -431,13 +433,15 @@ def __init__(
bucket,
prefix,
project,
forbidden_substrings=["/", "\\"],
forbidden_substrings=None,
platform_specific_separator=False
):
super(FixedLengthTupleGCSStoreBackend, self).__init__(
root_directory=root_directory,
filepath_template=filepath_template,
key_length=key_length,
forbidden_substrings=forbidden_substrings,
platform_specific_separator=platform_specific_separator
)
self.bucket = bucket
self.prefix = prefix
Expand Down Expand Up @@ -492,4 +496,4 @@ def has_key(self, key):
assert isinstance(key, string_types)

all_keys = self.list_keys()
return key in all_keys
return key in all_keys
54 changes: 53 additions & 1 deletion great_expectations/render/renderer/page_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,14 @@ def render(self, validation_results={}):
overview_content_blocks = [
self._render_validation_header(),
self._render_validation_info(validation_results=validation_results),
self._render_validation_statistics(validation_results=validation_results)
self._render_validation_statistics(validation_results=validation_results),
]

if validation_results["meta"].get("batch_kwargs"):
overview_content_blocks.insert(
2,
self._render_batch_kwargs(validation_results=validation_results)
)

if "data_asset_name" in validation_results["meta"] and validation_results["meta"]["data_asset_name"]:
data_asset_name = short_data_asset_name
Expand Down Expand Up @@ -135,6 +141,52 @@ def _render_validation_info(cls, validation_results):
},
})

@classmethod
def _render_batch_kwargs(cls, validation_results):
batch_kwargs = validation_results["meta"].get("batch_kwargs")
table_rows = [
[
kwarg,
{
"content_block_type": "string_template",
"string_template": {
"template": "$value",
"params": {
"value": str(value)
},
"styling": {
"default": {
"styles": {
"word-break": "break-all"
}
},
}
},
"styling": {
"parent": {
"classes": ["pl-3"],
}
}
}
] for kwarg, value in batch_kwargs.items()
]
table_rows.sort(key=lambda row: row[0])

return RenderedComponentContent(**{
"content_block_type": "table",
"header": "Batch Kwargs",
"table": table_rows,
"styling": {
"classes": ["col-12", "table-responsive"],
"styles": {
"margin-top": "20px"
},
"body": {
"classes": ["table", "table-sm"]
}
},
})

@classmethod
def _render_validation_statistics(cls, validation_results):
statistics = validation_results["statistics"]
Expand Down
8 changes: 7 additions & 1 deletion tests/render/fixtures/BasicDatasetProfiler_evrs.json
Original file line number Diff line number Diff line change
Expand Up @@ -2166,6 +2166,12 @@
"great_expectations.__version__": "__fixture__",
"data_asset_name": null,
"expectation_suite_name": "default",
"run_id": "__run_id_fixture__"
"run_id": "__run_id_fixture__",
"batch_kwargs": {
"path": "project_dir/project_path/data/titanic/Titanic.csv",
"partition_id": "Titanic",
"sep": null,
"engine": "python"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2026,6 +2026,12 @@
"great_expectations.__version__": "__fixture__",
"data_asset_name": null,
"expectation_suite_name": "default",
"run_id": "__run_id_fixture__"
"run_id": "__run_id_fixture__",
"batch_kwargs": {
"path": "project_dir/project_path/data/titanic/Titanic.csv",
"partition_id": "Titanic",
"sep": null,
"engine": "python"
}
}
}
132 changes: 132 additions & 0 deletions tests/render/test_page_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,3 +240,135 @@ def test_ValidationResultsPageRenderer_render_validation_statistics(titanic_prof

assert validation_statistics == expected_validation_statistics


def test_ValidationResultsPageRenderer_render_batch_kwargs(titanic_profiled_evrs_1):
batch_kwargs_table = ValidationResultsPageRenderer._render_batch_kwargs(titanic_profiled_evrs_1)
print(json.dumps(batch_kwargs_table, indent=2))

expected_batch_kwarg_table = {
"content_block_type": "table",
"header": "Batch Kwargs",
"table": [
[
"engine",
{
"content_block_type": "string_template",
"string_template": {
"template": "$value",
"params": {
"value": "python"
},
"styling": {
"default": {
"styles": {
"word-break": "break-all"
}
}
}
},
"styling": {
"parent": {
"classes": [
"pl-3"
]
}
}
}
],
[
"partition_id",
{
"content_block_type": "string_template",
"string_template": {
"template": "$value",
"params": {
"value": "Titanic"
},
"styling": {
"default": {
"styles": {
"word-break": "break-all"
}
}
}
},
"styling": {
"parent": {
"classes": [
"pl-3"
]
}
}
}
],
[
"path",
{
"content_block_type": "string_template",
"string_template": {
"template": "$value",
"params": {
"value": "project_dir/project_path/data/titanic/Titanic.csv"
},
"styling": {
"default": {
"styles": {
"word-break": "break-all"
}
}
}
},
"styling": {
"parent": {
"classes": [
"pl-3"
]
}
}
}
],
[
"sep",
{
"content_block_type": "string_template",
"string_template": {
"template": "$value",
"params": {
"value": "None"
},
"styling": {
"default": {
"styles": {
"word-break": "break-all"
}
}
}
},
"styling": {
"parent": {
"classes": [
"pl-3"
]
}
}
}
]
],
"styling": {
"classes": [
"col-12",
"table-responsive"
],
"styles": {
"margin-top": "20px"
},
"body": {
"classes": [
"table",
"table-sm"
]
}
}
}

assert batch_kwargs_table == expected_batch_kwarg_table
2 changes: 1 addition & 1 deletion tests/store/test_store_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_FilesystemStoreBackend_two_way_string_conversion(tmp_path_factory):
root_directory=os.path.abspath(path),
base_directory=project_path,
key_length=3,
filepath_template= "{0}/{1}/{2}/foo-{2}-expectations.txt",
filepath_template="{0}/{1}/{2}/foo-{2}-expectations.txt",
)

tuple_ = ("A__a", "B-b", "C")
Expand Down

0 comments on commit 8bf50a0

Please sign in to comment.