-
Notifications
You must be signed in to change notification settings - Fork 44
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Harden validator for partially implemented APIs #1181
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,7 @@ class that can be pointed at an OPTIMADE implementation and validated | |
import random | ||
import urllib.parse | ||
import dataclasses | ||
from typing import Union, Tuple, Any, List, Dict, Optional | ||
from typing import Union, Tuple, Any, List, Dict, Optional, Set | ||
|
||
try: | ||
import simplejson as json | ||
|
@@ -70,7 +70,7 @@ def __init__( # pylint: disable=too-many-arguments | |
base_url: str = None, | ||
verbosity: int = 0, | ||
respond_json: bool = False, | ||
page_limit: int = 5, | ||
page_limit: int = 4, | ||
max_retries: int = 5, | ||
run_optional_tests: bool = True, | ||
fail_fast: bool = False, | ||
|
@@ -380,6 +380,7 @@ def _recurse_through_endpoint(self, endp: str) -> Tuple[bool, str]: | |
prop_list = list(_impl_properties.keys()) | ||
|
||
self._check_response_fields(endp, prop_list) | ||
|
||
chosen_entry, _ = self._get_archetypal_entry(endp, prop_list) | ||
|
||
if not chosen_entry: | ||
|
@@ -524,19 +525,16 @@ def _get_archetypal_entry( | |
if data_returned < 1: | ||
return ( | ||
None, | ||
"Endpoint {endp!r} returned no entries, cannot get archetypal entry.", | ||
"Endpoint {endp!r} returned no entries, cannot get archetypal entry or test filtering.", | ||
) | ||
|
||
response, message = self._get_endpoint( | ||
f"{endp}?page_offset={random.randint(0, data_returned-1)}&response_fields={','.join(properties)}", | ||
multistage=True, | ||
archetypal_entry = response.json()["data"][ | ||
random.randint(0, data_returned - 1) | ||
] | ||
return ( | ||
archetypal_entry, | ||
f"set archetypal entry for {endp} with ID {archetypal_entry['id']}.", | ||
) | ||
if response: | ||
archetypal_entry = response.json()["data"][0] | ||
return ( | ||
archetypal_entry, | ||
f"set archetypal entry for {endp} with ID {archetypal_entry['id']}.", | ||
) | ||
|
||
raise ResponseError(f"Failed to get archetypal entry. Details: {message}") | ||
|
||
|
@@ -557,7 +555,7 @@ def _check_response_fields(self, endp: str, fields: List[str]) -> Tuple[bool, st | |
test_query = f"{endp}?response_fields={','.join(subset_fields)}&page_limit=1" | ||
response, _ = self._get_endpoint(test_query, multistage=True) | ||
|
||
if response and len(response.json()["data"]) == 1: | ||
if response and len(response.json()["data"]) >= 0: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was previously allowing databases that did not support |
||
doc = response.json()["data"][0] | ||
expected_fields = set(subset_fields) | ||
expected_fields -= CONF.top_level_non_attribute_fields | ||
|
@@ -825,8 +823,48 @@ def _construct_single_property_filters( | |
else: | ||
num_data_returned[operator] = response["meta"].get("data_returned") | ||
|
||
if prop in CONF.unique_properties and operator == "=": | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code block was just moved up from below the reversed query tests, with this line being the only change (previously it assumed "=" was already in the dict). |
||
if num_data_returned["="] is not None and num_data_returned["="] == 0: | ||
raise ResponseError( | ||
f"Unable to filter field 'id' for equality, no data was returned for {query}." | ||
) | ||
if num_data_returned["="] is not None and num_data_returned["="] > 1: | ||
raise ResponseError( | ||
f"Filter for an individual 'id' returned {num_data_returned['=']} results, when only 1 was expected." | ||
) | ||
|
||
num_response = num_data_returned[operator] | ||
|
||
excluded = operator in exclusive_operators | ||
# if we have all results on this page, check that the blessed ID is in the response | ||
# if not response["meta"]["more_data_available"]: | ||
if excluded and ( | ||
chosen_entry["id"] in set(entry["id"] for entry in response["data"]) | ||
): | ||
raise ResponseError( | ||
f"Entry {chosen_entry['id']} with value {prop!r}: {test_value} was not excluded by {query!r}" | ||
) | ||
|
||
# check that at least the archetypal structure was returned, unless we are using a fallback value | ||
if not excluded and not using_fallback: | ||
if ( | ||
num_data_returned[operator] is not None | ||
and num_data_returned[operator] < 1 | ||
): | ||
raise ResponseError( | ||
f"Supposedly inclusive query {query!r} did not include original entry ID {chosen_entry['id']!r} " | ||
f"(with field {prop!r} = {test_value}) potentially indicating a problem with filtering on this field." | ||
) | ||
|
||
# check that the filter returned no entries that had a null or missing value for the filtered property | ||
if any( | ||
entry["attributes"].get(prop, entry.get(prop, None)) is None | ||
for entry in response.get("data", []) | ||
): | ||
raise ResponseError( | ||
f"Filter {query!r} on field {prop!r} returned entries that had null or missing values for the field." | ||
) | ||
|
||
# Numeric and string comparisons must work both ways... | ||
if prop_type in ( | ||
DataType.STRING, | ||
|
@@ -905,46 +943,6 @@ def _construct_single_property_filters( | |
f"Filter {reversed_query!r} on field {prop!r} returned entries that had null or missing values for the field." | ||
) | ||
|
||
excluded = operator in exclusive_operators | ||
# if we have all results on this page, check that the blessed ID is in the response | ||
if not response["meta"]["more_data_available"]: | ||
if excluded and ( | ||
chosen_entry["id"] in set(entry["id"] for entry in response["data"]) | ||
): | ||
raise ResponseError( | ||
f"Object {chosen_entry['id']} with value {test_value} was not excluded by {query}" | ||
) | ||
|
||
# check that at least the archetypal structure was returned, unless we are using a fallback value | ||
if not excluded and not using_fallback: | ||
if ( | ||
num_data_returned[operator] is not None | ||
and num_data_returned[operator] < 1 | ||
): | ||
raise ResponseError( | ||
f"Supposedly inclusive query '{query}' did not include original object ID {chosen_entry['id']} " | ||
f"(with field '{prop} = {test_value}') potentially indicating a problem with filtering on this field." | ||
) | ||
|
||
# check that the filter returned no entries that had a null or missing value for the filtered property | ||
if any( | ||
entry["attributes"].get(prop, entry.get(prop, None)) is None | ||
for entry in response.get("data", []) | ||
): | ||
raise ResponseError( | ||
f"Filter {query!r} on field {prop!r} returned entries that had null or missing values for the field." | ||
) | ||
|
||
if prop in CONF.unique_properties: | ||
if num_data_returned["="] is not None and num_data_returned["="] == 0: | ||
raise ResponseError( | ||
f"Unable to filter field 'id' for equality, no data was returned for {query}." | ||
) | ||
if num_data_returned["="] is not None and num_data_returned["="] > 1: | ||
raise ResponseError( | ||
f"Filter for an individual 'id' returned {num_data_returned['=']} results, when only 1 was expected." | ||
) | ||
|
||
return True, f"{prop} passed filter tests" | ||
|
||
def _test_info_or_links_endpoint(self, request_str: str) -> Union[bool, dict]: | ||
|
@@ -1237,7 +1235,10 @@ def _test_bad_version_returns_553(self) -> None: | |
|
||
@test_case | ||
def _test_page_limit( | ||
self, response: requests.models.Response, check_next_link: int = 5 | ||
self, | ||
response: requests.models.Response, | ||
check_next_link: int = 5, | ||
previous_links: Optional[Set[str]] = None, | ||
) -> Tuple[Optional[bool], str]: | ||
"""Test that a multi-entry endpoint obeys the page limit by | ||
following pagination links up to a depth of `check_next_link`. | ||
|
@@ -1247,12 +1248,16 @@ def _test_page_limit( | |
compliance. | ||
check_next_link: Maximum recursion depth for following | ||
pagination links. | ||
previous_links: A set of previous links that will be used | ||
to check that the `next` link is actually new. | ||
|
||
Returns: | ||
`True` if the test was successful and `None` if not, with a | ||
string summary. | ||
|
||
""" | ||
if previous_links is None: | ||
previous_links = set() | ||
try: | ||
response = response.json() | ||
except (AttributeError, json.JSONDecodeError): | ||
|
@@ -1285,6 +1290,12 @@ def _test_page_limit( | |
"Endpoint suggested more data was available but provided no valid links->next link." | ||
) | ||
|
||
if next_link in previous_links: | ||
raise ResponseError( | ||
f"The next link {next_link} has been provided already for a previous page." | ||
) | ||
previous_links.add(next_link) | ||
|
||
Comment on lines
+1293
to
+1298
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously, it was valid to return the same |
||
if not isinstance(next_link, str): | ||
raise ResponseError( | ||
f"Unable to parse links->next {next_link!r} as a link." | ||
|
@@ -1302,6 +1313,7 @@ def _test_page_limit( | |
next_response, | ||
check_next_link=check_next_link, | ||
multistage=check_next_link, | ||
previous_links=previous_links, | ||
) | ||
|
||
return ( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change removes the need for
page_offset
to be implemented by the API, and instead just chooses a random result from the first page of the API results to use for the filter tests.