Skip to content

Commit

Permalink
Merge pull request #24 from reworkd/url-validation-improvements
Browse files Browse the repository at this point in the history
URL validation improvements
  • Loading branch information
snshn authored May 31, 2024
2 parents d1bc5ea + 9457639 commit b6a92ae
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 23 deletions.
6 changes: 5 additions & 1 deletion harambe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,11 @@ def __init__(
self._stage = stage
self._scraper = scraper
self._context = context or {}
self._validator = PydanticSchemaParser(schema) if (schema is not None and schema != {}) else None
self._validator = (
PydanticSchemaParser(schema)
if (schema is not None and schema != {})
else None
)
self._saved_data = set()

if not observer:
Expand Down
30 changes: 17 additions & 13 deletions harambe/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,22 @@ class PydanticSchemaParser(SchemaParser):

def __init__(self, schema: Schema):
self.schema = schema
self.field_types = {
self.base_url = None

def validate(self, data: Dict[str, Any], base_url: URL) -> None:
    """
    Check ``data`` against the schema this parser was constructed with.

    The field-type table and the generated model are rebuilt on every call,
    because the "url" field type is bound to the ``base_url`` given here.

    :param data: field names mapped to values; unpacked as keyword
        arguments into the generated model
    :param base_url: stored on the parser and handed to the "url" field type
    :raises SchemaValidationError: when the generated model rejects ``data``
    """
    # Must be assigned first: _get_field_types() reads self.base_url when it
    # constructs the "url" field type.
    self.base_url = base_url
    self.field_types = self._get_field_types()
    self.model = self._schema_to_pydantic_model(self.schema)

    try:
        self.model(**data)
    except ValidationError as validation_error:
        # Chain explicitly so the underlying validation error is recorded as
        # the direct cause (__cause__) of the error surfaced to callers.
        raise SchemaValidationError(
            data=data, schema=self.schema, message=validation_error
        ) from validation_error

def _get_field_types(self) -> Dict[str, Type]:
return {
"string": str,
"str": str,
"boolean": bool,
Expand All @@ -51,20 +66,9 @@ def __init__(self, schema: Schema):
LIST_TYPE: List,
OBJECT_TYPE: Dict[str, Any],
"datetime": ParserTypeDate(),
"url": ParserTypeUrl(),
"url": ParserTypeUrl(base_url=self.base_url),
}

def validate(self, data: Dict[str, Any], base_url: URL) -> None:
self.field_types["url"] = ParserTypeUrl(base_url=base_url)
self.model = self._schema_to_pydantic_model(self.schema)

try:
self.model(**data)
except ValidationError as validation_error:
raise SchemaValidationError(
data=data, schema=self.schema, message=validation_error
)

def _items_schema_to_python_type(
self, items_info: Schema, model_name: str = "DynamicModelItem"
) -> Type:
Expand Down
28 changes: 19 additions & 9 deletions tests/parser/test_type_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,44 +105,54 @@
# expected
"mailto:[email protected]",
),
# 11
(
# url
"//example.com/doc1",
# base_url
"https://example.com",
# expected
"https://example.com/doc1",
),
],
)
def test_pydantic_type_url_validate_url_success(url, base_url, expected):
    """The validator built for ``base_url`` must map ``url`` to ``expected``."""
    validator = ParserTypeUrl.validate_url(base_url)
    assert validator(url) == expected


@pytest.mark.parametrize(
"url, base_url, expected",
"url, base_url",
[
# 0
(
# url
"",
# base_url
"", # ❌ An empty string
# expected
"",
),
# 1
(
# url
"",
# base_url
"www.example.com", # ❌ Isn't a valid URL
# expected
"",
),
# 2
(
# url
"",
# base_url
"s4://bucket-name/file-name.pdf", # ❌ Bad URL scheme
# expected
"",
),
# 3
(
# url
"htp://example.com/doc1", # ❌ Bad URL scheme
# base_url
"https://example.com",
),
],
)
def test_pydantic_type_url_validate_url_error(url, base_url, expected):
def test_pydantic_type_url_validate_url_error(url, base_url):
with pytest.raises(ValueError):
ParserTypeUrl.validate_url(base_url)(url) == expected
ParserTypeUrl.validate_url(base_url)(url)

0 comments on commit b6a92ae

Please sign in to comment.