Skip to content

Commit

Permalink
Single list for Ingestion Model and adapted tests (#121)
Browse files Browse the repository at this point in the history
# PR Context
<!-- Additional info for the reviewer -->

# Added
<!-- New features and interfaces -->

# Changes
<!-- Changes in existing functionality -->
- BulkIngestRequest now contains a single list, "items"
- tests for ingestion adapted to the BulkIngestRequest model

# Deprecated
<!-- Soon-to-be removed features -->

# Removed
<!-- Definitely removed features -->
- removed class _BaseBulkIngestRequest for the ingestion model

# Fixed
<!-- Fixed bugs -->

# Security
<!-- Fixed vulnerabilities -->

---------

Signed-off-by: vyvytranngoc <[email protected]>
Co-authored-by: Nicolas Drebenstedt <[email protected]>
  • Loading branch information
vyvytranngoc and cutoffthetop authored Jul 31, 2024
1 parent 428264c commit 6f2655e
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 86 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changes

- BulkIngestRequest now contains a single list, "items"
- tests for ingestion adapted to the BulkIngestRequest model

### Deprecated

### Removed

- removed class _BaseBulkIngestRequest for the ingestion model

### Fixed

Expand Down
8 changes: 2 additions & 6 deletions mex/backend/ingest/main.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
from fastapi import APIRouter
from fastapi.responses import JSONResponse

from mex.backend.graph.connector import GraphConnector
from mex.backend.ingest.models import BulkIngestRequest, BulkIngestResponse
from mex.backend.transform import to_primitive

router = APIRouter()

Expand All @@ -12,7 +10,5 @@
def ingest_extracted_items(request: BulkIngestRequest) -> BulkIngestResponse:
"""Ingest batches of extracted items grouped by their type."""
connector = GraphConnector.get()
models = request.get_all()
identifiers = connector.ingest(models)
response = BulkIngestResponse(identifiers=identifiers)
return JSONResponse(to_primitive(response), 201) # type: ignore[return-value]
identifiers = connector.ingest(request.items)
return BulkIngestResponse(identifiers=identifiers)
54 changes: 2 additions & 52 deletions mex/backend/ingest/models.py
Original file line number Diff line number Diff line change
@@ -1,64 +1,14 @@
from typing import TYPE_CHECKING

from pydantic import ConfigDict, create_model

from mex.common.models import (
EXTRACTED_MODEL_CLASSES_BY_NAME,
AnyExtractedModel,
BaseModel,
)
from mex.common.types import Identifier


class _BaseBulkIngestRequest(BaseModel):
class BulkIngestRequest(BaseModel):
"""Request body for the bulk ingestion endpoint."""

model_config = ConfigDict(
json_schema_extra={
"examples": [
{
"ExtractedPerson": [
{
"hadPrimarySource": "000001111122222",
"identifierInPrimarySource": "jimmy",
"email": ["[email protected]"],
"givenName": "Jimmy",
"memberOf": ["111112222233333"],
}
],
"ExtractedContactPoint": [
{
"hadPrimarySource": "000001111122222",
"identifierInPrimarySource": "sales",
"email": ["[email protected]"],
},
{
"hadPrimarySource": "000001111122222",
"identifierInPrimarySource": "hr",
"email": ["[email protected]"],
},
],
}
]
}
)

def get_all(self) -> list[AnyExtractedModel]:
return [data for name in self.model_fields for data in getattr(self, name)]


if TYPE_CHECKING: # pragma: no cover
BulkIngestRequest = _BaseBulkIngestRequest
else:
BulkIngestRequest = create_model(
"BulkIngestRequest",
__base__=_BaseBulkIngestRequest,
__module__=__name__,
**{
name: (list[model], [])
for name, model in EXTRACTED_MODEL_CLASSES_BY_NAME.items()
},
)
items: list[AnyExtractedModel]


class BulkIngestResponse(BaseModel):
Expand Down
34 changes: 17 additions & 17 deletions tests/ingest/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@
def post_payload(dummy_data: list[AnyExtractedModel]) -> Payload:
payload = defaultdict(list)
for model in dummy_data:
payload[model.entityType].append(model.model_dump())
payload["items"].append(model.model_dump())
return cast(Payload, dict(payload))


@pytest.mark.integration
def test_bulk_insert_empty(client_with_api_key_write_permission: TestClient) -> None:
response = client_with_api_key_write_permission.post("/v0/ingest", json={})

response = client_with_api_key_write_permission.post(
"/v0/ingest", json={"items": []}
)
assert response.status_code == 201, response.text
assert response.json() == {"identifiers": []}

Expand Down Expand Up @@ -54,24 +55,23 @@ def test_bulk_insert(
def test_bulk_insert_malformed(
client_with_api_key_write_permission: TestClient,
) -> None:
expected_res = []
exp_err = {
"ctx": {"error": {}},
"input": "FAIL!",
"loc": ["body", "items", 0, "function-wrap[fix_listyness()]"],
"msg": "Assertion failed, Input should be a valid dictionary, validating "
"other types is not supported for models with computed fields.",
"type": "assertion_error",
}
expected_res += [exp_err] * 11

response = client_with_api_key_write_permission.post(
"/v0/ingest",
json={"ExtractedContactPoint": "FAIL!"},
json={"items": ["FAIL!"]},
)
assert response.status_code == 422, response.text
assert response.json() == {
"detail": [
{
"ctx": {"error": {}},
"type": "assertion_error",
"loc": ["body", "ExtractedContactPoint", 0],
"msg": "Assertion failed, Input should be a valid dictionary, "
"validating other types is not supported for models with computed "
"fields.",
"input": "FAIL!",
}
]
}
assert response.json() == {"detail": expected_res}


def test_bulk_insert_mocked(
Expand Down
11 changes: 0 additions & 11 deletions tests/ingest/test_models.py

This file was deleted.

0 comments on commit 6f2655e

Please sign in to comment.