Skip to content

Commit

Permalink
feat!: validate MD5 checksums on upload and export (#165)
Browse files Browse the repository at this point in the history
* feat!: validate MD5 checksums on upload and export

* Remove print

* Refactor file handling for style

* Fix style / lint issues

* Format to style guide

* Refactor for linter / cognitive complexity rule
  • Loading branch information
alexbostock authored Feb 20, 2024
1 parent 57c97f5 commit dfbfabc
Showing 1 changed file with 24 additions and 7 deletions.
31 changes: 24 additions & 7 deletions scribemi/ScribeMi.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from aws_requests_auth.aws_auth import AWSRequestsAuth
from datetime import datetime
from typing_extensions import TypedDict, Optional, List
from hashlib import md5
from base64 import b64encode


class Env(TypedDict):
Expand Down Expand Up @@ -416,6 +418,8 @@ def fetch_model(self, task: MITask):
)
res = requests.get(modelUrl)
if res.status_code == 200:
verify_etag_checksum(res)

return json.loads(res.text)
elif res.status_code == 401 or res.status_code == 403:
raise UnauthenticatedException(
Expand Down Expand Up @@ -462,14 +466,20 @@ def submit_task(
if isinstance(file_or_filename, str) and params.get("filename") == None:
params["filename"] = file_or_filename

post_res = self.call_endpoint("POST", "/tasks", params)
put_url = post_res["url"]

if isinstance(file_or_filename, str):
with open(file_or_filename, "rb") as file:
upload_file(file, put_url)
file_content = file.read()
else:
return upload_file(file_or_filename, put_url)
file_content = file_or_filename.read()

hash = md5(file_content, usedforsecurity=False)
md5checksum = b64encode(hash.digest()).decode()
params["md5checksum"] = md5checksum

post_res = self.call_endpoint("POST", "/tasks", params)
put_url = post_res["url"]

upload_file(file_content, md5checksum, put_url)

return post_res["jobid"]

Expand All @@ -486,7 +496,14 @@ def delete_task(self, task: MITask):
return self.call_endpoint("DELETE", "/tasks/{}".format(task["jobid"]))


def upload_file(file, url):
res = requests.put(url, data=file)
def upload_file(file, md5checksum, url):
    """PUT ``file`` to ``url``, declaring its MD5 checksum in the request headers.

    Args:
        file: Request body, passed to ``requests.put`` as ``data``.
        md5checksum: Value sent verbatim as the ``Content-MD5`` header.
        url: Destination URL of the PUT request.

    Raises:
        Exception: If the response status code is anything other than 200.
    """
    checksum_headers = {"Content-MD5": md5checksum}
    response = requests.put(url, data=file, headers=checksum_headers)
    if response.status_code == 200:
        return
    raise Exception("Error uploading file: {}".format(response.status_code))


def verify_etag_checksum(res: "requests.Response"):
    """Check a response body against the MD5 digest carried in its ETag header.

    The hex MD5 digest of the raw response bytes is compared with the value of
    the ``ETag`` header after every double-quote character has been removed.

    Args:
        res: Response whose ``headers`` and ``content`` are inspected.

    Raises:
        KeyError: If the response carries no ``ETag`` header.
        Exception: If the computed digest differs from the ETag value.
    """
    # NOTE(review): assumes the server's ETag is the plain hex MD5 of the body
    # (no weak "W/" prefix, no multipart "-N" suffix) -- confirm against the
    # storage backend.
    md5checksum_expected = res.headers["ETag"].replace('"', "")
    # Hash the bytes exactly as received. Going through ``res.text`` decodes
    # with the detected charset and re-encodes as UTF-8, which changes the
    # bytes (and therefore the digest) for any body that is not valid UTF-8.
    md5checksum = md5(res.content, usedforsecurity=False).hexdigest()
    if md5checksum != md5checksum_expected:
        raise Exception("Integrity Error: invalid checksum. Please retry.")

0 comments on commit dfbfabc

Please sign in to comment.