Skip to content

Commit

Permalink
Update on "Adding lock mechanism to prevent on_disk_cache downloading twice"
Browse files Browse the repository at this point in the history


Fixes #144

[ghstack-poisoned]
  • Loading branch information
VitalyFedyunin committed May 18, 2022
1 parent f4c18b6 commit 748d4fc
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
2 changes: 1 addition & 1 deletion torchdata/datapipes/iter/util/cacheholder.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def _hash_check(filepath, hash_dict, hash_type):
else:
hash_func = hashlib.md5()

with portalocker.Lock(filepath, "rb", flags=portalocker.LockFlags.EXCLUSIVE) as f:
with portalocker.Lock(filepath, "rb", flags=portalocker.LockFlags.SHARED) as f:
chunk = f.read(1024 ** 2)
while chunk:
hash_func.update(chunk)
Expand Down
2 changes: 2 additions & 0 deletions torchdata/datapipes/iter/util/saver.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ def __iter__(self) -> Iterator[str]:
if not os.path.exists(dirname):
os.makedirs(dirname)
with portalocker.Lock(filepath, self.mode, flags=portalocker.LockFlags.EXCLUSIVE) as f:
# TODO(VitalyFedyunin): Enabling line above fails TorchText tests, need to investigate race condition
# with open(filepath, self.mode) as f:
f.write(data)
yield filepath

Expand Down

0 comments on commit 748d4fc

Please sign in to comment.