Skip to content

Commit

Permalink
Update on "Adding lock mechanism to prevent on_disk_cache downloading twice"
Browse files Browse the repository at this point in the history


Fixes #144

[ghstack-poisoned]
  • Loading branch information
VitalyFedyunin committed May 18, 2022
1 parent ffa0fa3 commit 58c25aa
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 4 deletions.
5 changes: 4 additions & 1 deletion torchdata/datapipes/iter/util/cacheholder.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,10 @@ def _hash_check(filepath, hash_dict, hash_type):
else:
hash_func = hashlib.md5()

with portalocker.Lock(filepath, "rb", flags=portalocker.LockFlags.SHARED) as f:
# with portalocker.Lock(filepath, "rb", flags=portalocker.LockFlags.SHARED) as f:
# TODO(VitalyFedyunin): The line above would require all readers (on Windows) to obtain proper locks.
# I'm putting it on hold because it would require heavy modifications to the PyTorch core codebase.
with open(filepath, "rb") as f:
chunk = f.read(1024 ** 2)
while chunk:
hash_func.update(chunk)
Expand Down
4 changes: 1 addition & 3 deletions torchdata/datapipes/iter/util/saver.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@

from typing import Any, Callable, Iterator, Optional, Tuple, Union

# import portalocker

from torchdata.datapipes import functional_datapipe
from torchdata.datapipes.iter import IterDataPipe

Expand Down Expand Up @@ -59,7 +57,7 @@ def __iter__(self) -> Iterator[str]:
if not os.path.exists(dirname):
os.makedirs(dirname)
# with portalocker.Lock(filepath, self.mode, flags=portalocker.LockFlags.EXCLUSIVE) as f:
# TODO(VitalyFedyunin): Enabling line above fails TorchText tests, need to investigate race condition
# TODO(VitalyFedyunin): Enabling line above will require all read sites to be updated (Win).
with open(filepath, self.mode) as f:
f.write(data)
yield filepath
Expand Down

0 comments on commit 58c25aa

Please sign in to comment.