diff --git a/test/test_remote_io.py b/test/test_remote_io.py index 5e60c055c..6690309bb 100644 --- a/test/test_remote_io.py +++ b/test/test_remote_io.py @@ -13,7 +13,7 @@ import torchdata -from _utils._common_utils_for_test import check_hash_fn, create_temp_dir +from _utils._common_utils_for_test import check_hash_fn, create_temp_dir, IS_WINDOWS from torch.utils.data import DataLoader from torchdata.datapipes.iter import ( @@ -180,9 +180,10 @@ def _read_and_decode(x): self.assertTrue(os.path.exists(expected_csv_path)) self.assertEqual(expected_csv_path, csv_path) - dl = DataLoader(file_cache_dp, num_workers=3, multiprocessing_context="fork", batch_size=1) - expected = [[os.path.join(self.temp_dir.name, root_dir, f"{i}.csv")] for i in range(3)] * 3 - self.assertEqual(sorted(expected), sorted(list(dl))) + if not IS_WINDOWS: + dl = DataLoader(file_cache_dp, num_workers=3, multiprocessing_context="fork", batch_size=1) + expected = [[os.path.join(self.temp_dir.name, root_dir, f"{i}.csv")] for i in range(3)] * 3 + self.assertEqual(sorted(expected), sorted(list(dl))) def test_s3_io_iterdatapipe(self): # sanity test diff --git a/torchdata/datapipes/iter/util/cacheholder.py b/torchdata/datapipes/iter/util/cacheholder.py index 58be30fc5..5aa1ac139 100644 --- a/torchdata/datapipes/iter/util/cacheholder.py +++ b/torchdata/datapipes/iter/util/cacheholder.py @@ -308,6 +308,8 @@ def __iter__(self): old_promise_filename = None old_filename = None first_entry = True + # TODO(VitalyFedyunin): Limit buffer size here. It only contains file names from the archive, + # but better safe than sorry. buffer = [] for filename in self.source_datapipe: promise_filename = _find_promise_file(filename)