Skip to content

Commit

Permalink
Merge branch 'main' into patch-1
Browse files Browse the repository at this point in the history
  • Loading branch information
datumbox authored Jan 17, 2022
2 parents f027050 + 09c5ddd commit 341ca10
Show file tree
Hide file tree
Showing 8 changed files with 471 additions and 73 deletions.
19 changes: 2 additions & 17 deletions test/test_prototype_datasets_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,21 +125,6 @@ def test_default_config(self, info):

assert info.default_config == default_config

@pytest.mark.parametrize(
"valid_options",
[
pytest.param(None, id="default"),
pytest.param(dict(option=("value",)), id="no_split"),
],
)
def test_default_config_split_train(self, valid_options):
info = make_minimal_dataset_info(valid_options=valid_options)
assert info.default_config.split == "train"

def test_valid_options_split_but_no_train(self):
with pytest.raises(ValueError, match="'train' has to be a valid argument for option 'split'"):
make_minimal_dataset_info(valid_options=dict(split=("test",)))

@pytest.mark.parametrize(
("options", "expected_error_msg"),
[
Expand Down Expand Up @@ -208,7 +193,7 @@ def test_default_config(self):
("config", "kwarg"),
[
pytest.param(*(datasets.utils.DatasetConfig(split="test"),) * 2, id="specific"),
pytest.param(make_minimal_dataset_info().default_config, None, id="default"),
pytest.param(DatasetMock().default_config, None, id="default"),
],
)
def test_load_config(self, config, kwarg):
Expand All @@ -218,7 +203,7 @@ def test_load_config(self, config, kwarg):

dataset.resources.assert_called_with(config)

(_, call_kwargs) = dataset._make_datapipe.call_args
_, call_kwargs = dataset._make_datapipe.call_args
assert call_kwargs["config"] == config

def test_missing_dependencies(self):
Expand Down
1 change: 1 addition & 0 deletions torchvision/prototype/datasets/_builtin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .cifar import Cifar10, Cifar100
from .clevr import CLEVR
from .coco import Coco
from .cub200 import CUB200
from .dtd import DTD
from .fer2013 import FER2013
from .imagenet import ImageNet
Expand Down
67 changes: 28 additions & 39 deletions torchvision/prototype/datasets/_builtin/cifar.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,32 @@ def __iter__(self) -> Iterator[Tuple[np.ndarray, int]]:


class _CifarBase(Dataset):
_FILE_NAME: str
_SHA256: str
_LABELS_KEY: str
_META_FILE_NAME: str
_CATEGORIES_KEY: str

@abc.abstractmethod
def _is_data_file(self, data: Tuple[str, io.IOBase], *, config: DatasetConfig) -> Optional[int]:
def _is_data_file(self, data: Tuple[str, io.IOBase], *, split: str) -> Optional[int]:
pass

def _make_info(self) -> DatasetInfo:
return DatasetInfo(
type(self).__name__.lower(),
type=DatasetType.RAW,
homepage="https://www.cs.toronto.edu/~kriz/cifar.html",
valid_options=dict(split=("train", "test")),
)

def resources(self, config: DatasetConfig) -> List[OnlineResource]:
return [
HttpResource(
f"https://www.cs.toronto.edu/~kriz/{self._FILE_NAME}",
sha256=self._SHA256,
)
]

def _unpickle(self, data: Tuple[str, io.BytesIO]) -> Dict[str, Any]:
_, file = data
return cast(Dict[str, Any], pickle.load(file, encoding="latin1"))
Expand Down Expand Up @@ -84,7 +102,7 @@ def _make_datapipe(
decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> IterDataPipe[Dict[str, Any]]:
dp = resource_dps[0]
dp = Filter(dp, functools.partial(self._is_data_file, config=config))
dp = Filter(dp, functools.partial(self._is_data_file, split=config.split))
dp = Mapper(dp, self._unpickle)
dp = CifarFileReader(dp, labels_key=self._LABELS_KEY)
dp = hint_sharding(dp)
Expand All @@ -102,53 +120,24 @@ def _generate_categories(self, root: pathlib.Path) -> List[str]:


class Cifar10(_CifarBase):
_FILE_NAME = "cifar-10-python.tar.gz"
_SHA256 = "6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce"
_LABELS_KEY = "labels"
_META_FILE_NAME = "batches.meta"
_CATEGORIES_KEY = "label_names"

def _is_data_file(self, data: Tuple[str, Any], *, config: DatasetConfig) -> bool:
def _is_data_file(self, data: Tuple[str, Any], *, split: str) -> bool:
path = pathlib.Path(data[0])
return path.name.startswith("data" if config.split == "train" else "test")

def _make_info(self) -> DatasetInfo:
return DatasetInfo(
"cifar10",
type=DatasetType.RAW,
homepage="https://www.cs.toronto.edu/~kriz/cifar.html",
)

def resources(self, config: DatasetConfig) -> List[OnlineResource]:
return [
HttpResource(
"https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz",
sha256="6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce",
)
]
return path.name.startswith("data" if split == "train" else "test")


class Cifar100(_CifarBase):
_FILE_NAME = "cifar-100-python.tar.gz"
_SHA256 = "85cd44d02ba6437773c5bbd22e183051d648de2e7d6b014e1ef29b855ba677a7"
_LABELS_KEY = "fine_labels"
_META_FILE_NAME = "meta"
_CATEGORIES_KEY = "fine_label_names"

def _is_data_file(self, data: Tuple[str, Any], *, config: DatasetConfig) -> bool:
def _is_data_file(self, data: Tuple[str, Any], *, split: str) -> bool:
path = pathlib.Path(data[0])
return path.name == cast(str, config.split)

def _make_info(self) -> DatasetInfo:
return DatasetInfo(
"cifar100",
type=DatasetType.RAW,
homepage="https://www.cs.toronto.edu/~kriz/cifar.html",
valid_options=dict(
split=("train", "test"),
),
)

def resources(self, config: DatasetConfig) -> List[OnlineResource]:
return [
HttpResource(
"https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz",
sha256="85cd44d02ba6437773c5bbd22e183051d648de2e7d6b014e1ef29b855ba677a7",
)
]
return path.name == split
200 changes: 200 additions & 0 deletions torchvision/prototype/datasets/_builtin/cub200.categories
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
Black_footed_Albatross
Laysan_Albatross
Sooty_Albatross
Groove_billed_Ani
Crested_Auklet
Least_Auklet
Parakeet_Auklet
Rhinoceros_Auklet
Brewer_Blackbird
Red_winged_Blackbird
Rusty_Blackbird
Yellow_headed_Blackbird
Bobolink
Indigo_Bunting
Lazuli_Bunting
Painted_Bunting
Cardinal
Spotted_Catbird
Gray_Catbird
Yellow_breasted_Chat
Eastern_Towhee
Chuck_will_Widow
Brandt_Cormorant
Red_faced_Cormorant
Pelagic_Cormorant
Bronzed_Cowbird
Shiny_Cowbird
Brown_Creeper
American_Crow
Fish_Crow
Black_billed_Cuckoo
Mangrove_Cuckoo
Yellow_billed_Cuckoo
Gray_crowned_Rosy_Finch
Purple_Finch
Northern_Flicker
Acadian_Flycatcher
Great_Crested_Flycatcher
Least_Flycatcher
Olive_sided_Flycatcher
Scissor_tailed_Flycatcher
Vermilion_Flycatcher
Yellow_bellied_Flycatcher
Frigatebird
Northern_Fulmar
Gadwall
American_Goldfinch
European_Goldfinch
Boat_tailed_Grackle
Eared_Grebe
Horned_Grebe
Pied_billed_Grebe
Western_Grebe
Blue_Grosbeak
Evening_Grosbeak
Pine_Grosbeak
Rose_breasted_Grosbeak
Pigeon_Guillemot
California_Gull
Glaucous_winged_Gull
Heermann_Gull
Herring_Gull
Ivory_Gull
Ring_billed_Gull
Slaty_backed_Gull
Western_Gull
Anna_Hummingbird
Ruby_throated_Hummingbird
Rufous_Hummingbird
Green_Violetear
Long_tailed_Jaeger
Pomarine_Jaeger
Blue_Jay
Florida_Jay
Green_Jay
Dark_eyed_Junco
Tropical_Kingbird
Gray_Kingbird
Belted_Kingfisher
Green_Kingfisher
Pied_Kingfisher
Ringed_Kingfisher
White_breasted_Kingfisher
Red_legged_Kittiwake
Horned_Lark
Pacific_Loon
Mallard
Western_Meadowlark
Hooded_Merganser
Red_breasted_Merganser
Mockingbird
Nighthawk
Clark_Nutcracker
White_breasted_Nuthatch
Baltimore_Oriole
Hooded_Oriole
Orchard_Oriole
Scott_Oriole
Ovenbird
Brown_Pelican
White_Pelican
Western_Wood_Pewee
Sayornis
American_Pipit
Whip_poor_Will
Horned_Puffin
Common_Raven
White_necked_Raven
American_Redstart
Geococcyx
Loggerhead_Shrike
Great_Grey_Shrike
Baird_Sparrow
Black_throated_Sparrow
Brewer_Sparrow
Chipping_Sparrow
Clay_colored_Sparrow
House_Sparrow
Field_Sparrow
Fox_Sparrow
Grasshopper_Sparrow
Harris_Sparrow
Henslow_Sparrow
Le_Conte_Sparrow
Lincoln_Sparrow
Nelson_Sharp_tailed_Sparrow
Savannah_Sparrow
Seaside_Sparrow
Song_Sparrow
Tree_Sparrow
Vesper_Sparrow
White_crowned_Sparrow
White_throated_Sparrow
Cape_Glossy_Starling
Bank_Swallow
Barn_Swallow
Cliff_Swallow
Tree_Swallow
Scarlet_Tanager
Summer_Tanager
Artic_Tern
Black_Tern
Caspian_Tern
Common_Tern
Elegant_Tern
Forsters_Tern
Least_Tern
Green_tailed_Towhee
Brown_Thrasher
Sage_Thrasher
Black_capped_Vireo
Blue_headed_Vireo
Philadelphia_Vireo
Red_eyed_Vireo
Warbling_Vireo
White_eyed_Vireo
Yellow_throated_Vireo
Bay_breasted_Warbler
Black_and_white_Warbler
Black_throated_Blue_Warbler
Blue_winged_Warbler
Canada_Warbler
Cape_May_Warbler
Cerulean_Warbler
Chestnut_sided_Warbler
Golden_winged_Warbler
Hooded_Warbler
Kentucky_Warbler
Magnolia_Warbler
Mourning_Warbler
Myrtle_Warbler
Nashville_Warbler
Orange_crowned_Warbler
Palm_Warbler
Pine_Warbler
Prairie_Warbler
Prothonotary_Warbler
Swainson_Warbler
Tennessee_Warbler
Wilson_Warbler
Worm_eating_Warbler
Yellow_Warbler
Northern_Waterthrush
Louisiana_Waterthrush
Bohemian_Waxwing
Cedar_Waxwing
American_Three_toed_Woodpecker
Pileated_Woodpecker
Red_bellied_Woodpecker
Red_cockaded_Woodpecker
Red_headed_Woodpecker
Downy_Woodpecker
Bewick_Wren
Cactus_Wren
Carolina_Wren
House_Wren
Marsh_Wren
Rock_Wren
Winter_Wren
Common_Yellowthroat
Loading

0 comments on commit 341ca10

Please sign in to comment.