# loading_baseline.py
import argparse
import os

import torch
import torch.distributed as dist
import torchvision
import torchvision.transforms as transforms

from utils import *

logger = make_logger('imagenet', 'logs')
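# `make_logger` and `Benchmark` are repo-local helpers brought in by the
# wildcard import from `utils`; their implementations are not shown in this file.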

def parse_args():
    parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
    parser.add_argument('data', metavar='DIR', help='path to dataset')
    parser.add_argument('--local_rank', metavar='RANK', type=int, default=0,
                        help='local rank of this process (set by the launcher)')
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument('--epochs', default=90, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('-b', '--batch-size', default=256, type=int, metavar='N',
                        help='mini-batch size (default: 256); this is the total '
                             'batch size of all GPUs on the current node when '
                             'using Data Parallel or Distributed Data Parallel')
    parser.add_argument('--dali_cpu', action='store_true',
                        help='run the CPU-based version of the DALI pipeline '
                             '(not used in this baseline script)')
    return parser.parse_args()
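
# How this script is typically launched: one process per GPU. A minimal
# sketch, assuming the legacy launcher (which supplies the --local_rank
# flag parsed above); the dataset path is a placeholder:
#
#   python -m torch.distributed.launch --nproc_per_node=<num_gpus> \
#       loading_baseline.py /path/to/imagenet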

def train(train_loader, epoch, args):
    # No model here: each iteration only fetches a batch and copies it to the
    # GPU, so the logged time reflects data-loading (plus H2D transfer) cost.
    load_time = Benchmark()
    for i, (images, target) in enumerate(train_loader):
        images = images.cuda(args.local_rank, non_blocking=True)
        target = target.cuda(args.local_rank, non_blocking=True)
        logger.info(f'Epoch #{epoch} [{i}/{len(train_loader)}] {load_time.elapsed():>.3f}')
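
# Note: non_blocking=True in train() can only overlap host-to-device copies
# with other work because the DataLoader below uses pinned memory.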

if __name__ == '__main__':
    assert torch.cuda.is_available(), 'CUDA is not available'
    args = parse_args()
    # Per-process batch size: split the per-node total across visible GPUs.
    args.batch_size = int(args.batch_size / torch.cuda.device_count())
    logger.info(args)
    dist.init_process_group('nccl')
    torch.cuda.set_device(args.local_rank)

    # Data loading code: standard torchvision ImageFolder pipeline with the
    # usual ImageNet normalization statistics.
    train_dataset = torchvision.datasets.ImageFolder(
        os.path.join(args.data, 'train'),
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])
    )
    # DistributedSampler shards the dataset across ranks and handles shuffling,
    # so the DataLoader itself must not shuffle when a sampler is given.
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers,
        pin_memory=True,
        sampler=train_sampler,
    )

    benchmark = Benchmark()
    for epoch in range(args.epochs):
        # set_epoch reseeds the sampler so each epoch gets a new shuffle order.
        train_sampler.set_epoch(epoch)
        train(train_loader, epoch, args)
    logger.info(f'{benchmark.elapsed():>.3f}')
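
# For reference, a minimal Benchmark consistent with how it is used above
# (an assumption; the real helper lives in utils.py and may differ):
#
#   import time
#
#   class Benchmark:
#       def __init__(self):
#           self._last = time.time()
#
#       def elapsed(self):
#           # Seconds since construction or the previous call, then reset.
#           now = time.time()
#           delta, self._last = now - self._last, now
#           return delta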