split_to_chunks.py
"""
This script can optionally be used to split a large stack
of images/flipbooks into smaller chunks for proofreading. E.g.,
A stack of 500 flipbooks can be split into 10 chunks or 50 flipbooks
and distributed to different proofreaders for faster turnaround.
Arguments:
----------
im_file: The image stack .tif generated by create_proofreading_stacks.py
mask_file: The mask stack .tif generated by create_proofreading_stacks.py
csv_file: The _consensus_attributes.csv file generated by create_proofreading_stacks.py
save_dir: Where to save the chunks.
cs: Number of images/flipbooks per chunk. Default 50.
Returns:
--------
An image and mask stack as well as an attrs .csv for each chunk.
Note: Chunks can be proofread masks can restacked into a single file with concat_mask_chunks.py.
Retain the unchunked im_file and csv_file if you intend to restack the masks later.
"""
import os
import argparse

import pandas as pd
from skimage import io

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('im_file', type=str,
                        help='Path to stacked images/flipbooks')
    parser.add_argument('mask_file', type=str,
                        help='Path to stacked masks/mask flipbooks')
    parser.add_argument('csv_file', type=str,
                        help='Metadata consensus_attributes.csv file corresponding to images and masks')
    parser.add_argument('save_dir', type=str,
                        help='Directory in which to save chunked stacks of images and masks')
    parser.add_argument('--cs', type=int, default=50,
                        help='Number of images/flipbooks in each chunk.')
    args = parser.parse_args()

    imf = args.im_file
    segf = args.mask_file
    sdir = args.save_dir
    chunk_size = args.cs
    csvf = args.csv_file

    os.makedirs(sdir, exist_ok=True)

    # load the full image/mask stacks and their attributes table
    im = io.imread(imf)
    seg = io.imread(segf)
    attr_csv = pd.read_csv(csvf)

    # ranges of start/end indices for each chunk
    start = 0
    stop = len(attr_csv)
    step = chunk_size
    sindices = range(start, stop, step)
    eindices = range(step, stop + step, step)
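
    # Worked example (assuming a stack of 120 flipbooks and the default
    # chunk size of 50): sindices = (0, 50, 100) and eindices = (50, 100, 150),
    # so zip() pairs them into slices [0:50], [50:100], and [100:150]; the
    # final slice simply stops at the end of the stack.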

    # the batch name is the prefix of the image file name before the first underscore
    batch_name = os.path.basename(imf).split('_')[0]

    for s, e in zip(sindices, eindices):
        s_str = str(s).zfill(4)
        e_str = str(e).zfill(4)

        # names record the range of images/flipbooks in the chunk
        impath = os.path.join(sdir, f'{batch_name}_chunk_{s_str}-{e_str}.tif')
        segpath = os.path.join(sdir, f'{batch_name}_chunk_{s_str}-{e_str}_masks.tif')
        csvpath = os.path.join(sdir, f'{batch_name}_attr_chunk_{s_str}-{e_str}.csv')

        io.imsave(impath, im[s:e], check_contrast=False)
        io.imsave(segpath, seg[s:e], check_contrast=False)

        # update and save the csv file chunk; copy to avoid mutating attr_csv,
        # and shift stack_index so it is relative to the start of the chunk
        chunk_csv = attr_csv.iloc[s:e].copy()
        chunk_csv['stack_index'] = chunk_csv['stack_index'] - s
        chunk_csv.to_csv(csvpath, index=False)
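
# A minimal sketch of how proofread mask chunks could be restacked, in case
# concat_mask_chunks.py is unavailable. This is an assumption about that
# script's behavior, not its actual implementation. The chunk file names sort
# lexicographically because the indices are zero-padded, so sorted() restores
# the original order:
#
#   import numpy as np
#   from glob import glob
#   chunk_files = sorted(glob(os.path.join(sdir, f'{batch_name}_chunk_*_masks.tif')))
#   restacked = np.concatenate([io.imread(f) for f in chunk_files], axis=0)
#   io.imsave(f'{batch_name}_masks_restacked.tif', restacked, check_contrast=False)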