-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimage_utils.py
157 lines (128 loc) · 5.61 KB
/
image_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/env python
"""
This module contains tools that have been helpful for working with histological
image files, including reading the image annotation files and making the target
images.
"""
import cv2
import xml.etree.ElementTree as et
from glob import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import tifffile
def read_annos_xml(filename):
    """
    Read the nucleus outlines from one annotation file of the Kumar et al.
    (2018) dataset.

    Every ``Vertex`` element found beneath a ``Region`` element (itself
    beneath a ``Regions`` element) contributes one point taken from its ``X``
    and ``Y`` attributes. Coordinates are parsed as floats, rounded to the
    nearest integer pixel and stored as integer arrays.

    The two return values are 1-D object ndarrays with one element per
    region; each element is a 1-D integer ndarray holding the vertices that
    outline that region. The outer arrays are always object arrays, even when
    every region happens to have the same number of vertices, so indexing
    ``x_vert[n]`` always yields the vertices of region ``n``.

    :param str filename: path of the XML annotation file.
    :return ndarray x_vert, ndarray y_vert:
    """
    root = et.parse(filename).getroot()

    x_regions = []
    y_regions = []
    for regions in root.iter('Regions'):
        for region in regions.iter('Region'):
            vertices = list(region.iter('Vertex'))
            x_region = [float(v.attrib['X']) for v in vertices]
            y_region = [float(v.attrib['Y']) for v in vertices]
            # Round while the values are still floats and only then cast to
            # int; casting first would truncate and make the rounding a no-op.
            x_regions.append(
                np.round(np.asarray(x_region, dtype=float)).astype(int))
            y_regions.append(
                np.round(np.asarray(y_region, dtype=float)).astype(int))

    # Regions generally have different vertex counts, so fill pre-allocated
    # object arrays one element at a time instead of asking NumPy to build an
    # array from a ragged list.
    x_vert = np.empty(len(x_regions), dtype=object)
    y_vert = np.empty(len(y_regions), dtype=object)
    for i, (xs, ys) in enumerate(zip(x_regions, y_regions)):
        x_vert[i] = xs
        y_vert[i] = ys
    return x_vert, y_vert
def _make_target_image(x_vert, y_vert, im_size):
    """
    Create the RGB "target" image of size im_size that classifies every pixel
    of the original image: the red channel is 1.0 for "background" pixels,
    the blue channel is 1.0 for pixels "inside" a nucleus, and the green
    channel is 1.0 for "boundary" pixels. The boundaries are the closed
    polylines described by the x_vert, y_vert vertices around each nucleus.
    The target image is the desired result used to train a decoder to
    classify all the pixels of the original image.

    The vertex arrays passed in are not modified; clipping to the image
    extent is done on copies. Regions without any vertex are skipped.

    :param ndarray x_vert: x-component (column) of the vertices of the
        polyline around each nucleus boundary. Obtained from
        read_annos_xml().
    :param ndarray y_vert: y-component (row) of the vertices of the polyline
        around each nucleus boundary. Obtained from read_annos_xml().
    :param tuple im_size: (rows, columns) of the image to create.
    :return: ndarray im of shape im_size + (3,) with values 0.0 or 1.0
    """
    im_size = tuple(im_size)
    im = np.zeros(im_size + (3,))  # RGB image canvas

    # The canvas is indexed [row, column], i.e. [y, x]: x is bounded by the
    # number of columns (im_size[1]) and y by the number of rows (im_size[0]).
    max_x = im_size[1] - 1
    max_y = im_size[0] - 1

    for x_region, y_region in zip(x_vert, y_vert):
        if len(x_region) == 0:
            continue  # nothing to draw for a region without vertices

        # Truncate any nuclei boundaries that go past the edge of the image.
        # np.clip returns new arrays, leaving the caller's vertices intact.
        x = np.clip(x_region, 0, max_x)
        y = np.clip(y_region, 0, max_y)

        # Prepare the vertex points for OpenCV: shape (n, 1, 2), 32-bit ints.
        pts = np.stack([x, y], axis=1).astype(np.int32).reshape((-1, 1, 2))

        # NOTE(review): each drawing call appears to write its full colour to
        # every channel of the pixels it covers, so the fill drawn after an
        # outline would reset green inside the polygon. The statement order
        # is kept exactly as it was; confirm before reordering.

        # draw the "boundary" outlines in the green channel
        cv2.polylines(im, [pts], isClosed=True,
                      color=(0.0, 1.0, 0.0),
                      thickness=5,
                      lineType=cv2.LINE_8
                      )
        # draw the initial "nuclei" region in the blue channel
        cv2.fillPoly(im, [pts],
                     color=(0.0, 0.0, 1.0),  # RGB
                     lineType=cv2.LINE_8
                     )

    # Make the "background" in the red channel by subtracting the "nuclei"
    # region from ones.
    im[:, :, 0] = 1.0 - im[:, :, 2]

    # Finally subtract the "boundary" from the "nuclei" to produce the
    # "inside" region in the blue channel, and subtract the "boundary" from
    # the "background" in red.
    im[:, :, 2] = np.maximum(im[:, :, 2] - im[:, :, 1], 0.0)
    im[:, :, 0] = np.maximum(im[:, :, 0] - im[:, :, 1], 0.0)
    return im
def make_target_images(anno_folder, target_folder, im_size):
    """
    Make a target image for each of the ``*.xml`` annotation files found
    directly in <anno_folder> (no recursion). Each resulting target image is
    converted to uint8 (0 or 255) and written as an RGB tiff file to
    <target_folder>, named after the annotation file with a ``.tif``
    extension.

    <target_folder> is created if it does not exist yet. Annotation files are
    processed in sorted name order.

    :param str anno_folder: folder containing the XML annotation files.
    :param str target_folder: folder that receives the tiff target images.
    :param tuple im_size: (rows, columns) of the target images.
    :return: none
    """
    filenames = sorted(glob(os.path.join(anno_folder, '*.xml'),
                            recursive=False))

    # Writing fails when the destination folder is missing, so make sure it
    # is there before the first image is produced.
    os.makedirs(target_folder, exist_ok=True)
    if not filenames:
        return

    # Prefer tifffile.imwrite and fall back to the older imsave name, so the
    # function works whichever of the two the installed tifffile provides.
    if hasattr(tifffile, 'imwrite'):
        write_tiff = tifffile.imwrite
    else:
        write_tiff = tifffile.imsave

    for anno_file in filenames:
        x_vert, y_vert = read_annos_xml(anno_file)
        im_target = _make_target_image(x_vert, y_vert, im_size)

        # convert the 0.0/1.0 float image to uint8...
        im_target = (im_target*255).astype(dtype=np.uint8)

        filename, _ = os.path.splitext(os.path.basename(anno_file))
        target_filename = os.path.join(target_folder, filename+'.tif')
        write_tiff(target_filename, im_target, photometric='rgb')
if __name__ == "__main__":
    '''
    This performs a small verification test...
    '''
    # Dataset location and the one sample used for the visual check.
    data_root = '/shared/Projects/nuclei_segmentation/Images/Kumar_images/'
    sample_anno = os.path.join(data_root,
                               'Annotations/TCGA-18-5592-01Z-00-DX1.xml')
    sample_image = os.path.join(data_root,
                                'deflated/TCGA-18-5592-01Z-00-DX1.tif')
    annos_dir = os.path.join(data_root, 'Annotations/')
    targets_dir = os.path.join(data_root, 'target/')

    # Step 1: write a target tiff for every annotation file in the dataset.
    make_target_images(annos_dir, targets_dir, (1000,1000))

    # Step 2: rebuild the target of the sample, sized after its slide image.
    outlines_x, outlines_y = read_annos_xml(sample_anno)
    slide = tifffile.imread(sample_image)
    slide_hw = slide.shape[0:2]
    target = _make_target_image(outlines_x, outlines_y, slide_hw)

    # Step 3: RGBA overlay holding only the boundary; the green channel also
    # serves as alpha, so everything except the boundary is transparent.
    boundary = target[:, :, 1]
    overlay = np.zeros(slide_hw + (4,))  # RGBA canvas
    overlay[:, :, 1] = boundary
    overlay[:, :, 3] = boundary

    # First figure: the slide with the boundary drawn on top of it.
    plt.imshow(slide)
    plt.imshow(overlay)
    plt.show()

    # Second figure: the complete target image.
    plt.imshow(target)
    plt.show()