-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_pup.py
151 lines (125 loc) · 5.36 KB
/
plot_pup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import pandas as pd
import h5sparse
import re
import os
import numpy as np
import matplotlib.pyplot as plt
import argparse
# Create the parser
parser = argparse.ArgumentParser(description="Plot heatmap from a given file.")
# Add an argument
parser.add_argument('--input', type=str, help='The file path to the data file.')
# Argument parser setup
parser = argparse.ArgumentParser(description="Plot heatmaps from given files and customize color scales.")
parser.add_argument('--input', type=str, help='The file paths to the data files, separated by commas.')
parser.add_argument('--vmin', type=float, default=0.6, help='Minimum value for the color scale of the first matrix.')
parser.add_argument('--vmax', type=float, default=1.4, help='Maximum value for the color scale of the first matrix.')
parser.add_argument('--vmin_diff', type=float, default=-0.2, help='Minimum value for the color scale of the difference matrix.')
parser.add_argument('--vmax_diff', type=float, default=0.2, help='Maximum value for the color scale of the difference matrix.')
args = parser.parse_args()
# Now args.file_path holds the input file path
file_path = args.input
vmin = args.vmin
vmax = args.vmax
base_name = os.path.splitext(os.path.basename(file_path))[0]
output_file = f'{base_name}_heatmap.pdf'
__version__ = "1.1.0"
def load_pileup_df(filename, quaich=False, skipstripes=False):
"""
Loads a dataframe saved using `save_pileup_df`
Parameters
----------
filename : str
File to load from.
quaich : bool, optional
Whether to assume standard quaich file naming to extract sample name and bedname.
The default is False.
Returns
-------
annotation : pd.DataFrame
Pileups are in the "data" column, all metadata in other columns
"""
with h5sparse.File(filename, "r", libver="latest") as f:
metadata = dict(zip(f["attrs"].attrs.keys(), f["attrs"].attrs.values()))
dstore = f["data"]
data = []
for chunk in dstore.iter_chunks():
chunk = dstore[chunk]
data.append(chunk)
annotation = pd.read_hdf(filename, "annotation")
annotation["data"] = data
vertical_stripe = []
horizontal_stripe = []
coordinates = []
if not skipstripes:
try:
for i in range(len(data)):
vstripe = "vertical_stripe_" + str(i)
hstripe = "horizontal_stripe_" + str(i)
coords = "coordinates_" + str(i)
vertical_stripe.append(f[vstripe][:].toarray())
horizontal_stripe.append(f[hstripe][:].toarray())
coordinates.append(f[coords][:].astype("U13"))
annotation["vertical_stripe"] = vertical_stripe
annotation["horizontal_stripe"] = horizontal_stripe
annotation["coordinates"] = coordinates
except KeyError:
pass
for key, val in metadata.items():
if key != "version":
annotation[key] = val
elif val != __version__:
logger.debug(
f"pileup generated with v{val}. Current version is v{__version__}"
)
if quaich:
basename = os.path.basename(filename)
sample, bedname = re.search(
"^(.*)-(?:[0-9]+)_over_(.*)_(?:[0-9]+-shifts|expected).*\.clpy", basename
).groups()
annotation["sample"] = sample
annotation["bedname"] = bedname
matrix_to_plot = annotation.loc[0, 'data']
return matrix_to_plot
# print(np.array2string(annotation, threshold=np.inf, max_line_width=np.inf))
file_paths = args.input.split(',')
matrices = [load_pileup_df(file_path) for file_path in file_paths]
# Calculate the difference if two matrices are provided
# Plotting based on the number of matrices
if len(matrices) == 1:
# Single matrix case
base_name = os.path.splitext(os.path.basename(file_paths[0]))[0]
output_file = f'{base_name}_heatmap.pdf'
matrix_to_plot = matrices[0]
plt.figure(figsize=(10, 8))
plt.imshow(matrix_to_plot, aspect='auto', cmap="coolwarm", vmin=args.vmin, vmax=args.vmax)
plt.colorbar()
plt.title('Matrix Heatmap')
plt.xlabel('X-axis Label')
plt.ylabel('Y-axis Label')
plt.savefig(output_file, format='pdf', bbox_inches='tight')
plt.show() # Show the plot
elif len(matrices) == 2:
difference_matrix = matrices[0] - matrices[1]
np.fill_diagonal(difference_matrix, np.nan)
matrices.append(difference_matrix)
base_name = os.path.splitext(os.path.basename(file_paths[0]))[0]
output_file = f'{base_name}_combined_heatmap.pdf'
# Plotting
fig, axs = plt.subplots(1, len(matrices), figsize=(10 * len(matrices), 8))
# Color scales and limits
# Color scales and limits
color_scales = ["coolwarm", "coolwarm", "RdBu_r"]
vmins = [args.vmin, args.vmin, args.vmin_diff]
vmaxs = [args.vmax, args.vmax, args.vmax_diff]
titles = ['Matrix 1', 'Matrix 2', 'Difference Matrix']
for i, matrix in enumerate(matrices):
ax = axs[i]
cax = ax.imshow(matrix, aspect='auto', cmap=color_scales[i], vmin=vmins[i], vmax=vmaxs[i])
fig.colorbar(cax, ax=ax, orientation='vertical')
ax.set_title(titles[i])
ax.set_xlabel('X-axis Label')
ax.set_ylabel('Y-axis Label')
plt.tight_layout()
plt.savefig(output_file, format='pdf', bbox_inches='tight')
plt.close(fig)