Skip to content

Commit

Permalink
add argparse for combine_2.py
Browse files Browse the repository at this point in the history
  • Loading branch information
bernard2012 committed Sep 9, 2020
1 parent fcea286 commit eb1d8a1
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 4 deletions.
6 changes: 2 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,21 +140,19 @@ In this example, we request one job per combination of (i, j, k). Hence there wi
Once all the jobs are complete, combine the results together to generate a final score list.
Note that this step does not need to be run through the SLURM scheduler.
```bash
python3 ~/.local/lib/python3.6/site-packages/silhouetteRank/combine.py -i . -o silhouette_overall_pval.txt -l master.combine.log -v -r 0.95 0.99 -e 0.005 0.01 0.05 0.1 0.3 -m dissim
python3 ~/.local/lib/python3.6/site-packages/silhouetteRank/combine_2.py -i . -v -r 0.95 0.99 -e 0.005 0.01 0.05 0.1 0.3 -m dissim
```

**Table 2**. Explanations of `combine.py`:
| param | explanation |
| ----- | ------------------ |
| -i | The results directory (containing directories like result_5000_0.95_0.300) |
| -o | Output file |
| -l | Log file name (just file name, no path) |
| -v | Verbose |
| -r | Array. Float. Local spatial distance weighting constant. (recommend 0.95 - 0.995) |
| -e | Array. Float. Top proportion of cells to binarize to 1 (0 - 1) |
| -m | dissim or sim. Use dissimilarity matrix (default) or similarity matrix |

The final result is saved in the file `silhouette.overall.pval.txt`.
The final result is saved in the file `silhouette.overall.pval.txt` located in the results directory.

Note: the parameter settings for `-r` and `-e` work well for most datasets. Users can safely use the same commands for all datasets.

70 changes: 70 additions & 0 deletions silhouetteRank/combine_2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import math
import sys
import os
import re
import scipy
import scipy.stats
import numpy as np
from operator import itemgetter
import silhouetteRank
import silhouetteRank.prep as prep
import silhouetteRank.evaluate_exact_one_2b as evaluate_exact_one_2b
import silhouetteRank.use_previous_cluster as use_previous_cluster
import silhouetteRank.combine as combine
import logging
import argparse
import subprocess

def read(n):
    """Load a whitespace-delimited score file into a ``{gene: value}`` dict.

    For every non-blank line, the first field is used as the gene key and
    the second-to-last field is parsed as a float and stored as its value.
    If a gene appears more than once, the last occurrence wins.

    Args:
        n: Path of the file to read.

    Returns:
        dict mapping the first field of each line to ``float(fields[-2])``.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second-to-last field is not a valid float.
    """
    by_gene = {}
    # The context manager closes the file even when parsing a line fails.
    with open(n) as f:
        for line in f:
            fields = line.rstrip("\n").split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            by_gene[fields[0]] = float(fields[-2])
    return by_gene

def do_one(args):
    """Compute p-values for every (rbp_p, examine_top) pair, then combine them.

    Steps:
      1. Verify that ``Rscript`` can be launched from the shell.
      2. Verify that ``expr.npy``, ``Xcen.npy`` and ``genes.npy`` exist in
         ``args.input``.
      3. For each combination of ``args.rbp_ps`` x ``args.examine_tops``,
         call ``use_previous_cluster.do_one`` on the matching score file and
         random-background directory inside ``args.input``.
      4. Call ``combine.do_one`` to write the combined list to
         ``silhouette.overall.pval.txt`` (or ``silhouette.sim.overall.pval.txt``
         when ``args.matrix_type == "sim"``) inside ``args.input``.

    Args:
        args: Namespace-like object with attributes ``input`` (results
            directory), ``rbp_ps`` and ``examine_tops`` (iterables of float)
            and ``matrix_type`` (``"dissim"`` or ``"sim"``). Optional
            attributes: ``verbose`` (default ``False``) and ``query_sizes``
            (default ``10``).

    Raises:
        SystemExit: With code 1 when Rscript is not found or a required
            ``.npy`` file is missing.
    """
    # The shell reports exit status 127 when the command cannot be found.
    result = subprocess.call("Rscript --version 2> /dev/null", shell=True)
    if result == 127:
        sys.stderr.write("Rscript is not found\n")
        sys.stderr.flush()
        sys.exit(1)

    for required in ("expr.npy", "Xcen.npy", "genes.npy"):
        if not os.path.isfile("%s/%s" % (args.input, required)):
            sys.stderr.write("%s file does not exist\n" % required)
            sys.stderr.flush()
            sys.exit(1)

    # The original code referenced an undefined global `verbose` here, which
    # raised NameError. Read both optional settings from `args` instead so
    # callers that do not supply them keep working.
    verbose = getattr(args, "verbose", False)
    # NOTE(review): fallback of 10 is assumed to match the default used by
    # use_previous_cluster's own command line -- confirm.
    query_sizes = getattr(args, "query_sizes", 10)

    # Anything other than "dissim" selects the similarity-matrix file names,
    # exactly as in the original branching.
    use_dissim = args.matrix_type == "dissim"
    for rbp_p in args.rbp_ps:
        for examine_top in args.examine_tops:
            params = (args.input, rbp_p, examine_top)
            if use_dissim:
                random_dir = "%s/result_5000_%.2f_%.3f" % params
                score_file = "%s/silhouette.exact.rbp.%.2f.top.%.3f.txt" % params
                output_score_file = "%s/silhouette.exact.rbp.%.2f.top.%.3f.pval.txt" % params
            else:
                random_dir = "%s/result_sim_5000_%.2f_%.3f" % params
                score_file = "%s/silhouette.sim.exact.rbp.%.2f.top.%.3f.txt" % params
                output_score_file = "%s/silhouette.sim.exact.rbp.%.2f.top.%.3f.pval.txt" % params
            pvalue_args = argparse.Namespace(
                expr="../expression.txt",
                centroid="../Xcen.good",
                examine_top=examine_top,
                input=score_file,
                input_random=random_dir,
                output=output_score_file,
                outdir=args.input,
                query_sizes=query_sizes,
                overwrite_input_bin=False,
                verbose=verbose,
                log_file="master.pvalue.log",
            )
            use_previous_cluster.do_one(pvalue_args)

    combined_file = "%s/silhouette.overall.pval.txt" % args.input
    if args.matrix_type == "sim":
        combined_file = "%s/silhouette.sim.overall.pval.txt" % args.input
    combine_args = argparse.Namespace(
        rbp_ps=args.rbp_ps,
        examine_tops=args.examine_tops,
        matrix_type=args.matrix_type,
        input=args.input,
        output=combined_file,
    )
    combine.do_one(combine_args)

def build_parser():
    """Build the command-line parser for ``combine_2.py``.

    Returns:
        argparse.ArgumentParser exposing ``rbp_ps``, ``examine_tops``,
        ``matrix_type``, ``input``, ``verbose`` and ``query_sizes``.
    """
    parser = argparse.ArgumentParser(
        description="combine_2.py: combine spatial scores across parameters",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("-r", "--rbp-ps", dest="rbp_ps", nargs="+", type=float, default=[0.95, 0.99], help="p parameter of RBP")
    parser.add_argument("-e", "--examine-tops", dest="examine_tops", nargs="+", type=float, default=[0.005, 0.010, 0.050, 0.100, 0.300], help="top proportion of cells per gene to be 1's (expressed)")
    parser.add_argument("-m", "--matrix-type", dest="matrix_type", type=str, choices=["sim", "dissim"], help="whether to calculate similarity matrix or dissimilarity matrix", default="dissim")
    parser.add_argument("-i", "--input-dir", dest="input", type=str, default=".", help="input directory containing individual spatial score rankings (to be aggregated)")
    # The documented invocation passes -v, and do_one() forwards a verbose
    # flag downstream, so the option must be accepted here.
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="print verbose messages")
    # do_one() forwards query_sizes to use_previous_cluster.
    # NOTE(review): type=int / default=10 are assumed to mirror that module's
    # own option -- confirm.
    parser.add_argument("-q", "--query-sizes", dest="query_sizes", type=int, default=10, help="query sizes (advanced user setting)")
    # There is deliberately no -o/--output option: the output file name is
    # derived from the input directory inside do_one().
    return parser


if __name__ == "__main__":
    do_one(build_parser().parse_args())

0 comments on commit eb1d8a1

Please sign in to comment.