Initial

bernard2012 · Sep 6, 2020 · 998c189 · 998c189
1 parent be7e70d
commit 998c189
Show file tree

Hide file tree

Showing 19 changed files with 2,052 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,138 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
diff --git a/PKG-INFO b/PKG-INFO
@@ -0,0 +1,15 @@
+Metadata-Version: 2.1
+Name: silhouetteRank
+Version: 1.0.0
+Summary: silhouetteRank is a tool for finding spatially variable genes based on computing silhouette coefficient from binarized spatial gene expression data
+Home-page: https://bitbucket.org/qzhu/silhouetteRank
+Author: Qian Zhu
+Author-email: [email protected]
+License: UNKNOWN
+Description: silhouetteRank is a tool for finding spatially variable genes based on computing silhouette coefficient from binarized spatial gene expression data
+Platform: UNKNOWN
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.5
+Description-Content-Type: text/markdown
diff --git a/README.md b/README.md
@@ -1 +1,3 @@
-# silhouetteRank
+This toolkit contains silhouetteRank, a flexible method for finding spatially variable genes. It computes a score based on the silhouette coefficient of binarized gene expression data. It allows users to specify multiple running widths and combines the resulting p-values using Fisher's method.
+
+silhouetteRank is written in Python 3.
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,4 @@
+[egg_info]
+tag_build = 
+tag_date = 0
+
diff --git a/setup.py b/setup.py
@@ -0,0 +1,75 @@
+import subprocess
+from distutils.command.build import build as _build
+import setuptools
+
+with open("README.md", "r") as fh:
+	long_description = fh.read()
+
+# This class handles the pip install mechanism.
+class build(_build):  # pylint: disable=invalid-name
+  """Build command that extends the inherited list of sub-commands.
+
+  The entry ("CustomCommands", None) is appended after the sub-commands
+  defined by the parent distutils build command.
+  """
+  sub_commands = _build.sub_commands + [("CustomCommands", None)]
+
+# Commands executed one by one by CustomCommands.run(). Each entry is a list
+# holding a single shell command line. The one entry here looks up the
+# directory under build/ whose name contains "lib", changes into the
+# silhouetteRank package directory inside it, and runs `Rscript --version`.
+CUSTOM_COMMANDS = [
+	["libdir=`ls -1 build|grep \"lib\"`; cd build/$libdir/silhouetteRank/ && Rscript --version"]]
+
+class CustomCommands(setuptools.Command):
+  """A setuptools Command class able to run arbitrary commands."""
+
+  def initialize_options(self):
+    pass
+
+  def finalize_options(self):
+    pass
+
+  def RunCustomCommand(self, command_list):
+    print("Running command: %s" % command_list)
+    p = subprocess.Popen(
+        command_list,
+        stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
+    # Can use communicate(input='y\n'.encode()) if the command run requires
+    # some confirmation.
+    stdout_data, _ = p.communicate()
+    print("Command output: %s" % stdout_data)
+    if p.returncode != 0:
+      raise RuntimeError(
+          "Command %s failed: exit code: %s" % (command_list, p.returncode))
+
+  def run(self):
+    for command in CUSTOM_COMMANDS:
+      self.RunCustomCommand(command)
+
+
+
+setuptools.setup(
+	name="silhouetteRank",
+	version="1.0.5.10",
+	author="Qian Zhu",
+	author_email="[email protected]",
+	description="silhouetteRank is a tool for finding spatially variable genes based on computing silhouette coefficient from binarized spatial gene expression data",
+	long_description="",
+	long_description_content_type="text/markdown",
+	url="https://bitbucket.org/qzhu/silhouetteRank",
+	packages=setuptools.find_packages(),
+	entry_points = {
+		"console_scripts": [
+			"silhouette_rank_one = silhouetteRank.silhouette_rank_one:main",
+			"silhouette_rank_main = silhouetteRank.evaluate_2b:main",
+			"silhouette_rank_random = silhouetteRank.evaluate_exact_one_2b:main",
+		]
+	},
+	classifiers=(
+		"Programming Language :: Python :: 3",
+		"License :: OSI Approved :: MIT License",
+		"Operating System :: OS Independent",
+	),
+	python_requires=">=3.5",
+	package_data={"silhouetteRank":  ["do_gpd.R", "do_kmeans.R",
+		"qval.R"]},
+	install_requires=[
+		"scipy", "numpy", "pandas", "seaborn", "scikit-learn", "matplotlib"],
+	cmdclass={
+		"build": build,
+		"CustomCommands": CustomCommands,
+		}	
+)
+
diff --git a/silhouetteRank/__init__.py b/silhouetteRank/__init__.py
@@ -0,0 +1 @@
+# Package name, exposed as a module-level attribute.
+name = "silhouetteRank"
diff --git a/silhouetteRank/combine.py b/silhouetteRank/combine.py
@@ -0,0 +1,72 @@
+import argparse
+import math
+import os
+import re
+import subprocess
+import sys
+import tempfile
+from operator import itemgetter
+
+import numpy as np
+import scipy
+import scipy.stats
+
+import silhouetteRank
+
+def read(n):
+	f = open(n)
+	by_gene = {}
+	for l in f:
+		l = l.rstrip("\n")
+		ll = l.split()
+		gene = ll[0]
+		pval = float(ll[-2])
+		by_gene[gene] = pval
+	f.close()
+	return by_gene
+
+def do_one(args):
+	by_gene = {}
+	for examine_top in args.examine_tops:
+		for rbp in args.rbp_ps:	
+			fname = "%s/silhouette.sim.exact.rbp.%.2f.top.%.3f.pval.txt" % (args.input, rbp, examine_top)	
+			if args.matrix_type=="dissim":
+				fname = "%s/silhouette.exact.rbp.%.2f.top.%.3f.pval.txt" % (args.input, rbp, examine_top)	
+			by_gene[(examine_top, rbp)] = read(fname)
+	all_genes = list(by_gene[(args.examine_tops[0], args.rbp_ps[0])].keys())
+	score = {}
+	pval = {}
+	for g in all_genes:
+		score[g] = 0
+		tot_test = 0
+		for i in args.examine_tops:
+			for j in args.rbp_ps:
+				score[g] += math.log(by_gene[(i, j)][g])
+				tot_test+=1
+		score[g] *= -2.0
+		pval[g] = np.exp(scipy.stats.chi2.logsf(score[g], tot_test*2))
+
+	score_it = list(score.items())
+	score_it.sort(key=itemgetter(1), reverse=True)
+	fw = open("/tmp/1.pval", "w")
+	for i,j in score_it:
+		fw.write(str(pval[i]) + "\n")
+	fw.close()
+
+	os.system("Rscript %s/qval.R /tmp/1.pval /tmp/1.qval" % os.path.dirname(silhouetteRank.__file__))
+	f = open("/tmp/1.qval")
+	q_score = []
+	for l in f:
+		l = l.rstrip("\n")
+		q_score.append(float(l))
+	f.close()
+
+	fw = open(args.output, "w")
+	for (i,j),k in zip(score_it, q_score):
+		fw.write("%s %s %s %s\n" % (str(i), str(j), str(pval[i]), str(k)))
+	fw.close()
+
+if __name__=="__main__":
+	parser = argparse.ArgumentParser(description="combine.py: combine spatial scores across parameters", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+	parser.add_argument("-r", "--rbp-ps", dest="rbp_ps", nargs="+", type=float, default=[0.95, 0.99], help="p parameter of RBP")
+	parser.add_argument("-e", "--examine-tops", dest="examine_tops", nargs="+", type=float, default=[0.005, 0.010, 0.050, 0.100, 0.300], help="top proportion of cells per gene to be 1's (expressed)")
+	parser.add_argument("-m", "--matrix-type", dest="matrix_type", type=str, choices=["sim", "dissim"], help="whether to calculate similarity matrix or dissimilarity matrix", default="dissim")
+	parser.add_argument("-i", "--input-dir", dest="input", type=str, default=".", help="input directory containing individual spatial score rankings (to be aggregated)")
+	parser.add_argument("-o", "--output", dest="output", type=str, required=True, help="output file name")
+	args = parser.parse_args()
+	do_one(args)
diff --git a/silhouetteRank/do_gpd.R b/silhouetteRank/do_gpd.R
@@ -0,0 +1,6 @@
+# Fit a distribution with eva::gpdFit (maximum likelihood, 250 extremes) to
+# the values in the file given as first command-line argument, and write the
+# parameter estimates to "par.<input name>" as a tab-separated table.
+library(eva)
+cli_args <- commandArgs(trailingOnly = TRUE)
+input_name <- cli_args[1]
+# The double transpose turns the data frame returned by read.table into a matrix.
+values <- t(t(read.table(input_name, header = FALSE)))
+fit <- gpdFit(values, nextremes = 250, method = "mle")
+write.table(fit$par.ests, file = paste0("par.", input_name), sep = "\t", col.names = FALSE)
+
diff --git a/silhouetteRank/do_kmeans.R b/silhouetteRank/do_kmeans.R
@@ -0,0 +1,20 @@
+# K-means clustering of values supplied as a frequency table.
+# Command-line arguments, in order:
+#   1 freq_file      space-separated table; column 1 = count, column 2 = value
+#   2 par_seed       random seed; only a positive value is applied
+#   3 par_k          number of clusters
+#   4 nstart         number of random starts passed to kmeans
+#   5 centroid_file  output file for the cluster centers
+#   6 kmeans_file    output file for the cluster assignment of each value
+cli_args<-commandArgs(trailingOnly=T)
+freq_file<-cli_args[1]
+par_seed<-as.integer(cli_args[2])
+par_k<-as.integer(cli_args[3])
+nstart<-as.integer(cli_args[4])
+centroid_file<-cli_args[5]
+kmeans_file<-cli_args[6]
+
+# A seed of -1 (or any other non-positive value) leaves the RNG unseeded;
+# the positivity test alone already excludes -1.
+if(par_seed>0){
+set.seed(par_seed)
+}
+
+xx<-read.table(freq_file, sep=" ", header=F)
+# Expand the frequency table: repeat each value (column 2) count (column 1)
+# times. The vectorised rep() gives the same vector as appending row by row
+# without the quadratic cost of growing y inside a loop.
+y<-rep(xx[,2], times=xx[,1])
+kk<-kmeans(y, par_k, nstart=nstart, iter.max=300)
+write.table(kk$cluster, file=kmeans_file, sep=" ", quote=F, col.names=F, row.names=T)
+write.table(kk$centers, file=centroid_file, sep=" ", quote=F, col.names=F, row.names=T)