Skip to content

Commit

Permalink
Initial
Browse files Browse the repository at this point in the history
  • Loading branch information
bernard2012 committed Sep 6, 2020
1 parent be7e70d commit 998c189
Show file tree
Hide file tree
Showing 19 changed files with 2,052 additions and 1 deletion.
138 changes: 138 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/
15 changes: 15 additions & 0 deletions PKG-INFO
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Metadata-Version: 2.1
Name: silhouetteRank
Version: 1.0.0
Summary: silhouetteRank is a tool for finding spatially variable genes based on computing silhouette coefficient from binarized spatial gene expression data
Home-page: https://bitbucket.org/qzhu/silhouetteRank
Author: Qian Zhu
Author-email: [email protected]
License: UNKNOWN
Description: silhouetteRank is a tool for finding spatially variable genes based on computing silhouette coefficient from binarized spatial gene expression data
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Requires-Python: >=3.5
Description-Content-Type: text/markdown
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
# silhouetteRank
This toolkit contains silhouetteRank, a flexible method for finding spatially variable genes. It computes a score based on the silhouette coefficient of binarized gene expression data. It allows users to specify multiple running widths and combines the resulting p-values using Fisher's method.

silhouetteRank is written in Python 3.
4 changes: 4 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[egg_info]
tag_build =
tag_date = 0

75 changes: 75 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import subprocess
from distutils.command.build import build as _build
import setuptools

# Load the README text into `long_description`.
# The encoding is given explicitly so the read does not depend on the
# platform's default locale encoding.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Hooks the custom commands into the build step used by pip install.
class build(_build):  # pylint: disable=invalid-name
    """Build command whose sub-command list ends with ``CustomCommands``.

    The inherited sub-commands are kept in their original order and one extra
    ``("CustomCommands", None)`` entry is appended after them.
    """

    sub_commands = [*_build.sub_commands, ("CustomCommands", None)]

# Shell commands executed by the CustomCommands build step.  Each entry is a
# one-element list holding a shell command line.  The single command here
# looks up the "lib" directory under build/, changes into its silhouetteRank
# folder and runs `Rscript --version`.
CUSTOM_COMMANDS = [
    [
        'libdir=`ls -1 build|grep "lib"`; '
        "cd build/$libdir/silhouetteRank/ && Rscript --version"
    ],
]

class CustomCommands(setuptools.Command):
    """A setuptools Command class able to run arbitrary commands.

    When executed, every entry of the module-level ``CUSTOM_COMMANDS`` list is
    run through the shell; the first failing command aborts the build.
    """

    description = "run the shell commands listed in CUSTOM_COMMANDS"
    # distutils expects every command class to declare its options; this
    # command accepts none.
    user_options = []

    def initialize_options(self):
        """Nothing to initialise; present because the Command API requires it."""

    def finalize_options(self):
        """Nothing to finalise; present because the Command API requires it."""

    def RunCustomCommand(self, command_list):
        """Run one command through the shell and echo its combined output.

        Args:
            command_list: the command handed to ``subprocess.Popen`` with
                ``shell=True``.

        Raises:
            RuntimeError: if the command exits with a non-zero status.
        """
        print("Running command: %s" % command_list)
        p = subprocess.Popen(
            command_list,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # fold stderr into the captured stdout
            shell=True,
        )
        # Can use communicate(input='y\n'.encode()) if the command run requires
        # some confirmation.
        stdout_data, _ = p.communicate()
        # The pipe yields bytes; decode so the log shows readable text instead
        # of a b'...' repr.  Undecodable bytes are replaced, never fatal.
        print("Command output: %s" % stdout_data.decode(errors="replace"))
        if p.returncode != 0:
            raise RuntimeError(
                "Command %s failed: exit code: %s" % (command_list, p.returncode))

    def run(self):
        """Run every command in ``CUSTOM_COMMANDS``, in order."""
        for command in CUSTOM_COMMANDS:
            self.RunCustomCommand(command)



# Package metadata, console entry points and the custom build hooks.
setuptools.setup(
    name="silhouetteRank",
    version="1.0.5.10",
    author="Qian Zhu",
    author_email="[email protected]",
    description="silhouetteRank is a tool for finding spatially variable genes based on computing silhouette coefficient from binarized spatial gene expression data",
    # Use the README text read at the top of this file; it was previously
    # loaded and then discarded in favour of an empty string.
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://bitbucket.org/qzhu/silhouetteRank",
    packages=setuptools.find_packages(),
    entry_points={
        "console_scripts": [
            "silhouette_rank_one = silhouetteRank.silhouette_rank_one:main",
            "silhouette_rank_main = silhouetteRank.evaluate_2b:main",
            "silhouette_rank_random = silhouetteRank.evaluate_exact_one_2b:main",
        ]
    },
    # A list, not a tuple: the packaging tools expect a list of classifiers.
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.5",
    # R helper scripts shipped inside the package.
    package_data={"silhouetteRank": ["do_gpd.R", "do_kmeans.R", "qval.R"]},
    install_requires=[
        "scipy", "numpy", "pandas", "seaborn", "scikit-learn", "matplotlib"],
    # Replace the stock "build" command and register the extra sub-command.
    cmdclass={
        "build": build,
        "CustomCommands": CustomCommands,
    },
)

1 change: 1 addition & 0 deletions silhouetteRank/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Package name, exposed as a module-level attribute.
name = "silhouetteRank"
72 changes: 72 additions & 0 deletions silhouetteRank/combine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import argparse
import math
import os
import re
import subprocess
import sys
import tempfile
from operator import itemgetter

import numpy as np
import scipy
import scipy.stats

import silhouetteRank

def read(n):
    """Parse a per-gene p-value file into a dictionary.

    Every non-blank line is split on whitespace: the first field is the gene
    name and the second-to-last field is parsed as its p-value.  If a gene
    appears more than once, the last occurrence wins.

    Args:
        n: path of the file to read.

    Returns:
        dict mapping gene name (str) to p-value (float).

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
        ValueError: if the p-value field is not a valid float.
    """
    by_gene = {}
    # The context manager closes the file even when a line fails to parse.
    with open(n) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            by_gene[fields[0]] = float(fields[-2])
    return by_gene

def do_one(args):
    """Combine per-parameter p-values per gene and write scores with q-values.

    For every (examine_top, rbp) parameter pair a p-value file is read from
    ``args.input``.  For each gene the p-values are combined into a score of
    ``-2 * sum(log(p))``, which is converted to a p-value with the chi-square
    survival function using ``2 * number_of_pairs`` degrees of freedom.  The
    combined p-values are then passed to the packaged ``qval.R`` script, and
    one line ``gene score pval qval`` is written per gene to ``args.output``,
    ordered by decreasing score.

    Args:
        args: namespace providing ``examine_tops`` and ``rbp_ps`` (sequences
            of floats), ``matrix_type`` ("sim" or "dissim"), ``input``
            (directory holding the p-value files) and ``output`` (result
            file path).

    Raises:
        subprocess.CalledProcessError: if the Rscript call exits non-zero.
        FileNotFoundError: if ``Rscript`` or an input file cannot be found.
        ValueError: if a p-value is not positive (``math.log`` domain error).
    """
    # Dissimilarity-based results omit the ".sim" infix in the file name.
    if args.matrix_type == "dissim":
        name_template = "%s/silhouette.exact.rbp.%.2f.top.%.3f.pval.txt"
    else:
        name_template = "%s/silhouette.sim.exact.rbp.%.2f.top.%.3f.pval.txt"

    combos = [(top, rbp) for top in args.examine_tops for rbp in args.rbp_ps]
    by_gene = {}
    for examine_top, rbp in combos:
        fname = name_template % (args.input, rbp, examine_top)
        by_gene[(examine_top, rbp)] = read(fname)

    # The gene list is taken from the first parameter pair; every other file
    # is expected to contain the same genes.
    all_genes = list(by_gene[(args.examine_tops[0], args.rbp_ps[0])])
    tot_test = len(combos)
    score = {}
    pval = {}
    for g in all_genes:
        score[g] = -2.0 * sum(math.log(by_gene[key][g]) for key in combos)
        # logsf followed by exp, as in the original computation.
        pval[g] = np.exp(scipy.stats.chi2.logsf(score[g], tot_test * 2))

    score_it = sorted(score.items(), key=itemgetter(1), reverse=True)

    qval_script = os.path.join(os.path.dirname(silhouetteRank.__file__), "qval.R")
    # A private temporary directory replaces the fixed /tmp/1.pval and
    # /tmp/1.qval paths, so concurrent runs cannot overwrite each other and a
    # stale result from an earlier run can never be picked up.
    with tempfile.TemporaryDirectory() as tmp_dir:
        pval_path = os.path.join(tmp_dir, "1.pval")
        qval_path = os.path.join(tmp_dir, "1.qval")
        with open(pval_path, "w") as fw:
            fw.writelines("%s\n" % pval[gene] for gene, _ in score_it)

        # Argument list (no shell) keeps paths with spaces intact; check=True
        # surfaces a failing Rscript instead of silently continuing.
        subprocess.run(["Rscript", qval_script, pval_path, qval_path], check=True)

        with open(qval_path) as f:
            q_score = [float(line.rstrip("\n")) for line in f]

    with open(args.output, "w") as fw:
        for (gene, gene_score), q in zip(score_it, q_score):
            fw.write("%s %s %s %s\n" % (gene, gene_score, pval[gene], q))

def build_arg_parser():
    """Build the command-line parser for combine.py.

    Relies on the module-level ``import argparse``; the script previously used
    ``argparse`` without importing it and failed with NameError when run.

    Returns:
        argparse.ArgumentParser configured with the -r, -e, -m, -i and -o
        options.
    """
    parser = argparse.ArgumentParser(
        description="combine.py: combine spatial scores across parameters",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "-r", "--rbp-ps", dest="rbp_ps", nargs="+", type=float,
        default=[0.95, 0.99], help="p parameter of RBP")
    parser.add_argument(
        "-e", "--examine-tops", dest="examine_tops", nargs="+", type=float,
        default=[0.005, 0.010, 0.050, 0.100, 0.300],
        help="top proportion of cells per gene to be 1's (expressed)")
    parser.add_argument(
        "-m", "--matrix-type", dest="matrix_type", type=str,
        choices=["sim", "dissim"],
        help="whether to calculate similarity matrix or dissimilarity matrix",
        default="dissim")
    parser.add_argument(
        "-i", "--input-dir", dest="input", type=str, default=".",
        help="input directory containing individual spatial score rankings (to be aggregated)")
    parser.add_argument(
        "-o", "--output", dest="output", type=str, required=True,
        help="output file name")
    return parser


if __name__ == "__main__":
    args = build_arg_parser().parse_args()
    do_one(args)
6 changes: 6 additions & 0 deletions silhouetteRank/do_gpd.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Usage: Rscript do_gpd.R <input_file>
# Reads a header-less table from <input_file>, fits it with eva's gpdFit()
# (method "mle", nextremes = 250) and writes the parameter estimates to
# "par.<input_file>" as a tab-separated table without column names.
library(eva)

cli_args <- commandArgs(trailingOnly = TRUE)
f_name <- cli_args[1]

# read.table() returns a data frame; transposing twice turns it into a matrix
# with the original orientation.
observations <- t(t(read.table(paste0(f_name), header = FALSE)))

fit <- gpdFit(observations, nextremes = 250, method = "mle")

write.table(fit$par.ests, file = paste0("par.", f_name), sep = "\t", col.names = FALSE)

20 changes: 20 additions & 0 deletions silhouetteRank/do_kmeans.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Usage: Rscript do_kmeans.R <freq_file> <seed> <k> <nstart> <centroid_file> <kmeans_file>
# Expands a (count, value) frequency table into a vector, clusters it with
# kmeans() and writes the cluster assignments and the cluster centres.
cli_args <- commandArgs(trailingOnly = TRUE)
freq_file <- cli_args[1]
par_seed <- as.integer(cli_args[2])
par_k <- as.integer(cli_args[3])
nstart <- as.integer(cli_args[4])
centroid_file <- cli_args[5]
kmeans_file <- cli_args[6]

# Only a positive seed is applied; any other value (e.g. -1) leaves the
# random number generator unseeded.
if (par_seed > 0) {
  set.seed(par_seed)
}

# Column 1 holds the repeat count, column 2 the value to repeat.
xx <- read.table(freq_file, sep = " ", header = FALSE)

# Vectorised expansion: repeats each value by its count in row order.  This
# replaces a row-by-row append() loop, which copied the growing vector on
# every step and mis-iterated (seq(1, 0)) on an empty table.
y <- rep(xx[, 2], times = xx[, 1])

kk <- kmeans(y, par_k, nstart = nstart, iter.max = 300)
write.table(kk$cluster, file = kmeans_file, sep = " ", quote = FALSE, col.names = FALSE, row.names = TRUE)
write.table(kk$centers, file = centroid_file, sep = " ", quote = FALSE, col.names = FALSE, row.names = TRUE)
Loading

0 comments on commit 998c189

Please sign in to comment.