Commit f61b87d1 by Mustafa Tekpinar

Corrected a bug of overwriting on the fasta file

parent 2d1df3ad
...@@ -42,6 +42,13 @@ write.table(res[[3]][[3]],paste0(prot,"_pssm80.txt")) ...@@ -42,6 +42,13 @@ write.table(res[[3]][[3]],paste0(prot,"_pssm80.txt"))
# read evolutionary traces computed by JET # read evolutionary traces computed by JET
jet=read.table(paste(prot,"_jet.res",sep=""),head=TRUE) jet=read.table(paste(prot,"_jet.res",sep=""),head=TRUE)
if(sum(colnames(jet)=="traceMax")==1){trace=jet[,"traceMax"]}else{trace=jet[,"trace"]} if(sum(colnames(jet)=="traceMax")==1){trace=jet[,"traceMax"]}else{trace=jet[,"trace"]}
#You should comment line 44 to use this functionality. Or maybe, it should go into normalize functions
#That was what we originally decided with Alessandra.
#To get max values of PC, CV or Trace
#trace = c()
#for (row in 1:nrow(jet)) { trace<-append(trace, max(jet[row, "trace"], jet[row, "pc"])) }
#traceAli = sweep(binAli, MARGIN=2, trace, `*`) #traceAli = sweep(binAli, MARGIN=2, trace, `*`)
# compute evolutionary distances of all sequences with respect to the query # compute evolutionary distances of all sequences with respect to the query
distTrace = binAli[2:N[1],] %*% trace^2 distTrace = binAli[2:N[1],] %*% trace^2
......
...@@ -20,6 +20,7 @@ url http://www.rcsb.org/pdb/downloadFile.do URL of PDB server ...@@ -20,6 +20,7 @@ url http://www.rcsb.org/pdb/downloadFile.do URL of PDB server
>Filter >Filter
min_identity 0.20 min sequence identity min_identity 0.20 min sequence identity
max_identity 0.98 max sequence identity max_identity 0.98 max sequence identity
***************************************** *****************************************
>Sample >Sample
length_cutoff 0.8 minimum sequence length expressed in number of residues length_cutoff 0.8 minimum sequence length expressed in number of residues
...@@ -27,13 +28,14 @@ length_cutoff 0.8 minimum sequence length expressed in number of residues ...@@ -27,13 +28,14 @@ length_cutoff 0.8 minimum sequence length expressed in number of residues
***************************************** *****************************************
>Software >Software
clustalW /usr/local/bin/clustalw2 clustalW system dependent command clustalW /usr/local/bin/clustalw2 clustalW system dependent command
muscle /usr/bin/muscle muscle system dependent command muscle /usr/bin/muscle muscle system dependent command
naccess /opt/JET2/naccess2.1.1/naccess naccess system dependent command naccess /home/tekpinar/research/carbone-lab-software/naccess2.1.1/naccess naccess system dependent command
psiblast /opt/blast-2.2.27+/bin/psiblast psiblast system dependent command psiblast /usr/bin/psiblast psiblast system dependent command
***************************************** *****************************************
>Data >Data
substMatrix /opt/JET2/matrix directory location of matrices used in JET (Blosum62, gonnet and hsdm) substMatrix /home/tekpinar/research/carbone-lab-software/JET2/matrix directory location of matrices used in JET (Blosum62, gonnet and hsdm)
blastDatabases /disk1/blastdb/ directory location of databases used for local blast (nr{0-7}) blastDatabases /opt/blastdb directory location of databases used for local blast (nr{0-7})
***************************************** *****************************************
>ET >ET
...@@ -56,9 +58,9 @@ max_dist 20.0 max distance ...@@ -56,9 +58,9 @@ max_dist 20.0 max distance
>Interface >Interface
cutoff 0 minimum percentage accessible surface variation of an interface residu cutoff 0 minimum percentage accessible surface variation of an interface residu
ligand no (yes|no) keep contact of ligand (SUBSTRATE, PRODUCT and COFACTOR of database ENZYME) to compute interface of protein ligand no (yes|no) keep contact of ligand (SUBSTRATE, PRODUCT and COFACTOR of database ENZYME) to compute interface of protein
enzymeCpd /opt/JET/jet/data/enzyme.txt location of file containing database ENZYME enzymeCpd /home/tekpinar/research/carbone-lab-software/JET2/jet/data/enzyme.txt location of file containing database ENZYME
homologousPDB no (yes|no) add interface residues of homologous structures (find in pdb database clustered at 95% of identities) to interface of protein homologousPDB no (yes|no) add interface residues of homologous structures (find in pdb database clustered at 95% of identities) to interface of protein
clusteredPDB /opt/JET/jet/data/clusters95.txt location of pdb database clustered at 95% of identities clusteredPDB /home/tekpinar/research/carbone-lab-software/JET2/jet/data/clusters95.txt location of pdb database clustered at 95% of identities
***************************************** *****************************************
>Cluster >Cluster
......
...@@ -132,9 +132,15 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N): ...@@ -132,9 +132,15 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N):
doit(args.input,args.mutations,args.retrievingMethod,args.blastFile,args.fastaFile) doit(args.input,args.mutations,args.retrievingMethod,args.blastFile,args.fastaFile)
""" """
prot,seq,nl=extractQuerySeq(inAli) prot,seq,nl=extractQuerySeq(inAli)
createPDB(prot,seq) createPDB(prot,seq)
print "query protein: "+prot print "query protein: "+prot
print "computing conservation levels..." print "computing conservation levels..."
#I intend to run JET2 completely externally!!
#It is too much buggy and it has too many dependencies.
#Using it with a Docker or Singularity may be the best solution!
launchJET(prot,retMet,bFile,fFile,n,N,nl) launchJET(prot,retMet,bFile,fFile,n,N,nl)
print "done" print "done"
launchPred(prot,inAli,mutFile) launchPred(prot,inAli,mutFile)
......
...@@ -10,6 +10,7 @@ import argparse ...@@ -10,6 +10,7 @@ import argparse
import re import re
import math import math
import subprocess import subprocess
import shutil
# Extract the query sequence from the input alignment # Extract the query sequence from the input alignment
...@@ -67,21 +68,64 @@ def editConfJET(N): ...@@ -67,21 +68,64 @@ def editConfJET(N):
# Run JET to compute TJET values # Run JET to compute TJET values
def launchJET(prot,retMet,bFile,fFile,n,N,nl): def launchJET(prot,retMet,bFile,fFile,n,N,nl):
"""
Call JET2 and produce prot+"_jet.res" file.
prot+"_jet.res" will be used in the following steps (in launchPred)
to calculate independent and epistatic models.
Ideally, this call to JET2 should be from Dockers or Singularity
because installing all requirements of JET2 is a pain in the ass!
Parameters
----------
prot: string ???
Name of the protein ???
retMet: string
Retreival method of multiple sequence alignments file
It can be 'input', 'local' or 'server'. Default is local.
bFile: string
A multiple sequence alignment file obtained with psiblast.
It is used only if the retMet (explained above) is input.
fFile: string
A multiple sequence alignment file obtained with psiblast.
It is used only if the retMet (explained above) is input.
n: int
Number of JET2 iterations.
N: int
Default 20000
nl: int
Number of lines after > character in the query sequences file.
It is obtained in extractQuerySeq() function.
Returns
-------
Nothing
"""
subprocess.call("cp $GEMME_PATH/default.conf .",shell=True) subprocess.call("cp $GEMME_PATH/default.conf .",shell=True)
if retMet=="input": if retMet=="input":
if bFile!='': if bFile!='':
subprocess.call("cp "+bFile+" "+prot+"_A.psiblast",shell=True) subprocess.call("cp "+bFile+" "+prot+"_A.psiblast",shell=True)
jetcmd = "java -Xmx1000m -cp $JET_PATH:$JET_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+prot+".pdb -o `pwd` -p J -r input -b "+prot+"_A.psiblast -d chain -n "+n+" > "+prot+".out" jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+prot+".pdb -o `pwd` -p J -r input -b "+prot+"_A.psiblast -d chain -n "+n+" > "+prot+".out"
else: else:
print N print N
editConfJET(N) editConfJET(N)
#subprocess.call("cp "+fFile+" "+prot+"_A.fasta",shell=True)
subprocess.call("grep -m "+str(int(N)+1)+" -A "+str(nl)+" '^>' "+fFile+" > "+prot+"_A.fasta",shell=True) if(fFile == prot+"_A.fasta"):
jetcmd = "java -Xmx1000m -cp $JET_PATH:$JET_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+prot+".pdb -o `pwd` -p J -r input -f "+prot+"_A.fasta -d chain -n "+n+" > "+prot+".out" shutil.copy2(fFile, fFile+".orig")
#I think this subprocess call causes overwriting of the fasta file.
#subprocess.call("cp "+fFile+" "+prot+"_A.fasta",shell=True)
grpcmd="grep -m "+str(int(N)+1)+" -A "+str(nl)+" '^>' "+fFile+".orig > "+prot+"_A.fasta"
else:
#subprocess.call("cp "+fFile+" "+prot+"_A.fasta",shell=True)
grpcmd="grep -m "+str(int(N)+1)+" -A "+str(nl)+" '^>' "+fFile+" > "+prot+"_A.fasta"
print("\nRunning:\n"+grpcmd)
subprocess.call(grpcmd,shell=True)
jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+prot+".pdb -o `pwd` -p J -r input -f "+prot+"_A.fasta -d chain -n "+n+" > "+prot+".out"
else: else:
jetcmd = "java -Xmx1000m -cp $JET_PATH:$JET_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+prot+".pdb -o `pwd` -p J -r "+retMet+" -d chain -n "+n+" > "+prot+".out" jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+prot+".pdb -o `pwd` -p J -r "+retMet+" -d chain -n "+n+" > "+prot+".out"
print jetcmd print("\nRunning:\n"+jetcmd)
reCode=subprocess.call(jetcmd,shell=True) reCode=subprocess.call(jetcmd,shell=True)
if os.path.isfile(prot+"/"+prot+"_jet.res"): if os.path.isfile(prot+"/"+prot+"_jet.res"):
os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res") os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res")
...@@ -94,8 +138,11 @@ def launchPred(prot,inAli,mutFile): ...@@ -94,8 +138,11 @@ def launchPred(prot,inAli,mutFile):
rcmd="Rscript --save $GEMME_PATH/computePred.R "+prot+" "+inAli+" FALSE "+mutFile rcmd="Rscript --save $GEMME_PATH/computePred.R "+prot+" "+inAli+" FALSE "+mutFile
else: else:
rcmd="Rscript --save $GEMME_PATH/computePred.R "+prot+" "+inAli+" TRUE none" rcmd="Rscript --save $GEMME_PATH/computePred.R "+prot+" "+inAli+" TRUE none"
print rcmd print rcmd
reCode=subprocess.call(rcmd,shell=True) reCode=subprocess.call(rcmd,shell=True)
#Add plots here with gemmemore
return(reCode) return(reCode)
# Remove temporary files # Remove temporary files
...@@ -113,7 +160,7 @@ def cleanTheMess(prot,bFile,fFile): ...@@ -113,7 +160,7 @@ def cleanTheMess(prot,bFile,fFile):
os.remove(prot+"_A.fasta") os.remove(prot+"_A.fasta")
# if os.path.isfile(prot+"/"+prot+"_jet.res"): # if os.path.isfile(prot+"/"+prot+"_jet.res"):
# os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res") # os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res")
os.remove(prot+".pdb") # os.remove(prot+".pdb")
dir_name = prot+"/" dir_name = prot+"/"
if os.path.isdir(dir_name): if os.path.isdir(dir_name):
for f in os.listdir(dir_name): for f in os.listdir(dir_name):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment