Removed gemmeAnal.py. Its functions were moved to sgemme.py, so the module is not needed anymore.

fcd3b21b · Mustafa Tekpinar · 8f6e1b64 · 8f6e1b64 · fcd3b21b
Commit fcd3b21b authored Feb 21, 2023 by Mustafa Tekpinar
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 275 deletions

gemmeAnal.py gemmeAnal.py +0 -273

sgemme.py sgemme.py +4 -2

No files found.
--- a/gemmeAnal.py
+++ b/gemmeAnal.py
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2018-2022: Elodie Laine - Mustafa Tekpinar
-# This code is part of the gemme package and governed by its license.
-# Please see the LICENSE.txt file included as part of this package.
-
-import sys
-import os
-import argparse
-import re
-import math
-import subprocess
-import shutil
-import glob
-import pandas as pd
-import numpy as np
-
-
-# Moved to sgemme.py
-# def extractQuerySeq(filename):
-# 	"""
-# 		# Extract the query sequence from the input alignment
-# 	"""
-
-# 	fIN = open(filename,"r")
-# 	lines = fIN.readlines()
-# 	fIN.close()
-
-# 	if lines[0][0]!=">":
-# 		raise Exception('bad FASTA format')
-# 	else:
-# 		prot = re.compile("[^A-Z0-9a-z]").split(lines[0][1:])[0]
-# 		fOUT = open(prot+".fasta","w")
-# 		fOUT.write(">"+prot+"\n")
-# 		seq=""
-# 		i = 1
-# 		while lines[i][0]!=">":
-# 			seq = seq + lines[i].strip().strip(".").strip("-")
-# 			fOUT.write(lines[i])
-# 			i = i + 1
-# 	fOUT.close()
-# 	return prot,seq,i-1
-
-
-# def getNbSeq(filename):
-# 	#TODO: Remove bash dependency and count the sequences within Python!
-# 	"""
-# 		# Get the number of sequences in a multi-fasta file
-		
-# 	"""
-# 	if filename!='':
-# 		proc=subprocess.Popen("grep -c '^>' "+filename,stdout=subprocess.PIPE,shell=True)
-# 		return int(proc.stdout.read())
-# 	else:
-# 		return 0
-
-# def createPDB(prot,seq):
-# 	"""
-# 		If there is not a real PDB file for a given sequence,
-# 		create a fake PDB containing only dummy CA atoms.  
-# 	"""
-# 	d = {'C': 'CYS', 'D': 'ASP', 'S': 'SER',  'Q': 'GLN', 'K': 'LYS',
-# 	'I': 'ILE', 'P': 'PRO', 'T': 'THR', 'F': 'PHE', 'N': 'ASN', 
-# 	'G': 'GLY',  'H': 'HIS', 'L': 'LEU', 'R': 'ARG', 'W': 'TRP', 
-# 	'A': 'ALA', 'V': 'VAL', 'E': 'GLU', 'Y': 'TYR', 'M': 'MET'}
-
-# 	fOUT = open(prot+'.pdb','w')
-# 	i = 1
-# 	for let in seq:
-# 		if(let != '-'):
-# 			fOUT.write('ATOM%7d  CA  %s A%4d      43.524  70.381  46.465  1.00   0.0\n'%(i,d[let.upper()],i))
-# 			i += 1
-		
-# 	fOUT.close()
-
-# def editConfJET(N):
-# 	"""
-# 		# Edit JET configuration file with correct number of Seqs & MSA
-# 	"""
-# 	reCode=subprocess.call("sed -i 's/results\t\t5000/results\t\t"+str(N)+"/' default.conf",shell=True)
-# 	return(reCode)
-
-# def minMaxNormalization(data):
-#     """
-#         Min-max normalization of a data array.
-#     """
-#     return (data - np.min(data)) / (np.max(data) - np.min(data))
-
-# # Run JET to compute TJET values
-# def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
-# 	"""
-# 		Call JET2 and produce prot+"_jet.res" file.
-
-# 		prot+"_jet.res" will be used in the following steps (in launchPred)
-# 		to calculate independent and epistatic models.
-# 		Ideally, this call to JET2 should be from Dockers or Singularity 
-# 		because installing all requirements of JET2 is a pain in the ass!
-
-# 	Parameters
-#     ----------
-#     prot: string ???
-#         Name of the protein ???
-#     retMet: string
-# 		Retrieval method for the multiple sequence alignment file.
-# 		It can be 'input', 'local' or 'server'. Default is local.
-# 	bFile: string 
-#         A multiple sequence alignment file obtained with psiblast.
-# 		It is used only if the retMet (explained above) is input.
-# 	fFile: string 
-#         A multiple sequence alignment file in FASTA format.
-# 		It is used only if the retMet (explained above) is input.
-# 	pdbfile: string 
-#         a Protein Data Bank file obtained with rcsb.org or any 
-# 		computational method like alphafold.
-# 		If it is None, only JET and PC scores are calculated.
-# 		Otherwise, CV and other structural-dynamical features also
-# 		can be calculated.
-# 	chains: list
-# 		A list of chains available in the pdb file.
-# 		Most of the time, it is supposed to be just one! 
-# 	n: int
-# 		Number of JET2 iterations.
-# 	N: int
-# 		Default 40000
-# 	nl: int
-# 		Number of lines after > character in the query sequences file.
-# 		It is obtained in extractQuerySeq() function.
-
-#     Returns
-#     -------
-# 	Nothing 
-# 	"""
-
-# 	chainID = chains[0]
-	
-# 	#TODO: Remove Bash dependency here. Make the copying process in Python
-# 	subprocess.call("cp $SGEMME_PATH/default.conf .",shell=True)
-
-# 	if retMet=="input":
-# 		if bFile!='':
-# 			#TODO: I think these two lines must be here as well but I am not sure. 
-# 			# print(N)
-# 			# editConfJET(N)
-
-# 			if(bFile == prot+"_"+chainID+".psiblast"):
-# 				shutil.copy2(bFile, bFile+".orig")
-# 				shutil.copy2(bFile+".orig ", prot+"_"+chainID+".psiblast")
-# 			else:
-# 				shutil.copy2(bFile+" ", prot+"_"+chainID+".psiblast")
-# 			if(pdbfile == None):
-# 				jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
-# 						prot+".pdb -o `pwd` -p J -r input -b "+prot+"_"+chainID+".psiblast -d chain -n "+n+" > "+prot+".out"
-# 			else:
-# 				# jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
-# 				#  		prot+".pdb -o `pwd` -p AVJ   -r input -b "+prot+"_"+chainID+".psiblast -d chain -n "+n+" > "+prot+".out"
-# 				jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
-# 					    prot+".pdb -o `pwd` -p AVJCG -r input -f "+prot+"_"+chainID+".psiblast -d chain -n "+n+" -a 5"+" > "+prot+".out"
-# 						#One can also add: -g 'trace,pc,cv,clusters,axs' 
-
-# 		else:
-# 			print(N)
-# 			editConfJET(N)
-
-# 			if(fFile == prot+"_"+chainID+".fasta"):
-# 				shutil.copy2(fFile, fFile+".orig")
-# 				#I think this subprocess call causes overwriting of the fasta file. 
-# 				#subprocess.call("cp "+fFile+" "+prot+"_"+chainID+".fasta",shell=True)
-# 				grpcmd="grep -m "+str(int(N)+1)+" -A "+str(nl)+" '^>' "+fFile+".orig > "+prot+"_"+chainID+".fasta"
-# 			else:
-# 				#subprocess.call("cp "+fFile+" "+prot+"_"+chainID+".fasta",shell=True)
-# 				grpcmd="grep -m "+str(int(N)+1)+" -A "+str(nl)+" '^>' "+fFile+" > "+prot+"_"+chainID+".fasta"
-			
-# 			print("\nRunning:\n"+grpcmd)
-# 			subprocess.call(grpcmd,shell=True)
-# 			if(pdbfile == None):
-# 				jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
-# 				     	prot+".pdb -o `pwd` -p J -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" > "+prot+".out"
-# 				print("\nRunning command:\n"+jetcmd)
-# 				reCode=subprocess.call(jetcmd,shell=True)
-# 				if os.path.isfile(prot+"/"+prot+"_jet.res"):
-# 					os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res")
-# 			else:
-# 				# Calculate SC1
-# 				jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
-# 					    prot+".pdb -o `pwd` -p AVJCG -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" -a 3"+" > "+prot+".out"
-# 						#One can also add: -g 'trace,pc,cv,clusters,axs' 
-				
-# 				print("\nRunning for SC1:\n"+jetcmd)
-# 				reCode=subprocess.call(jetcmd,shell=True)
-# 				if os.path.isfile(prot+"/"+prot+"_jet.res"):
-# 					os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res")
-				
-# 				dir_name = prot+"/"
-# 				if os.path.isdir(dir_name):
-# 					for f in os.listdir(dir_name): 
-# 						f_path = os.path.join(dir_name, f)
-# 						if os.path.isfile(f_path):
-# 							os.remove(f_path)
-# 					os.rmdir(dir_name)
-
-
-# 	else:
-# 		if(pdbfile == None):
-# 			jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
-# 			    	prot+".pdb -o `pwd` -p J -r "+retMet+" -d chain -n "+n+" > "+prot+".out"
-# 		else:
-# 			# jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
-# 			#     	prot+".pdb -o `pwd` -p AVJ   -r "+retMet+" -d chain -n "+n+" > "+prot+".out"
-# 			jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
-# 					prot+".pdb -o `pwd` -p AVJCG -r "+retMet+" -d chain -n "+n+" -a 5"+" > "+prot+".out"
-		
-# 		reCode=subprocess.call(jetcmd,shell=True)
-# 		if os.path.isfile(prot+"/"+prot+"_jet.res"):
-# 			os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res")
-		
-# 		dir_name = prot+"/"
-# 		if os.path.isdir(dir_name):
-# 			for f in os.listdir(dir_name): 
-# 				f_path = os.path.join(dir_name, f)
-# 				if os.path.isfile(f_path):
-# 					os.remove(f_path)
-# 			os.rmdir(dir_name)
-# 	# print("\nRunning:\n"+jetcmd)
-# 	# reCode=subprocess.call(jetcmd,shell=True)
-# 	# if os.path.isfile(prot+"/"+prot+"_jet.res"):
-# 	# 	os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res")
-# 	return(reCode)
-
-# # Run Rscript to compute predictions
-# def launchPred(prot,inAli,mutFile, normWeightMode, alphabet):
-
-# 	if mutFile!='':
-# 		rcmd="Rscript --save $SGEMME_PATH/computePred.R "+prot+" "+inAli+" FALSE "+mutFile+" "+normWeightMode+" "+alphabet
-# 	else:
-# 		rcmd="Rscript --save $SGEMME_PATH/computePred.R "+prot+" "+inAli+" TRUE none "+normWeightMode+" "+alphabet
-
-# 	print("\nRunning: \n"+rcmd)
-# 	reCode=subprocess.call(rcmd,shell=True)
-
-# 	#Add plots here with gemmemore
-# 	return(reCode)
-
-# # Remove temporary files
-# def cleanTheMess(prot,bFile,fFile, chainID):
-
-# 	if bFile!='':
-# 		if bFile!=prot+"_"+chainID+".psiblast":
-# 			os.remove(prot+"_"+chainID+".psiblast")
-# 	else:
-# 		if os.path.isfile(prot+"/"+prot+"_"+chainID+".psiblast"):
-# 			os.rename(prot+"/"+prot+"_"+chainID+".psiblast",prot+"_"+chainID+".psiblast")
-# 	if fFile!='':
-# 		if fFile!=prot+"_"+chainID+".fasta":
-# 			if os.path.isfile(prot+"_"+chainID+".fasta"):
-# 				os.remove(prot+"_"+chainID+".fasta")		
-# #	if os.path.isfile(prot+"/"+prot+"_jet.res"):
-# #		os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res")
-# #	os.remove(prot+".pdb")
-	
-# 	# Get all files with suffix nwk
-# 	treefiles = glob.glob('*.nwk')
-
-# 	# Iterate over the list of files and remove individually
-# 	for file in treefiles:
-# 		os.remove(file)
-
-# 	dir_name = prot+"/"
-# 	if os.path.isdir(dir_name):
-# 		for f in os.listdir(dir_name): 
-# 			f_path = os.path.join(dir_name, f)
-# 			if os.path.isfile(f_path):
-# 				os.remove(f_path)
-# 		os.rmdir(dir_name)
--- a/sgemme.py
+++ b/sgemme.py
@@ -14,15 +14,17 @@ import subprocess
 import math
 import numpy as np
 import matplotlib.pylab as plt
+import shutil
+import glob

 from prody import *
 from scipy.stats import rankdata

-from gemmeAnal import *
+#from gemmeAnal import *

 import pandas as pd

-#############The following part between # signs are moved from gemmeAnal.py to 
+#############The following part between # signs is moved from gemmeAnal.py to 
 # make the code simpler.
 def extractQuerySeq(filename):
 	"""