Commit e7a7207f by Mustafa Tekpinar

Changed normWeightMode options.

Convert trace->tjet, maxhalftracepchalftracecvhalfcvpc->max
and ssjetormax->sstjetormax in ESGEMME.
parent eb8c519b
...@@ -46,7 +46,7 @@ res = list(computePSSM(ali,N[1],npos),computePSSM(aliCons,N[2],npos),computePSSM ...@@ -46,7 +46,7 @@ res = list(computePSSM(ali,N[1],npos),computePSSM(aliCons,N[2],npos),computePSSM
jet=read.table(paste(prot,"_jet.res",sep=""),head=TRUE) jet=read.table(paste(prot,"_jet.res",sep=""),head=TRUE)
############################################Added by MT to test effect of PC, CV, DFI, Bfactor etc. ############################################Added by MT to test effect of PC, CV, DFI, Bfactor etc.
trace = c() trace = c()
if ((normWeightMode=="maxhalftracepchalftracecvhalfcvpc")){ if ((normWeightMode=="max")){
print(paste("Using ", normWeightMode)) print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){ if(sum(colnames(jet)=="traceMax")==1){
...@@ -55,8 +55,8 @@ if ((normWeightMode=="maxhalftracepchalftracecvhalfcvpc")){ ...@@ -55,8 +55,8 @@ if ((normWeightMode=="maxhalftracepchalftracecvhalfcvpc")){
trace<-append(trace, max((jet[row, "trace"]+jet[row, "pc"])/2.0, max((jet[row, "trace"]+jet[row, "cv"])/2.0, (jet[row, "pc"]+jet[row, "cv"])/2.0 ))) trace<-append(trace, max((jet[row, "trace"]+jet[row, "pc"])/2.0, max((jet[row, "trace"]+jet[row, "cv"])/2.0, (jet[row, "pc"]+jet[row, "cv"])/2.0 )))
} }
} }
}else if (normWeightMode=="trace"){ }else if (normWeightMode=="tjet"){
print("Using only JET2 traces") print("Using only T_JET2 traces")
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){ if(sum(colnames(jet)=="traceMax")==1){
trace<-append(trace, jet[row, "traceMax"]) trace<-append(trace, jet[row, "traceMax"])
...@@ -74,22 +74,21 @@ if ((normWeightMode=="maxhalftracepchalftracecvhalfcvpc")){ ...@@ -74,22 +74,21 @@ if ((normWeightMode=="maxhalftracepchalftracecvhalfcvpc")){
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
trace<-append(trace, jet[row, "pc"]) trace<-append(trace, jet[row, "pc"])
} }
}else if (normWeightMode=="ssjetormax"){ }else if (normWeightMode=="sstjetormax"){
print("Using only ssjetormax") print("Using only sstjetormax")
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="ssjetormax")==1){ if(sum(colnames(jet)=="sstjetormax")==1){
trace<-append(trace, jet[row, "ssjetormax"]) trace<-append(trace, jet[row, "sstjetormax"])
}else{ }else{
print("No field called ssjetormax in the JET output!") print("No field called sstjetormax in the JET output!")
quit(status=-1) quit(status=-1)
} }
} }
}else{ }else{
print("ERROR: Unknown --normWeightMode selected!") print("ERROR: Unknown --normWeightMode selected!")
print("It can only be 'trace', 'pc', 'cv',") print("It can only be 'tjet', 'pc', 'cv',")
print("'maxhalftracepchalftracecvhalfcvpc'") print("'max' or 'sstjetormax'!")
print(" or 'ssjetormax'!")
} }
print(trace) print(trace)
##################################################################################################################### #####################################################################################################################
......
...@@ -296,8 +296,8 @@ def cleanTheMess(prot,bFile,fFile, chainID): ...@@ -296,8 +296,8 @@ def cleanTheMess(prot,bFile,fFile, chainID):
if fFile!=prot+"_"+chainID+".fasta": if fFile!=prot+"_"+chainID+".fasta":
if os.path.isfile(prot+"_"+chainID+".fasta"): if os.path.isfile(prot+"_"+chainID+".fasta"):
os.remove(prot+"_"+chainID+".fasta") os.remove(prot+"_"+chainID+".fasta")
if os.path.isfile(prot+"_jet.res"): # if os.path.isfile(prot+"_jet.res"):
os.remove(prot+"_jet.res") # os.remove(prot+"_jet.res")
if os.path.isfile(prot+".pdb"): if os.path.isfile(prot+".pdb"):
os.remove(prot+".pdb") os.remove(prot+".pdb")
...@@ -820,10 +820,9 @@ def parse_command_line(): ...@@ -820,10 +820,9 @@ def parse_command_line():
required=False, default=None) required=False, default=None)
retMet_args.add_argument('--normweightmode', dest='normweightmode', type=str, \ retMet_args.add_argument('--normweightmode', dest='normweightmode', type=str, \
help="It can be one of these: 'trace', 'tracemovingaverage', 'cv', 'pc', 'dfi', 'maxtracesc',"+\ help="It can be one of these: 'tjet', 'cv', 'pc',"+\
"'maxtracepc', 'maxtracecv', 'maxtracepccv', 'maxtracepcsc', 'maxtracepchalfpccv', "+\ "max or sstjetormax. Default is 'tjet'.",
"'halfcvpc', maxhalftracepchalftracecvhalfcvpc or jetormax. Default is 'trace'.", required=False, default="tjet")
required=False, default="trace")
args = parser.parse_args() args = parser.parse_args()
...@@ -863,13 +862,13 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -863,13 +862,13 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
print("query protein: "+prot) print("query protein: "+prot)
if( (normWeightMode != 'trace') and \ if( (normWeightMode != 'tjet') and \
(normWeightMode != 'cv') and \ (normWeightMode != 'cv') and \
(normWeightMode != 'pc') and \ (normWeightMode != 'pc') and \
(normWeightMode != 'maxhalftracepchalftracecvhalfcvpc') and \ (normWeightMode != 'max') and \
(normWeightMode != 'ssjetormax')): (normWeightMode != 'sstjetormax')):
print("ERROR: normWeightMode can only be 'trace', 'cv', 'pc', "+\ print("ERROR: normWeightMode can only be 'tjet', 'cv', 'pc', "+\
"'maxhalftracepchalftracecvhalfcvpc' or 'ssjetormax'!") "'max' or 'sstjetormax'!")
sys.exit(-1) sys.exit(-1)
structure = parsePDB(prot+".pdb") structure = parsePDB(prot+".pdb")
...@@ -899,31 +898,14 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -899,31 +898,14 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
shutil.copy2(jetfile, prot+"_jet.res") shutil.copy2(jetfile, prot+"_jet.res")
print("done") print("done")
# #If a real pdb file is given, calculate dfi for the residues. if((normWeightMode=='sstjetormax')):
# if(((normWeightMode=='dfi') or (normWeightMode=='maxtracedfi') or (normWeightMode=='maxdfitrace'))):
# if (pdbfile == None):
# print("ERROR: There is not any pdb file to calculate DFI.")
# sys.exit(-1)
# else:
# print("Calculating %DFI data per residue!")
# dfi = calcPercentDFI(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true")
# dfi = dfi
# df = pd.read_table(prot+"_jet.res")
# print(df)
# df['dfi'] = dfi.round(4)
# df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
# print(df)
if((normWeightMode=='ssjetormax')):
if (pdbfile == None): if (pdbfile == None):
print("ERROR: There is not any pdb file to calculate DFI.") print("ERROR: There is not any pdb file.")
sys.exit(-1) sys.exit(-1)
else: else:
calculateSecondaryStructure(pdbfile) calculateSecondaryStructure(pdbfile)
countCoilSegments(pdbfile+".dssp") countCoilSegments(pdbfile+".dssp")
#os.system("python /home/tekpinar/software/scripts/python_scripts/biotite-secondary-structure.py "+pdbfile+" "+pdbfile+".dssp")
#os.system("python /home/tekpinar/software/scripts/python_scripts/count-coil-segments-v2.py "+pdbfile+".dssp >"+pdbfile+".dssp.new")
df = pd.read_table(prot+"_jet.res", sep="\s+") df = pd.read_table(prot+"_jet.res", sep="\s+")
print(df['pos']) print(df['pos'])
print(pdbfile+".dssp") print(pdbfile+".dssp")
...@@ -935,46 +917,46 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -935,46 +917,46 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
mergedRes = pd.merge(df, df2, on ='pos', right_index=False) mergedRes = pd.merge(df, df2, on ='pos', right_index=False)
print(mergedRes) print(mergedRes)
ssjetormaxList = [] sstjetormaxList = []
maxCoilLength = 5 maxCoilLength = 5
print("WARNING: Max. coil length = {}".format(maxCoilLength)) print("WARNING: Max. coil length = {}".format(maxCoilLength))
for index, row in mergedRes.iterrows(): for index, row in mergedRes.iterrows():
if(row['ss']=='C') and (row['length']>maxCoilLength): if(row['ss']=='C') and (row['length']>maxCoilLength):
ssjetormaxList.append(row['trace']) sstjetormaxList.append(row['trace'])
else: else:
maxVal = max([((row['trace']+row['pc'])/2.0), ((row['trace']+row['cv'])/2.0), ((row['cv']+row['pc'])/2.0)]) maxVal = max([((row['trace']+row['pc'])/2.0), ((row['trace']+row['cv'])/2.0), ((row['cv']+row['pc'])/2.0)])
ssjetormaxList.append(maxVal) sstjetormaxList.append(maxVal)
#ssjetormaxList=rankSortProteinData(ssjetormaxList, inverted=False) #sstjetormaxList=rankSortProteinData(sstjetormaxList, inverted=False)
df['ssjetormax'] = ssjetormaxList df['sstjetormax'] = sstjetormaxList
df['ssjetormax'] = df['ssjetormax'].round(decimals = 4) df['sstjetormax'] = df['sstjetormax'].round(decimals = 4)
df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w') df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
#sys.exit(-1) #sys.exit(-1)
#If a real pdb file is given, calculate dfi for the residues. # #If a real pdb file is given, calculate dfi for the residues.
if(((normWeightMode=='tracemovingaverage'))): # if(((normWeightMode=='tracemovingaverage'))):
print("Calculating trace moving average per residue!") # print("Calculating trace moving average per residue!")
# tma = tracemovingaverage # # tma = tracemovingaverage
df = pd.read_csv(prot+"_jet.res", delimiter=r"\s+") # df = pd.read_csv(prot+"_jet.res", delimiter=r"\s+")
print(df.columns) # print(df.columns)
# Get the window of series # # Get the window of series
# of observations of specified window size # # of observations of specified window size
window_size = 3 # window_size = 3
windows = df['trace'].rolling(window_size) # windows = df['trace'].rolling(window_size)
# Create a series of moving # # Create a series of moving
# averages of each window # # averages of each window
moving_averages = windows.mean() # moving_averages = windows.mean()
tma = moving_averages # tma = moving_averages
print(df) # print(df)
df['tracemovingaverage'] = tma.round(4) # df['tracemovingaverage'] = tma.round(4)
#The first two elements are just copied bc they don't exist due to the moving average. # #The first two elements are just copied bc they don't exist due to the moving average.
df['tracemovingaverage'].iloc[0] = df['trace'].iloc[0].round(4) # df['tracemovingaverage'].iloc[0] = df['trace'].iloc[0].round(4)
df['tracemovingaverage'].iloc[1] = df['trace'].iloc[1].round(4) # df['tracemovingaverage'].iloc[1] = df['trace'].iloc[1].round(4)
df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w') # df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
print(df) # print(df)
# #If a real pdb file is given, calculate or get Bfactors for the residues. # #If a real pdb file is given, calculate or get Bfactors for the residues.
# if(((normWeightMode=='bfactor') or (normWeightMode=='maxtracebfactor') or (normWeightMode=='maxbfactortrace'))): # if(((normWeightMode=='bfactor') or (normWeightMode=='maxtracebfactor') or (normWeightMode=='maxbfactortrace'))):
# isCalc = True # isCalc = True
......
...@@ -27,7 +27,7 @@ then ...@@ -27,7 +27,7 @@ then
echo "Using blat-af2.pdb for the structural feature calculations!" echo "Using blat-af2.pdb for the structural feature calculations!"
echo "Entire mutational map of the protein will be calculated!" echo "Entire mutational map of the protein will be calculated!"
python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \ python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
--pdbfile ../data/blat-af2.pdb --normweightmode maxhalftracepchalftracecvhalfcvpc --pdbfile ../data/blat-af2.pdb --normweightmode max
elif [ "$1" == "withpdb-withmutfile" ] elif [ "$1" == "withpdb-withmutfile" ]
then then
...@@ -36,17 +36,17 @@ then ...@@ -36,17 +36,17 @@ then
echo "Using blat-af2.pdb for the structural feature calculations!" echo "Using blat-af2.pdb for the structural feature calculations!"
echo "Only effects of mutations specified in the Stiffler_2015_BLAT_ECOLX.mut file will be calculated!" echo "Only effects of mutations specified in the Stiffler_2015_BLAT_ECOLX.mut file will be calculated!"
python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \ python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
--pdbfile ../data/blat-af2.pdb --normweightmode maxhalftracepchalftracecvhalfcvpc \ --pdbfile ../data/blat-af2.pdb --normweightmode max \
-m ../data/Stiffler_2015_BLAT_ECOLX.mut -m ../data/Stiffler_2015_BLAT_ECOLX.mut
elif [ "$1" == "ssjetormax" ] elif [ "$1" == "sstjetormax" ]
then then
#Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file. #Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file.
echo "Using blat-af2.pdb for the secondary structure based calculations!" echo "Using blat-af2.pdb for the secondary structure based calculations!"
echo "Only effects of mutations specified in the Stiffler_2015_BLAT_ECOLX.mut file will be calculated!" echo "Only effects of mutations specified in the Stiffler_2015_BLAT_ECOLX.mut file will be calculated!"
python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \ python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
--pdbfile ../data/blat-af2.pdb --normweightmode ssjetormax \ --pdbfile ../data/blat-af2.pdb --normweightmode sstjetormax \
-m ../data/Stiffler_2015_BLAT_ECOLX.mut -m ../data/Stiffler_2015_BLAT_ECOLX.mut
else else
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment