Commit a823960e by Mustafa Tekpinar

Added jetormax to the normalization options

parent 54a3c39a
# Copyright (c) 2018: Elodie Laine # Copyright (c) 2018: Elodie Laine
# Copyright (c) 2022: Mustafa Tekpinar # Copyright (c) 2022-2023: Mustafa Tekpinar
# This code is part of the gemme package and governed by its license. # This code is part of the gemme package and governed by its license.
# Please see the LICENSE.txt file included as part of this package. # Please see the LICENSE.txt file included as part of this package.
# Usage: Rscript --save computePred.R XXXX # Usage: Rscript --save computePred.R XXXX
library("seqinr") library("seqinr")
source(paste(Sys.getenv("GEMME_PATH"),"/pred.R",sep="")) source(paste(Sys.getenv("SGEMME_PATH"),"/pred.R",sep=""))
# amino acid full alphabet # amino acid full alphabet
aa = c("a","c","d","e","f","g","h","i","k","l","m","n","p","q","r","s","t","v","w","y") aa = c("a","c","d","e","f","g","h","i","k","l","m","n","p","q","r","s","t","v","w","y")
# BLOSUM62 matrix # BLOSUM62 matrix
blosum62p = as.matrix(read.table(paste0(Sys.getenv("GEMME_PATH"),"/blosum62p.txt"),row.names=1)) blosum62p = as.matrix(read.table(paste0(Sys.getenv("SGEMME_PATH"),"/blosum62p.txt"),row.names=1))
bgp = blosum62p[order(rownames(blosum62p)),order(colnames(blosum62p))] bgp = blosum62p[order(rownames(blosum62p)),order(colnames(blosum62p))]
bg = apply(bgp,1,sum) bg = apply(bgp,1,sum)
...@@ -136,6 +136,19 @@ if((normWeightMode=="maxtracepc") | (normWeightMode=="maxpctrace")){ ...@@ -136,6 +136,19 @@ if((normWeightMode=="maxtracepc") | (normWeightMode=="maxpctrace")){
trace<-append(trace, max((jet[row, "trace"]+jet[row, "pc"])/2.0, max((jet[row, "trace"]+jet[row, "cv"])/2.0, (jet[row, "pc"]+jet[row, "cv"])/2.0 ))) trace<-append(trace, max((jet[row, "trace"]+jet[row, "pc"])/2.0, max((jet[row, "trace"]+jet[row, "cv"])/2.0, (jet[row, "pc"]+jet[row, "cv"])/2.0 )))
} }
} }
}else if ((normWeightMode=="jetormax")){
print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){
maxValue = max((jet[row, "traceMax"]+jet[row, "pc"])/2.0, max((jet[row, "traceMax"]+jet[row, "cv"])/2.0, (jet[row, "pc"]+jet[row, "cv"])/2.0 ))
traceValue = jet[row, "traceMax"]
trace<-append(trace, max((traceValue), (maxValue)))
}else{
maxValue = max((jet[row, "trace"]+jet[row, "pc"])/2.0, max((jet[row, "trace"]+jet[row, "cv"])/2.0, (jet[row, "pc"]+jet[row, "cv"])/2.0 ))
traceValue = jet[row, "trace"]
trace<-append(trace, max((traceValue), (maxValue)))
}
}
}else if ((normWeightMode=="maxtracepchalfpccv")){ }else if ((normWeightMode=="maxtracepchalfpccv")){
print(paste("Using ", normWeightMode)) print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
...@@ -244,7 +257,8 @@ if((normWeightMode=="maxtracepc") | (normWeightMode=="maxpctrace")){ ...@@ -244,7 +257,8 @@ if((normWeightMode=="maxtracepc") | (normWeightMode=="maxpctrace")){
print("ERROR: Unknown --normWeightMode selected!") print("ERROR: Unknown --normWeightMode selected!")
print("It can only be 'trace', 'tracemovingaverage', 'pc', 'cv', 'dfi', 'bfactor',") print("It can only be 'trace', 'tracemovingaverage', 'pc', 'cv', 'dfi', 'bfactor',")
print(" 'maxtracepc', 'maxtracecv', 'maxtracepccv', 'halfcvpc', 'halftracepc', 'maxtracepchalfpccv',") print(" 'maxtracepc', 'maxtracecv', 'maxtracepccv', 'halfcvpc', 'halftracepc', 'maxtracepchalfpccv',")
print(" 'maxtracesc', 'maxtracescpc', 'maxtracedfi' or 'maxtracebfactor'!") print(" 'maxtracesc', 'maxtracescpc', 'maxtracedfi', 'maxtracebfactor', 'maxhalftracepchalftracecvhalfcvpc'")
print(" or 'jetormax'!")
} }
print(trace) print(trace)
##################################################################################################################### #####################################################################################################################
......
...@@ -16,30 +16,30 @@ import pandas as pd ...@@ -16,30 +16,30 @@ import pandas as pd
import numpy as np import numpy as np
# Moved to sgemme.py
def extractQuerySeq(filename): # def extractQuerySeq(filename):
""" # """
# Extract the query sequence from the input alignment # # Extract the query sequence from the input alignment
""" # """
fIN = open(filename,"r") # fIN = open(filename,"r")
lines = fIN.readlines() # lines = fIN.readlines()
fIN.close() # fIN.close()
if lines[0][0]!=">": # if lines[0][0]!=">":
raise Exception('bad FASTA format') # raise Exception('bad FASTA format')
else: # else:
prot = re.compile("[^A-Z0-9a-z]").split(lines[0][1:])[0] # prot = re.compile("[^A-Z0-9a-z]").split(lines[0][1:])[0]
fOUT = open(prot+".fasta","w") # fOUT = open(prot+".fasta","w")
fOUT.write(">"+prot+"\n") # fOUT.write(">"+prot+"\n")
seq="" # seq=""
i = 1 # i = 1
while lines[i][0]!=">": # while lines[i][0]!=">":
seq = seq + lines[i].strip().strip(".").strip("-") # seq = seq + lines[i].strip().strip(".").strip("-")
fOUT.write(lines[i]) # fOUT.write(lines[i])
i = i + 1 # i = i + 1
fOUT.close() # fOUT.close()
return prot,seq,i-1 # return prot,seq,i-1
def getNbSeq(filename): def getNbSeq(filename):
...@@ -135,7 +135,7 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl): ...@@ -135,7 +135,7 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
#TODO: Remove Bash dependency here. Make the copying process in Python #TODO: Remove Bash dependency here. Make the copying process in Python
subprocess.call("cp $SGEMME_PATH/default.conf .",shell=True) subprocess.call("cp $SGEMME_PATH/default.conf .",shell=True)
if retMet=="input": if retMet=="input":
if bFile!='': if bFile!='':
#TODO: I think these two lines must be here as well but I am not sure. #TODO: I think these two lines must be here as well but I am not sure.
......
...@@ -174,7 +174,7 @@ computePSSM2<-function(mat){ ...@@ -174,7 +174,7 @@ computePSSM2<-function(mat){
} }
computeNbSeqsAlph<-function(nbSeqs,alphabet){ computeNbSeqsAlph<-function(nbSeqs,alphabet){
path=paste(Sys.getenv("GEMME_PATH"),"/alphabets/",sep="") path=paste(Sys.getenv("SGEMME_PATH"),"/alphabets/",sep="")
AA = read.table(paste(path,alphabet,".txt",sep=""))$V1 AA = read.table(paste(path,alphabet,".txt",sep=""))$V1
facAA = rep(NA,20) facAA = rep(NA,20)
for(Let in AA){ for(Let in AA){
...@@ -210,7 +210,7 @@ readMut<-function(fname){ ...@@ -210,7 +210,7 @@ readMut<-function(fname){
} }
which.class<-function(a,alphabet){ which.class<-function(a,alphabet){
path=paste(Sys.getenv("GEMME_PATH"),"/alphabets/",sep="") path=paste(Sys.getenv("SGEMME_PATH"),"/alphabets/",sep="")
AA = read.table(paste(path,alphabet,".txt",sep=""))$V1 AA = read.table(paste(path,alphabet,".txt",sep=""))$V1
for(Let in AA){ for(Let in AA){
Let = toString(Let) Let = toString(Let)
......
...@@ -22,6 +22,8 @@ from gemmeAnal import * ...@@ -22,6 +22,8 @@ from gemmeAnal import *
import pandas as pd import pandas as pd
#############The following part between # signs are moved from gemmeAnal.py to
# make the code more simple.
def extractQuerySeq(filename): def extractQuerySeq(filename):
""" """
# Extract the query sequence from the input alignment # Extract the query sequence from the input alignment
...@@ -45,7 +47,7 @@ def extractQuerySeq(filename): ...@@ -45,7 +47,7 @@ def extractQuerySeq(filename):
i = i + 1 i = i + 1
fOUT.close() fOUT.close()
return prot,seq,i-1 return prot,seq,i-1
###############################################################################
def rankSortProteinData(dataArray, inverted=True): def rankSortProteinData(dataArray, inverted=True):
""" """
This function ranksorts protein data and converts it This function ranksorts protein data and converts it
...@@ -477,7 +479,8 @@ def parse_command_line(): ...@@ -477,7 +479,8 @@ def parse_command_line():
retMet_args.add_argument('--normweightmode', dest='normweightmode', type=str, \ retMet_args.add_argument('--normweightmode', dest='normweightmode', type=str, \
help="It can be one of these: 'trace', 'tracemovingaverage', 'cv', 'pc', 'dfi', 'maxtracesc',"+\ help="It can be one of these: 'trace', 'tracemovingaverage', 'cv', 'pc', 'dfi', 'maxtracesc',"+\
"'maxtracepc', 'maxtracecv', 'maxtracepccv', 'maxtracepcsc', 'maxtracepchalfpccv' or 'halfcvpc'. Default is 'trace'.", "'maxtracepc', 'maxtracecv', 'maxtracepccv', 'maxtracepcsc', 'maxtracepchalfpccv', "+\
"'halfcvpc', maxhalftracepchalftracecvhalfcvpc or jetormax. Default is 'trace'.",
required=False, default="trace") required=False, default="trace")
args = parser.parse_args() args = parser.parse_args()
...@@ -548,6 +551,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -548,6 +551,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
(normWeightMode != 'maxtracehalfcvpc') and \ (normWeightMode != 'maxtracehalfcvpc') and \
(normWeightMode != 'maxtracehalftracecvhalfcvpc') and \ (normWeightMode != 'maxtracehalftracecvhalfcvpc') and \
(normWeightMode != 'maxhalftracepchalftracecvhalfcvpc') and \ (normWeightMode != 'maxhalftracepchalftracecvhalfcvpc') and \
(normWeightMode != 'jetormax') and \
(normWeightMode != 'tracemovingaverage') and \ (normWeightMode != 'tracemovingaverage') and \
(normWeightMode != 'maxtracehalfpccv')): (normWeightMode != 'maxtracehalfpccv')):
print("ERROR: normWeightMode can only be 'trace', 'tracemovingaverage', 'cv', 'pc', 'dfi', bfactor,\n"+\ print("ERROR: normWeightMode can only be 'trace', 'tracemovingaverage', 'cv', 'pc', 'dfi', bfactor,\n"+\
...@@ -555,7 +559,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -555,7 +559,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
" 'maxtracebfactor', 'maxtracepcsc', 'maxhalftracepchalftracecvhalfcvpc', "+\ " 'maxtracebfactor', 'maxtracepcsc', 'maxhalftracepchalftracecvhalfcvpc', "+\
" 'maxtracecv', 'maxtracepccv', maxtracepchalfpccv"+\ " 'maxtracecv', 'maxtracepccv', maxtracepchalfpccv"+\
" 'combinedv1', 'combinedv2', 'combinedv3', 'combinedv4'"+\ " 'combinedv1', 'combinedv2', 'combinedv3', 'combinedv4'"+\
" 'halfcvpc', 'halftracepc', 'maxtracehalfcvpc' or maxtracehalftracecvhalfcvpc!") " 'halfcvpc', 'halftracepc', 'maxtracehalfcvpc', 'maxtracehalftracecvhalfcvpc' or 'jetormax'!")
sys.exit(-1) sys.exit(-1)
structure = parsePDB(prot+".pdb") structure = parsePDB(prot+".pdb")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment