Commit a823960e by Mustafa Tekpinar

Added jetormax to the normalization options

parent 54a3c39a
# Copyright (c) 2018: Elodie Laine
# Copyright (c) 2022: Mustafa Tekpinar
# Copyright (c) 2022-2023: Mustafa Tekpinar
# This code is part of the gemme package and governed by its license.
# Please see the LICENSE.txt file included as part of this package.
# Usage: Rscript --save computePred.R XXXX
library("seqinr")
source(paste(Sys.getenv("GEMME_PATH"),"/pred.R",sep=""))
source(paste(Sys.getenv("SGEMME_PATH"),"/pred.R",sep=""))
# amino acid full alphabet
aa = c("a","c","d","e","f","g","h","i","k","l","m","n","p","q","r","s","t","v","w","y")
# BLOSUM62 matrix
blosum62p = as.matrix(read.table(paste0(Sys.getenv("GEMME_PATH"),"/blosum62p.txt"),row.names=1))
blosum62p = as.matrix(read.table(paste0(Sys.getenv("SGEMME_PATH"),"/blosum62p.txt"),row.names=1))
bgp = blosum62p[order(rownames(blosum62p)),order(colnames(blosum62p))]
bg = apply(bgp,1,sum)
......@@ -136,6 +136,19 @@ if((normWeightMode=="maxtracepc") | (normWeightMode=="maxpctrace")){
trace<-append(trace, max((jet[row, "trace"]+jet[row, "pc"])/2.0, max((jet[row, "trace"]+jet[row, "cv"])/2.0, (jet[row, "pc"]+jet[row, "cv"])/2.0 )))
}
}
}else if ((normWeightMode=="jetormax")){
print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){
maxValue = max((jet[row, "traceMax"]+jet[row, "pc"])/2.0, max((jet[row, "traceMax"]+jet[row, "cv"])/2.0, (jet[row, "pc"]+jet[row, "cv"])/2.0 ))
traceValue = jet[row, "traceMax"]
trace<-append(trace, max((traceValue), (maxValue)))
}else{
maxValue = max((jet[row, "trace"]+jet[row, "pc"])/2.0, max((jet[row, "trace"]+jet[row, "cv"])/2.0, (jet[row, "pc"]+jet[row, "cv"])/2.0 ))
traceValue = jet[row, "trace"]
trace<-append(trace, max((traceValue), (maxValue)))
}
}
}else if ((normWeightMode=="maxtracepchalfpccv")){
print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) {
......@@ -244,7 +257,8 @@ if((normWeightMode=="maxtracepc") | (normWeightMode=="maxpctrace")){
print("ERROR: Unknown --normWeightMode selected!")
print("It can only be 'trace', 'tracemovingaverage', 'pc', 'cv', 'dfi', 'bfactor',")
print(" 'maxtracepc', 'maxtracecv', 'maxtracepccv', 'halfcvpc', 'halftracepc', 'maxtracepchalfpccv',")
print(" 'maxtracesc', 'maxtracescpc', 'maxtracedfi' or 'maxtracebfactor'!")
print(" 'maxtracesc', 'maxtracescpc', 'maxtracedfi', 'maxtracebfactor', 'maxhalftracepchalftracecvhalfcvpc'")
print(" or 'jetormax'!")
}
print(trace)
#####################################################################################################################
......
......@@ -16,30 +16,30 @@ import pandas as pd
import numpy as np
def extractQuerySeq(filename):
"""
# Extract the query sequence from the input alignment
"""
fIN = open(filename,"r")
lines = fIN.readlines()
fIN.close()
if lines[0][0]!=">":
raise Exception('bad FASTA format')
else:
prot = re.compile("[^A-Z0-9a-z]").split(lines[0][1:])[0]
fOUT = open(prot+".fasta","w")
fOUT.write(">"+prot+"\n")
seq=""
i = 1
while lines[i][0]!=">":
seq = seq + lines[i].strip().strip(".").strip("-")
fOUT.write(lines[i])
i = i + 1
fOUT.close()
return prot,seq,i-1
# Moved to sgemme.py
# def extractQuerySeq(filename):
# """
# # Extract the query sequence from the input alignment
# """
# fIN = open(filename,"r")
# lines = fIN.readlines()
# fIN.close()
# if lines[0][0]!=">":
# raise Exception('bad FASTA format')
# else:
# prot = re.compile("[^A-Z0-9a-z]").split(lines[0][1:])[0]
# fOUT = open(prot+".fasta","w")
# fOUT.write(">"+prot+"\n")
# seq=""
# i = 1
# while lines[i][0]!=">":
# seq = seq + lines[i].strip().strip(".").strip("-")
# fOUT.write(lines[i])
# i = i + 1
# fOUT.close()
# return prot,seq,i-1
def getNbSeq(filename):
......@@ -135,7 +135,7 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
#TODO: Remove Bash dependency here. Make the copying process in Python
subprocess.call("cp $SGEMME_PATH/default.conf .",shell=True)
if retMet=="input":
if bFile!='':
#TODO: I think these two lines must be here as well but I am not sure.
......
......@@ -174,7 +174,7 @@ computePSSM2<-function(mat){
}
computeNbSeqsAlph<-function(nbSeqs,alphabet){
path=paste(Sys.getenv("GEMME_PATH"),"/alphabets/",sep="")
path=paste(Sys.getenv("SGEMME_PATH"),"/alphabets/",sep="")
AA = read.table(paste(path,alphabet,".txt",sep=""))$V1
facAA = rep(NA,20)
for(Let in AA){
......@@ -210,7 +210,7 @@ readMut<-function(fname){
}
which.class<-function(a,alphabet){
path=paste(Sys.getenv("GEMME_PATH"),"/alphabets/",sep="")
path=paste(Sys.getenv("SGEMME_PATH"),"/alphabets/",sep="")
AA = read.table(paste(path,alphabet,".txt",sep=""))$V1
for(Let in AA){
Let = toString(Let)
......
......@@ -22,6 +22,8 @@ from gemmeAnal import *
import pandas as pd
#############The following part between # signs are moved from gemmeAnal.py to
# make the code more simple.
def extractQuerySeq(filename):
"""
# Extract the query sequence from the input alignment
......@@ -45,7 +47,7 @@ def extractQuerySeq(filename):
i = i + 1
fOUT.close()
return prot,seq,i-1
###############################################################################
def rankSortProteinData(dataArray, inverted=True):
"""
This function ranksorts protein data and converts it
......@@ -477,7 +479,8 @@ def parse_command_line():
retMet_args.add_argument('--normweightmode', dest='normweightmode', type=str, \
help="It can be one of these: 'trace', 'tracemovingaverage', 'cv', 'pc', 'dfi', 'maxtracesc',"+\
"'maxtracepc', 'maxtracecv', 'maxtracepccv', 'maxtracepcsc', 'maxtracepchalfpccv' or 'halfcvpc'. Default is 'trace'.",
"'maxtracepc', 'maxtracecv', 'maxtracepccv', 'maxtracepcsc', 'maxtracepchalfpccv', "+\
"'halfcvpc', maxhalftracepchalftracecvhalfcvpc or jetormax. Default is 'trace'.",
required=False, default="trace")
args = parser.parse_args()
......@@ -548,6 +551,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
(normWeightMode != 'maxtracehalfcvpc') and \
(normWeightMode != 'maxtracehalftracecvhalfcvpc') and \
(normWeightMode != 'maxhalftracepchalftracecvhalfcvpc') and \
(normWeightMode != 'jetormax') and \
(normWeightMode != 'tracemovingaverage') and \
(normWeightMode != 'maxtracehalfpccv')):
print("ERROR: normWeightMode can only be 'trace', 'tracemovingaverage', 'cv', 'pc', 'dfi', bfactor,\n"+\
......@@ -555,7 +559,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
" 'maxtracebfactor', 'maxtracepcsc', 'maxhalftracepchalftracecvhalfcvpc', "+\
" 'maxtracecv', 'maxtracepccv', maxtracepchalfpccv"+\
" 'combinedv1', 'combinedv2', 'combinedv3', 'combinedv4'"+\
" 'halfcvpc', 'halftracepc', 'maxtracehalfcvpc' or maxtracehalftracecvhalfcvpc!")
" 'halfcvpc', 'halftracepc', 'maxtracehalfcvpc', 'maxtracehalftracecvhalfcvpc' or 'jetormax'!")
sys.exit(-1)
structure = parsePDB(prot+".pdb")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment