Commit 06ff7a4c by Mustafa Tekpinar

Added half-pc+cv option to normweightmode!

parent ba2786df
...@@ -100,7 +100,7 @@ print("running normalization...") ...@@ -100,7 +100,7 @@ print("running normalization...")
#This part of the code obtains max values of Trace, PC, or CV for weighting the #This part of the code obtains max values of Trace, PC, or CV for weighting the
#normalized results. #normalized results.
trace = c() trace = c()
if((normWeightMode=="trace+pc") | (normWeightMode=="pc+trace")){ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
print(paste("Using ", normWeightMode)) print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){ if(sum(colnames(jet)=="traceMax")==1){
...@@ -109,7 +109,7 @@ if((normWeightMode=="trace+pc") | (normWeightMode=="pc+trace")){ ...@@ -109,7 +109,7 @@ if((normWeightMode=="trace+pc") | (normWeightMode=="pc+trace")){
trace<-append(trace, max(jet[row, "trace"], jet[row, "pc"])) trace<-append(trace, max(jet[row, "trace"], jet[row, "pc"]))
} }
} }
} else if ((normWeightMode=="trace+cv") | (normWeightMode=="cv+trace")){ } else if ((normWeightMode=="max-trace-cv") | (normWeightMode=="max-cv-trace")){
print(paste("Using ", normWeightMode)) print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){ if(sum(colnames(jet)=="traceMax")==1){
...@@ -118,7 +118,7 @@ if((normWeightMode=="trace+pc") | (normWeightMode=="pc+trace")){ ...@@ -118,7 +118,7 @@ if((normWeightMode=="trace+pc") | (normWeightMode=="pc+trace")){
trace<-append(trace, max(jet[row, "trace"], jet[row, "cv"])) trace<-append(trace, max(jet[row, "trace"], jet[row, "cv"]))
} }
} }
} else if ((normWeightMode=="trace+pc+cv")|(normWeightMode=="trace+cv+pc")){ } else if ((normWeightMode=="max-trace-pc-cv")|(normWeightMode=="max-trace-cv-pc")){
print(paste("Using ", normWeightMode)) print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){ if(sum(colnames(jet)=="traceMax")==1){
...@@ -127,6 +127,15 @@ if((normWeightMode=="trace+pc") | (normWeightMode=="pc+trace")){ ...@@ -127,6 +127,15 @@ if((normWeightMode=="trace+pc") | (normWeightMode=="pc+trace")){
trace<-append(trace, max(jet[row, "trace"], max(jet[row, "pc"], jet[row, "cv"]))) trace<-append(trace, max(jet[row, "trace"], max(jet[row, "pc"], jet[row, "cv"])))
} }
} }
} else if ((normWeightMode=="half-cv+pc") | (normWeightMode=="half-pc+cv")){
# Mode 'half-cv+pc' / 'half-pc+cv': appends one weight per row of `jet`
# to the `trace` vector.
print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) {
# TRUE only when exactly one column of `jet` is named "traceMax".
if(sum(colnames(jet)=="traceMax")==1){
# NOTE(review): with a "traceMax" column present this appends
# max(traceMax, cv): "pc" is not read and nothing is averaged, unlike
# the else branch below. Possibly carried over from the 'max-trace-cv'
# mode -- TODO confirm this is the intended 'half' weighting.
trace<-append(trace, max(jet[row, "traceMax"], jet[row, "cv"]))
}else{
# Arithmetic mean of this row's "pc" and "cv" values.
trace<-append(trace, (jet[row, "pc"]+jet[row, "cv"])/2.0)
}
}
} else if (normWeightMode=="trace"){ } else if (normWeightMode=="trace"){
print("Using only JET2 traces") print("Using only JET2 traces")
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
...@@ -138,7 +147,7 @@ if((normWeightMode=="trace+pc") | (normWeightMode=="pc+trace")){ ...@@ -138,7 +147,7 @@ if((normWeightMode=="trace+pc") | (normWeightMode=="pc+trace")){
} }
}else{ }else{
print("ERROR: Unknown --normWeightMode selected!") print("ERROR: Unknown --normWeightMode selected!")
print("It can only be 'trace', 'trace+pc', 'trace+cv' or 'trace+pc+cv'!") print("It can only be 'trace', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'!")
} }
print(trace) print(trace)
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -18,6 +18,13 @@ then ...@@ -18,6 +18,13 @@ then
echo "Using a previously produced prot_jet.res file to check reproducibility!" echo "Using a previously produced prot_jet.res file to check reproducibility!"
cp ../tests/BLAT_jet.res . cp ../tests/BLAT_jet.res .
python $GEMME_PATH/gemme.py aliBLAT.fasta -r input -f aliBLAT.fasta --jetfile BLAT_jet.res python $GEMME_PATH/gemme.py aliBLAT.fasta -r input -f aliBLAT.fasta --jetfile BLAT_jet.res
elif [ "$1" == "withpdb" ]
then
#Please note that CV is a structural feature and it cannot be calculated if you don't specify a pdb file.
echo "Using blat-curated.pdb for the structural feature calculations!"
python $GEMME_PATH/gemme.py aliBLAT.fasta -r input -f aliBLAT.fasta --pdbfile blat-curated.pdb --normweightmode max-trace-pc-cv
else else
echo "Running GEMME with a user-provided alignment file." echo "Running GEMME with a user-provided alignment file."
......
...@@ -265,7 +265,7 @@ def parse_command_line(): ...@@ -265,7 +265,7 @@ def parse_command_line():
required=False, default=None) required=False, default=None)
retMet_args.add_argument('--normweightmode', dest='normweightmode', type=str, \ retMet_args.add_argument('--normweightmode', dest='normweightmode', type=str, \
help="It can be one of these: 'trace', 'trace+pc', 'trace+cv' or 'trace+pc+cv'. Default is 'trace'.", help="It can be one of these: 'trace', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'. Default is 'trace'.",
required=False, default="trace") required=False, default="trace")
args = parser.parse_args() args = parser.parse_args()
...@@ -303,6 +303,18 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode) ...@@ -303,6 +303,18 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode)
print("query protein: "+prot) print("query protein: "+prot)
if( (normWeightMode != 'trace') and \
(normWeightMode != 'max-trace-pc') and \
(normWeightMode != 'max-pc-trace') and \
(normWeightMode != 'max-trace-cv') and \
(normWeightMode != 'max-cv-trace') and \
(normWeightMode != 'max-trace-pc-cv') and \
(normWeightMode != 'max-trace-cv-pc') and \
(normWeightMode != 'half-pc+cv') and \
(normWeightMode != 'half-cv+pc')):
print("ERROR: normWeightMode can only be 'trace', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'!")
sys.exit(-1)
if((jetfile) == None): if((jetfile) == None):
#I intend to run JET2 completely externally!! #I intend to run JET2 completely externally!!
#It is too much buggy and it has too many dependencies. #It is too much buggy and it has too many dependencies.
...@@ -325,7 +337,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode) ...@@ -325,7 +337,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode)
#Do Python plotting here #Do Python plotting here
#TODO: Eventually, I will do the map plotting with a completely independent #TODO: Eventually, I will do the map plotting with a completely independent
# module and call the module here! # module and call the module here! demust module was created!
#TODO: Mark the original (wildtype) residue locations with a dot or something #TODO: Mark the original (wildtype) residue locations with a dot or something
# special to show the original amino acid. # special to show the original amino acid.
#TODO: You can even put letters on the top line like in EVmutation output. #TODO: You can even put letters on the top line like in EVmutation output.
......
File added
>BLAT
HPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVL
LCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSA
AITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPN
DERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSAL
PAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNR
QIAEIGASLIKHW
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This source diff could not be displayed because it is too large. You can view the blob instead.
*****************************************
>BLAT:A
size 263 size of the sequence
numAli 43 Number of sequences in alignment
numMulti 43 Number of multiple alignment
partition 498;498;456;446 partition of sequences after PSI-BLAST+filtering
*****************************************
>SequenceRetrieving
method input change this parameter with -r option of JET. See usage of JET to obtain description of this parameter
format fasta fasta: fasta input file, fasta: fasta input file
*****************************************
>QBlast
eValue 1.0E-5 psiblast maximum expected value threshold
results 20000 maximum number of results
url http://www.ncbi.nlm.nih.gov/BLAST/Blast.cgi BlastQ server URL
database nr database used
matrix blosum62 matrix used to fetch homologs
gap_existence 11 BLOSUM62=11, PAM30=9, BLOSUM45=15, PAM70=BLOSUM80=10
gap_extension 1 BLOSUM62=1, PAM30=1, BLOSUM45=2, PAM70=BLOSUM80=1
max_iter 3 number of iteration for psi-blast
****************************************
>PDB
url http://www.rcsb.org/pdb/downloadFile.do URL of PDB server
*****************************************
>Filter
min_identity 0.20 min sequence identity
max_identity 0.98 max sequence identity
*****************************************
>Sample
length_cutoff 0.8 minimum sequence length expressed in number of residues
*****************************************
>Software
clustalW /usr/local/bin/clustalw2 clustalW system dependent command
muscle /usr/bin/muscle muscle system dependent command
naccess /home/tekpinar/research/carbone-lab-software/naccess2.1.1/naccess naccess system dependent command
psiblast /usr/bin/psiblast psiblast system dependent command
*****************************************
>Data
substMatrix /home/tekpinar/research/carbone-lab-software/JET2/matrix directory location of matrices used in JET (Blosum62, gonnet and hsdm)
blastDatabases /opt/blastdb directory location of databases used for local blast (nr{0-7})
*****************************************
>ET
coverage 0.95 maximum coverage percentage of trace
freq_cutoff 0.0 minimum frequency of trace residue
msaNumber -1 number of alignments (trees), -1 for JET computing
seqNumber -1 number of sequences in alignments, -1 for JET computing
*****************************************
>Access
probe_radius 1.4 radius of probe used for accessible surface detection
res_cutoff 0.05 minimum percentage accessible surface of a residue
atom_cutoff 0.01 minimum accessible surface of an atom
accessType chain change this parameter with -d option of JET. See usage of JET to obtain description of this parameter
*****************************************
>CV
max_dist 20.0 max distance
*****************************************
>Interface
cutoff 0 minimum percentage accessible surface variation of an interface residue
ligand no (yes|no) keep contact of ligand (SUBSTRATE, PRODUCT and COFACTOR of database ENZYME) to compute interface of protein
enzymeCpd /home/tekpinar/research/carbone-lab-software/JET2/jet/data/enzyme.txt location of file containing database ENZYME
homologousPDB no (yes|no) add interface residues of homologous structures (find in pdb database clustered at 95% of identities) to interface of protein
clusteredPDB /home/tekpinar/research/carbone-lab-software/JET2/jet/data/clusters95.txt location of pdb database clustered at 95% of identities
*****************************************
>Cluster
max_dist 5.0 max distance between atoms to aggregate
analysis 2 change this parameter with -a option of JET. See usage of JET to obtain description of this parameter
namePcCol pc name of the column in results file containing the physical-chemical score of residues (do not change this parameter)
namePcLCol pcL name of the column in results file containing the residues propensities to be found at prot-lig interfaces (do not change this parameter)
nameTraceCol trace name of the column in results file containing the conservation score of residues (do not change this parameter)
coverage -1 change this parameter with -s option of JET. See usage of JET to obtain description of this parameter
#!/bin/bash
python $GEMME_PATH/gemme.py aliBLAT.fasta -r input -f aliBLAT.fasta
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment