Commit 2d117e24 by Mustafa Tekpinar

Commented out unused code in pred.R and computePred.R

parent 5647766c
......@@ -55,9 +55,7 @@ JET2 configuration file is: default.conf.
JET2 output file is: myProt_jet.res.
### Analyzing the ESGEMME output
By default, ESGEMME will output the following files:
* myProt_pred_evolEpi.txt
* myProt_normPred_evolEpi.txt
* myProt_pred_evolInd.txt
* myProt_normPred_evolInd.txt
* myProt_normPred_evolCombi.txt
......
......@@ -36,11 +36,11 @@ aliCons = ali[pId>0.6,]
aliVeryCons = ali[pId>0.8,]
# number of sequences
N = c(dim(ali)[[1]],dim(aliCons)[[1]],dim(aliVeryCons)[[1]])
#resAliCons = computePSSM(aliCons)
res = list(computePSSM(ali,N[1],npos),computePSSM(aliCons,N[2],npos),computePSSM(aliVeryCons,N[3],npos))
# write.table(res[[1]][[3]],paste0(prot,"_pssm.txt"))
# write.table(res[[2]][[3]],paste0(prot,"_pssm60.txt"))
# write.table(res[[3]][[3]],paste0(prot,"_pssm80.txt"))
# #resAliCons = computePSSM(aliCons)
# res = list(computePSSM(ali,N[1],npos),computePSSM(aliCons,N[2],npos),computePSSM(aliVeryCons,N[3],npos))
# # write.table(res[[1]][[3]],paste0(prot,"_pssm.txt"))
# # write.table(res[[2]][[3]],paste0(prot,"_pssm60.txt"))
# # write.table(res[[3]][[3]],paste0(prot,"_pssm80.txt"))
# read evolutionary traces computed by JET
jet=read.table(paste(prot,"_jet.res",sep=""),head=TRUE)
......@@ -176,14 +176,14 @@ print("done")
print("running combined model...")
#Read frequencies from the jet output file.
frequencies = c()
print(paste("Reading frequencies:"))
for (row in 1:nrow(jet)) {
frequencies<-append(frequencies, jet[row, "freq"] )
}
#print(frequencies)
freq_mean = mean(frequencies)
# #Read frequencies from the jet output file.
# frequencies = c()
# print(paste("Reading frequencies:"))
# for (row in 1:nrow(jet)) {
# frequencies<-append(frequencies, jet[row, "freq"] )
# }
# #print(frequencies)
# freq_mean = mean(frequencies)
alpha = 0.6
# alpha = c()
......
......@@ -10,7 +10,7 @@ then
rm -rf BLAT*
rm -f default.conf caracTest.dat
rm -rf bin*.fasta
rm -rf blat-af2.pdb.dssp blat-af2.pdb.dssp.new
rm -rf ../data/blat-af2.pdb.dssp ../data/blat-af2.pdb.dssp.new
elif [ "$1" == "jetoff" ]
then
# If you have your own JET2 score file, you can turn off JET2 as follows:
......
......@@ -91,87 +91,87 @@ computeNbSeqs<-function(mat,gap=FALSE){
return(res)
}
# Compute per-column variability as the Shannon entropy.
# mat: letter-count matrix (letters x positions); N: total number of sequences.
# Zero counts are bumped to 1 beforehand so that log2() is never applied to 0.
computeConsSE<-function(mat,N){
counts = mat
counts[counts==0] = 1
probs = counts/N
apply(probs,2,function(col){-sum(col*log2(col))})
}
# # compute variability levels as the Shanon entropy
# computeConsSE<-function(mat,N){
# mat[mat==0] = 1
# return(apply(mat/N,2,f<-function(x){return(-sum(x*log2(x)))}))
# }
# Compute per-column conservation as the Kullback-Leibler divergence
# between observed letter frequencies and background frequencies.
# mat: letter-count matrix (letters x positions). Reads the global
# blosum62 table; background = its first column, rows in alphabetical order.
computeConsKL<-function(mat){
colTotals = apply(mat,2,sum)
freqs = t(t(mat)/colTotals)
# small pseudo-frequency so log2() never sees zero
freqs = freqs + 0.000001
bgFreqs = blosum62[,1][order(rownames(blosum62))]
apply(freqs,2,function(col){sum(col*log2(col/bgFreqs))})
}
# # compute conservation levels as the Kullback-Leibler divergence
# computeConsKL<-function(mat){
# n = apply(mat,2,sum)
# mat = t(t(mat)/n)
# mat = mat + 0.000001
# bg = blosum62[,1][order(rownames(blosum62))]
# return(apply(mat,2,f<-function(x){return(sum(x*log2(x/bg)))}))
# }
# Compute position-specific sequence weights (Henikoff-style scheme).
# mat: alignment matrix (N sequences x npos positions, single-letter cells).
# N: number of sequences; npos: number of alignment positions.
# Returns list(weightMat, indObs):
#   weightMat - N x npos matrix; cell (k,i) = 1 / (occurrences of mat[k,i]
#               in column i * number of distinct letters in column i)
#   indObs    - mean number of distinct letters per column (used downstream
#               as a pseudo-count mixing weight)
computeSeqWeights<-function(mat,N,npos){
# compute the number of different observed amino acids in each column
r = apply(mat,2,f<-function(x){return(length(unique(x)))})
# compute occurrence of each letter at each position
# (second arg TRUE presumably includes gaps -- see computeNbSeqs)
occMat = computeNbSeqs(mat,TRUE)
#print(occMat)
# compute weights for each sequence
# full argument names (nrow/ncol) instead of partial matching (nr/nc)
weightMat=matrix(nrow=N,ncol=npos)
# seq_len() is safe when N == 0, unlike 1:N which would yield c(1, 0)
for(k in seq_len(N)){
midx=cbind(mat[k,],seq_len(npos))
weightMat[k,] = 1 / (occMat[midx] * r)
}
indObs = sum(r)/npos
return(list(weightMat,indObs))
}
# computeSeqWeights<-function(mat,N,npos){
# # compute the number of different observed amino acids in each column
# r = apply(mat,2,f<-function(x){return(length(unique(x)))})
# # compute occurrence of each letter at each position
# occMat = computeNbSeqs(mat,TRUE)
# #print(occMat)
# # compute weights for each sequence
# weightMat=matrix(nr=N,nc=npos)
# for(k in 1:N){
# midx=cbind(mat[k,],1:npos)
# weightMat[k,] = 1 / (occMat[midx] * r)
# }
# indObs = sum(r)/npos
# return(list(weightMat,indObs))
# }
# Build a pseudo-count matrix: one row per amino acid, one column per
# alignment position. For each target amino acid a, observed column
# frequencies are mixed via the background tables (globals bg and bgp).
# freqmat: letter-frequency matrix (letters x positions); npos: #positions.
computePseudoCounts<-function(freqmat,npos){
pc = matrix(nrow=length(aa),ncol=npos)
rownames(pc) = aa
for(a in aa){
pc[a,] = apply(freqmat,2,function(col){sum(col/bg*bgp[a,])})
}
return(pc)
}
# computePseudoCounts<-function(freqmat,npos){
# PC = matrix(nr=length(aa),nc=npos)
# rownames(PC)= aa
# for(a in aa){
# PC[a,] = apply(freqmat,2,f<-function(x){return(sum(x/bg*bgp[a,]))})
# }
# return(PC)
# }
# Build a sequence-weighted PSSM with pseudo-counts from an alignment.
# mat: alignment matrix (N sequences x npos positions, single-letter cells,
#      gaps encoded as "-"); N: number of sequences; npos: #positions.
# Relies on globals aa (amino-acid alphabet) and bg (background
# frequencies; assumed length 20 -- TODO confirm), plus the helpers
# computeSeqWeights() and computePseudoCounts() defined in this file.
# Returns list(SE, KL, pssm): per-position Shannon entropy, per-position
# Kullback-Leibler divergence vs bg, and the 2*log2 odds-score matrix.
computePSSM<-function(mat,N,npos){
# compute per-sequence position weights and mean #distinct letters/column
res = computeSeqWeights(mat,N,npos)
weights = res[[1]]
indObs = res[[2]]
# extend amino acid alphabet with gaps (local copy shadows the global aa)
aa=c(aa,"-")
# compute weighted occurrences of each letter at each position
occMat = matrix(0,nr=length(aa),nc=npos)
rownames(occMat) = aa
for(i in 1:npos){
counts = tapply(weights[,i],mat[,i],sum)
occMat[names(counts),i] = counts
}
# distribute each column's gap mass over the 20 amino acids,
# proportionally to the background frequencies bg
occMat = occMat[1:20,] + t(occMat["-",]%*%t(bg))
# compute pseudo-counts
PC = computePseudoCounts(occMat,npos)
# distribute pseudo-counts according to an empirical (?) weight
occMat = (occMat*(indObs-1) + PC) /indObs
# normalize to relative frequencies
n = apply(occMat,2,sum)
freq = t(t(occMat)/n)
# compute Shannon entropy
SE = apply(freq,2,f<-function(x){return(-sum(x*log2(x)))})
# compute Kullback-Leibler divergence
KL = apply(freq,2,f<-function(x){return(sum(x*log2(x/bg)))})
# divide by bg freqs and convert to log-scores
pssm = 2 * log2(freq/bg)
return(list(SE,KL,pssm))
}
# computePSSM<-function(mat,N,npos){
# # compute sequence weights
# res = computeSeqWeights(mat,N,npos)
# weights = res[[1]]
# indObs = res[[2]]
# # extend amino acid alphabet with gaps
# aa=c(aa,"-")
# # compute weighted occurrences
# occMat = matrix(0,nr=length(aa),nc=npos)
# rownames(occMat) = aa
# for(i in 1:npos){
# counts = tapply(weights[,i],mat[,i],sum)
# occMat[names(counts),i] = counts
# }
# # distribute gaps
# occMat = occMat[1:20,] + t(occMat["-",]%*%t(bg))
# # compute pseudo-counts
# PC = computePseudoCounts(occMat,npos)
# # distribute pseudo-counts according to an empirical (?) weight
# occMat = (occMat*(indObs-1) + PC) /indObs
# # normalize to relative frequencies
# n = apply(occMat,2,sum)
# freq = t(t(occMat)/n)
# # compute Shannon entropy
# SE = apply(freq,2,f<-function(x){return(-sum(x*log2(x)))})
# # compute Kullback-Leibler divergence
# KL = apply(freq,2,f<-function(x){return(sum(x*log2(x/bg)))})
# # divide by bg freqs and convertto log-scores
# pssm = 2 * log2(freq/bg)
# return(list(SE,KL,pssm))
# }
# Build a simple (unweighted, no pseudo-count mixing) PSSM.
# mat: alignment matrix of single-letter cells. Uses computeNbSeqs() for
# raw counts and the global blosum62 table for background frequencies.
# Returns list(pssm, res): relative frequencies (plus a tiny
# pseudo-frequency) and the 2*log2 odds-score matrix.
computePSSM2<-function(mat){
counts = computeNbSeqs(mat)
totals = apply(counts,2,sum)
# normalize to relative frequencies, then avoid log2(0)
freqs = t(t(counts)/totals) + 0.000001
background = blosum62[,1][order(rownames(blosum62))]
scores = 2 * log2(freqs/background)
return(list(freqs,scores))
}
# computePSSM2<-function(mat){
# occMat = computeNbSeqs(mat)
# n = apply(occMat,2,sum)
# pssm = t(t(occMat)/n)
# pssm = pssm + 0.000001
# bg = blosum62[,1][order(rownames(blosum62))]
# res = 2 * log2(pssm/bg)
# return(list(pssm,res))
# }
computeNbSeqsAlph<-function(nbSeqs,alphabet){
path=paste(Sys.getenv("ESGEMME_PATH"),"/data/alphabets/",sep="")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment