Commit 2d117e24 by Mustafa Tekpinar

Commented unused code in pred.R and computePred.R

parent 5647766c
...@@ -55,9 +55,7 @@ JET2 configuration file is: default.conf. ...@@ -55,9 +55,7 @@ JET2 configuration file is: default.conf.
JET2 output file is: myProt_jet.res. JET2 output file is: myProt_jet.res.
### Analyzing the ESGEMME output ### Analyzing the ESGEMME output
By default, ESGEMME will output the following files: By default, ESGEMME will output the following files:
* myProt_pred_evolEpi.txt
* myProt_normPred_evolEpi.txt * myProt_normPred_evolEpi.txt
* myProt_pred_evolInd.txt
* myProt_normPred_evolInd.txt * myProt_normPred_evolInd.txt
* myProt_normPred_evolCombi.txt * myProt_normPred_evolCombi.txt
......
...@@ -36,11 +36,11 @@ aliCons = ali[pId>0.6,] ...@@ -36,11 +36,11 @@ aliCons = ali[pId>0.6,]
aliVeryCons = ali[pId>0.8,] aliVeryCons = ali[pId>0.8,]
# number of sequences # number of sequences
N = c(dim(ali)[[1]],dim(aliCons)[[1]],dim(aliVeryCons)[[1]]) N = c(dim(ali)[[1]],dim(aliCons)[[1]],dim(aliVeryCons)[[1]])
#resAliCons = computePSSM(aliCons) # #resAliCons = computePSSM(aliCons)
res = list(computePSSM(ali,N[1],npos),computePSSM(aliCons,N[2],npos),computePSSM(aliVeryCons,N[3],npos)) # res = list(computePSSM(ali,N[1],npos),computePSSM(aliCons,N[2],npos),computePSSM(aliVeryCons,N[3],npos))
# write.table(res[[1]][[3]],paste0(prot,"_pssm.txt")) # # write.table(res[[1]][[3]],paste0(prot,"_pssm.txt"))
# write.table(res[[2]][[3]],paste0(prot,"_pssm60.txt")) # # write.table(res[[2]][[3]],paste0(prot,"_pssm60.txt"))
# write.table(res[[3]][[3]],paste0(prot,"_pssm80.txt")) # # write.table(res[[3]][[3]],paste0(prot,"_pssm80.txt"))
# read evolutionary traces computed by JET # read evolutionary traces computed by JET
jet=read.table(paste(prot,"_jet.res",sep=""),head=TRUE) jet=read.table(paste(prot,"_jet.res",sep=""),head=TRUE)
...@@ -176,14 +176,14 @@ print("done") ...@@ -176,14 +176,14 @@ print("done")
print("running combined model...") print("running combined model...")
#Read frequencies from the jet output file. # #Read frequencies from the jet output file.
frequencies = c() # frequencies = c()
print(paste("Reading frequencies:")) # print(paste("Reading frequencies:"))
for (row in 1:nrow(jet)) { # for (row in 1:nrow(jet)) {
frequencies<-append(frequencies, jet[row, "freq"] ) # frequencies<-append(frequencies, jet[row, "freq"] )
} # }
#print(frequencies) # #print(frequencies)
freq_mean = mean(frequencies) # freq_mean = mean(frequencies)
alpha = 0.6 alpha = 0.6
# alpha = c() # alpha = c()
......
...@@ -10,7 +10,7 @@ then ...@@ -10,7 +10,7 @@ then
rm -rf BLAT* rm -rf BLAT*
rm -f default.conf caracTest.dat rm -f default.conf caracTest.dat
rm -rf bin*.fasta rm -rf bin*.fasta
rm -rf blat-af2.pdb.dssp blat-af2.pdb.dssp.new rm -rf ../data/blat-af2.pdb.dssp ../data/blat-af2.pdb.dssp.new
elif [ "$1" == "jetoff" ] elif [ "$1" == "jetoff" ]
then then
# If you have your own JET2 score file, you can turn off JET2 as follows: # If you have your own JET2 score file, you can turn off JET2 as follows:
......
...@@ -91,87 +91,87 @@ computeNbSeqs<-function(mat,gap=FALSE){ ...@@ -91,87 +91,87 @@ computeNbSeqs<-function(mat,gap=FALSE){
return(res) return(res)
} }
# compute variability levels as the Shanon entropy # # compute variability levels as the Shanon entropy
computeConsSE<-function(mat,N){ # computeConsSE<-function(mat,N){
mat[mat==0] = 1 # mat[mat==0] = 1
return(apply(mat/N,2,f<-function(x){return(-sum(x*log2(x)))})) # return(apply(mat/N,2,f<-function(x){return(-sum(x*log2(x)))}))
} # }
# compute conservation levels as the Kullback-Leibler divergence # # compute conservation levels as the Kullback-Leibler divergence
computeConsKL<-function(mat){ # computeConsKL<-function(mat){
n = apply(mat,2,sum) # n = apply(mat,2,sum)
mat = t(t(mat)/n) # mat = t(t(mat)/n)
mat = mat + 0.000001 # mat = mat + 0.000001
bg = blosum62[,1][order(rownames(blosum62))] # bg = blosum62[,1][order(rownames(blosum62))]
return(apply(mat,2,f<-function(x){return(sum(x*log2(x/bg)))})) # return(apply(mat,2,f<-function(x){return(sum(x*log2(x/bg)))}))
} # }
computeSeqWeights<-function(mat,N,npos){ # computeSeqWeights<-function(mat,N,npos){
# compute the number of different observed amino acids in each column # # compute the number of different observed amino acids in each column
r = apply(mat,2,f<-function(x){return(length(unique(x)))}) # r = apply(mat,2,f<-function(x){return(length(unique(x)))})
# compute occurrence of each letter at each position # # compute occurrence of each letter at each position
occMat = computeNbSeqs(mat,TRUE) # occMat = computeNbSeqs(mat,TRUE)
#print(occMat) # #print(occMat)
# compute weights for each sequence # # compute weights for each sequence
weightMat=matrix(nr=N,nc=npos) # weightMat=matrix(nr=N,nc=npos)
for(k in 1:N){ # for(k in 1:N){
midx=cbind(mat[k,],1:npos) # midx=cbind(mat[k,],1:npos)
weightMat[k,] = 1 / (occMat[midx] * r) # weightMat[k,] = 1 / (occMat[midx] * r)
} # }
indObs = sum(r)/npos # indObs = sum(r)/npos
return(list(weightMat,indObs)) # return(list(weightMat,indObs))
} # }
computePseudoCounts<-function(freqmat,npos){ # computePseudoCounts<-function(freqmat,npos){
PC = matrix(nr=length(aa),nc=npos) # PC = matrix(nr=length(aa),nc=npos)
rownames(PC)= aa # rownames(PC)= aa
for(a in aa){ # for(a in aa){
PC[a,] = apply(freqmat,2,f<-function(x){return(sum(x/bg*bgp[a,]))}) # PC[a,] = apply(freqmat,2,f<-function(x){return(sum(x/bg*bgp[a,]))})
} # }
return(PC) # return(PC)
} # }
computePSSM<-function(mat,N,npos){ # computePSSM<-function(mat,N,npos){
# compute sequence weights # # compute sequence weights
res = computeSeqWeights(mat,N,npos) # res = computeSeqWeights(mat,N,npos)
weights = res[[1]] # weights = res[[1]]
indObs = res[[2]] # indObs = res[[2]]
# extend amino acid alphabet with gaps # # extend amino acid alphabet with gaps
aa=c(aa,"-") # aa=c(aa,"-")
# compute weighted occurrences # # compute weighted occurrences
occMat = matrix(0,nr=length(aa),nc=npos) # occMat = matrix(0,nr=length(aa),nc=npos)
rownames(occMat) = aa # rownames(occMat) = aa
for(i in 1:npos){ # for(i in 1:npos){
counts = tapply(weights[,i],mat[,i],sum) # counts = tapply(weights[,i],mat[,i],sum)
occMat[names(counts),i] = counts # occMat[names(counts),i] = counts
} # }
# distribute gaps # # distribute gaps
occMat = occMat[1:20,] + t(occMat["-",]%*%t(bg)) # occMat = occMat[1:20,] + t(occMat["-",]%*%t(bg))
# compute pseudo-counts # # compute pseudo-counts
PC = computePseudoCounts(occMat,npos) # PC = computePseudoCounts(occMat,npos)
# distribute pseudo-counts according to an empirical (?) weight # # distribute pseudo-counts according to an empirical (?) weight
occMat = (occMat*(indObs-1) + PC) /indObs # occMat = (occMat*(indObs-1) + PC) /indObs
# normalize to relative frequencies # # normalize to relative frequencies
n = apply(occMat,2,sum) # n = apply(occMat,2,sum)
freq = t(t(occMat)/n) # freq = t(t(occMat)/n)
# compute Shannon entropy # # compute Shannon entropy
SE = apply(freq,2,f<-function(x){return(-sum(x*log2(x)))}) # SE = apply(freq,2,f<-function(x){return(-sum(x*log2(x)))})
# compute Kullback-Leibler divergence # # compute Kullback-Leibler divergence
KL = apply(freq,2,f<-function(x){return(sum(x*log2(x/bg)))}) # KL = apply(freq,2,f<-function(x){return(sum(x*log2(x/bg)))})
# divide by bg freqs and convertto log-scores # # divide by bg freqs and convertto log-scores
pssm = 2 * log2(freq/bg) # pssm = 2 * log2(freq/bg)
return(list(SE,KL,pssm)) # return(list(SE,KL,pssm))
} # }
computePSSM2<-function(mat){ # computePSSM2<-function(mat){
occMat = computeNbSeqs(mat) # occMat = computeNbSeqs(mat)
n = apply(occMat,2,sum) # n = apply(occMat,2,sum)
pssm = t(t(occMat)/n) # pssm = t(t(occMat)/n)
pssm = pssm + 0.000001 # pssm = pssm + 0.000001
bg = blosum62[,1][order(rownames(blosum62))] # bg = blosum62[,1][order(rownames(blosum62))]
res = 2 * log2(pssm/bg) # res = 2 * log2(pssm/bg)
return(list(pssm,res)) # return(list(pssm,res))
} # }
computeNbSeqsAlph<-function(nbSeqs,alphabet){ computeNbSeqsAlph<-function(nbSeqs,alphabet){
path=paste(Sys.getenv("ESGEMME_PATH"),"/data/alphabets/",sep="") path=paste(Sys.getenv("ESGEMME_PATH"),"/data/alphabets/",sep="")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment