Commit 00f40390 by Mustafa Tekpinar

Changed alpha[i]=0.6 values to a single value alpha=0.6.

Once upon a time, we tested the idea of changing the weight
of epistatic (0.6) and independent (1-0.6) component depending
on the residue features such as frequency of aa in the MSA.
Therefore, I had made alpha a list instead of a single value.
Now, I reverted it back to a single value (0.6).
parent 7db0ac49
...@@ -355,14 +355,14 @@ for (row in 1:nrow(jet)) { ...@@ -355,14 +355,14 @@ for (row in 1:nrow(jet)) {
#print(frequencies) #print(frequencies)
freq_mean = mean(frequencies) freq_mean = mean(frequencies)
#alpha = 0.6 alpha = 0.6
alpha = c() # alpha = c()
#If we want to assign each aa the same weight of epistatic and independent # #If we want to assign each aa the same weight of epistatic and independent
#contribution, that's the way to do it. # #contribution, that's the way to do it.
for (row in 1:nrow(jet)) { # for (row in 1:nrow(jet)) {
alpha<-append(alpha, 0.6) # alpha<-append(alpha, 0.6)
} # }
# alpha_val = 0.6 # alpha_val = 0.6
# for (row in 1:nrow(jet)) { # for (row in 1:nrow(jet)) {
# if(frequencies[row]>freq_mean){ # if(frequencies[row]>freq_mean){
...@@ -383,7 +383,7 @@ if(simple){ ...@@ -383,7 +383,7 @@ if(simple){
}else{ }else{
#normPredCombi = normalizePredWithNbSeqsPCSelMult(pred,trace,wt,list(pos,subsaa),alpha,nbSeqs[[1]],alphabet) #normPredCombi = normalizePredWithNbSeqsPCSelMult(pred,trace,wt,list(pos,subsaa),alpha,nbSeqs[[1]],alphabet)
normPredCombi = normalizePredWithNbSeqsPCSelMult(pred,trace,wt,list(pos,subsaa),alpha,nbSeqs,alphabet) normPredCombi = normalizePredWithNbSeqsPCSelMult(pred,trace,wt,list(pos,subsaa),alpha,nbSeqs,alphabet)
names(normPredCombi)=rawMut # names(normPredCombi)=rawMut
names(normPredCombi)=rawMut names(normPredCombi)=rawMut
} }
# output the predicted mutational effects based on sequence counts (conservation at the bottom) # output the predicted mutational effects based on sequence counts (conservation at the bottom)
......
...@@ -400,6 +400,61 @@ normalizePred<-function(pred, trace, wt){ ...@@ -400,6 +400,61 @@ normalizePred<-function(pred, trace, wt){
return(-normPred) return(-normPred)
} }
# # Equation 2 in https://doi.org/10.1093/molbev/msz179
# normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){
# n = length(trace)
# normPred = matrix(nc=n,nr=20)
# rownames(normPred) = aa
# nbseqs = computeNbSeqsAlph(nbSeqs,alphabet)
# maxRefVal = max(apply(nbseqs,2,sum))
# # for each position in the sequence
# for(i in 1:n){
# normPred[,i] = pred[,i] / max(pred[!is.na(pred)]) * log(1/maxRefVal) * (-1)
# normPred[is.na(normPred[,i]),i] = - log(1/maxRefVal)
# # for each possible substitution
# for(a in aa){
# A = which.class(wt[i],alphabet)
# B = which.class(a,alphabet)
# # I think this is the place I should modify. alpha will become alpha[i]
# normPred[a,i] = alpha[i] * normPred[a,i] - (1-alpha[i]) * log(max(1,nbseqs[B,i])/nbseqs[A,i])
# }
# normPred[,i] = normPred[,i] * trace[i]
# normPred[aa==wt[i],i] = NA
# }
# return(-normPred)
# }
# normalizePredWithNbSeqsPCSelMult<-function(pred, trace, wt, listMut, alpha, nbSeqs, alphabet){
# # check mutation list
# pos = listMut[[1]]
# let = listMut[[2]]
# N = length(pos)
# if(N!=length(let)){print("Problem!!!! not the same number of positions and aas !!")}
# normPred = rep(NA,N)
# allVal = unlist(pred)
# maxVal = max(allVal[!is.na(allVal)])
# nbseqs = computeNbSeqsAlph(nbSeqs,alphabet)
# maxRefVal = max(apply(nbseqs,2,sum))
# # for each position in the sequence
# for(i in 1:N){
# predTMP=c()
# myPos = pos[[i]]
# myAA = tolower(let[[i]])
# n = length(myPos)
# for(k in 1:n){
# A = which.class(wt[myPos[k]],alphabet)
# B = which.class(myAA[k],alphabet)
# ratio = log(max(1,nbseqs[B,myPos[k]])/nbseqs[A,myPos[k]])
# val = pred[myAA[k],myPos[k]] / maxVal * log(1/maxRefVal) * (-1)
# if(is.na(val)){val=- log(1/maxRefVal)}
# predTMP=c(predTMP, alpha[i] * val - (1-alpha[i]) * ratio)
# }
# normPred[i] = sum(predTMP * trace[myPos])
# }
# return(-normPred)
# }
# Equation 2 in https://doi.org/10.1093/molbev/msz179 # Equation 2 in https://doi.org/10.1093/molbev/msz179
normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){ normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){
...@@ -418,7 +473,7 @@ normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){ ...@@ -418,7 +473,7 @@ normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){
B = which.class(a,alphabet) B = which.class(a,alphabet)
# I think this is the place I should modify. alpha will become alpha[i] # I think this is the place I should modify. alpha will become alpha[i]
normPred[a,i] = alpha[i] * normPred[a,i] - (1-alpha[i]) * log(max(1,nbseqs[B,i])/nbseqs[A,i]) normPred[a,i] = alpha * normPred[a,i] - (1-alpha) * log(max(1,nbseqs[B,i])/nbseqs[A,i])
} }
normPred[,i] = normPred[,i] * trace[i] normPred[,i] = normPred[,i] * trace[i]
normPred[aa==wt[i],i] = NA normPred[aa==wt[i],i] = NA
...@@ -449,7 +504,7 @@ normalizePredWithNbSeqsPCSelMult<-function(pred, trace, wt, listMut, alpha, nbSe ...@@ -449,7 +504,7 @@ normalizePredWithNbSeqsPCSelMult<-function(pred, trace, wt, listMut, alpha, nbSe
ratio = log(max(1,nbseqs[B,myPos[k]])/nbseqs[A,myPos[k]]) ratio = log(max(1,nbseqs[B,myPos[k]])/nbseqs[A,myPos[k]])
val = pred[myAA[k],myPos[k]] / maxVal * log(1/maxRefVal) * (-1) val = pred[myAA[k],myPos[k]] / maxVal * log(1/maxRefVal) * (-1)
if(is.na(val)){val=- log(1/maxRefVal)} if(is.na(val)){val=- log(1/maxRefVal)}
predTMP=c(predTMP, alpha[i] * val - (1-alpha[i]) * ratio) predTMP=c(predTMP, alpha * val - (1-alpha) * ratio)
} }
normPred[i] = sum(predTMP * trace[myPos]) normPred[i] = sum(predTMP * trace[myPos])
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment