Changed alpha[i]=0.6 values to a single value alpha=0.6.

Once upon a time, we tested the idea of changing the weight of epistatic (0.6) and independent (1-0.6) component depending on the residue features such as frequency of aa in the MSA. Therefore, I had made alpha a list instead of a single value. Now, I reverted it back to a single value (0.6).

Changed alpha[i]=0.6 values to a single value alpha=0.6.
Once upon a time, we tested the idea of changing the weight of epistatic (0.6) and independent (1-0.6) component depending on the residue features such as frequency of aa in the MSA. Therefore, I had made alpha a list instead of a single value. Now, I reverted it back to a single value (0.6).
00f40390 · Mustafa Tekpinar · 7db0ac49 · 00f40390 · 00f40390
Commit 00f40390 authored Aug 04, 2022 by Mustafa Tekpinar
Show whitespace changes
Inline Side-by-side

Showing with 65 additions and 10 deletions

computePred.R computePred.R +8 -8

pred.R pred.R +57 -2

No files found.
--- a/computePred.R
+++ b/computePred.R
@@ -355,14 +355,14 @@ for (row in 1:nrow(jet)) {
 #print(frequencies)
 freq_mean = mean(frequencies)
-#alpha = 0.6
+alpha = 0.6
-alpha = c()
+# alpha = c()
-#If we want to assign each aa the same weight of epistatic and independent 
+# #If we want to assign each aa the same weight of epistatic and independent 
-#contribution, that's the way to do it. 
+# #contribution, that's the way to do it. 
-for (row in 1:nrow(jet)) {
+# for (row in 1:nrow(jet)) {
-  alpha<-append(alpha, 0.6)
+#   alpha<-append(alpha, 0.6)
-}
+# }
 # alpha_val = 0.6
 # for (row in 1:nrow(jet)) {
 #   if(frequencies[row]>freq_mean){
@@ -383,7 +383,7 @@ if(simple){
 }else{
  #normPredCombi = normalizePredWithNbSeqsPCSelMult(pred,trace,wt,list(pos,subsaa),alpha,nbSeqs[[1]],alphabet)
  normPredCombi = normalizePredWithNbSeqsPCSelMult(pred,trace,wt,list(pos,subsaa),alpha,nbSeqs,alphabet)
-  names(normPredCombi)=rawMut
+  # names(normPredCombi)=rawMut
  names(normPredCombi)=rawMut
 }
 # output the predicted mutational effects based on sequence counts (conservation at the bottom)

--- a/pred.R
+++ b/pred.R
@@ -400,6 +400,61 @@ normalizePred<-function(pred, trace, wt){
 	return(-normPred)
 }
+# # Equation 2 in https://doi.org/10.1093/molbev/msz179
+# normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){
+# 	n = length(trace)
+# 	normPred = matrix(nc=n,nr=20)
+# 	rownames(normPred) = aa
+# 	nbseqs = computeNbSeqsAlph(nbSeqs,alphabet)
+# 	maxRefVal = max(apply(nbseqs,2,sum))
+# 	# for each position in the sequence
+# 	for(i in 1:n){
+# 		normPred[,i] = pred[,i] / max(pred[!is.na(pred)]) * log(1/maxRefVal) * (-1)
+# 		normPred[is.na(normPred[,i]),i] = - log(1/maxRefVal)
+# 		# for each possible substitution
+# 		for(a in aa){
+# 			A = which.class(wt[i],alphabet)
+# 			B = which.class(a,alphabet)
+#       # I think this is the place I should modify. alpha will become alpha[i]
+# 			normPred[a,i] = alpha[i] * normPred[a,i] - (1-alpha[i]) * log(max(1,nbseqs[B,i])/nbseqs[A,i])
+# 		}
+# 		normPred[,i] = normPred[,i] * trace[i]
+# 		normPred[aa==wt[i],i] = NA
+# 	}
+# 	return(-normPred)
+# }
+# normalizePredWithNbSeqsPCSelMult<-function(pred, trace, wt, listMut, alpha, nbSeqs, alphabet){
+#   # check mutation list
+#   pos = listMut[[1]]
+#   let = listMut[[2]]
+#   N = length(pos)
+#   if(N!=length(let)){print("Problem!!!! not the same number of positions and aas !!")}
+#   normPred = rep(NA,N)
+#   allVal = unlist(pred)
+#   maxVal = max(allVal[!is.na(allVal)])
+#   nbseqs = computeNbSeqsAlph(nbSeqs,alphabet)
+#   maxRefVal = max(apply(nbseqs,2,sum))
+#   # for each position in the sequence
+#   for(i in 1:N){
+#     predTMP=c()
+#     myPos = pos[[i]]
+#     myAA = tolower(let[[i]])
+#     n = length(myPos)
+#     for(k in 1:n){
+# 	    A = which.class(wt[myPos[k]],alphabet)
+# 	    B = which.class(myAA[k],alphabet)
+# 	    ratio = log(max(1,nbseqs[B,myPos[k]])/nbseqs[A,myPos[k]])
+# 	    val = pred[myAA[k],myPos[k]] / maxVal * log(1/maxRefVal) * (-1)
+# 	    if(is.na(val)){val=- log(1/maxRefVal)}
+# 	    predTMP=c(predTMP, alpha[i] * val - (1-alpha[i]) * ratio)
+#     }
+#     normPred[i] = sum(predTMP * trace[myPos])
+#   }
+#   return(-normPred)
+# }
 # Equation 2 in https://doi.org/10.1093/molbev/msz179
 normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){
@@ -418,7 +473,7 @@ normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){
 			B = which.class(a,alphabet)
      # I think this is the place I should modify. alpha will become alpha[i]
-			normPred[a,i] = alpha[i] * normPred[a,i] - (1-alpha[i]) * log(max(1,nbseqs[B,i])/nbseqs[A,i])
+			normPred[a,i] = alpha * normPred[a,i] - (1-alpha) * log(max(1,nbseqs[B,i])/nbseqs[A,i])
 		}
 		normPred[,i] = normPred[,i] * trace[i]
 		normPred[aa==wt[i],i] = NA
@@ -449,7 +504,7 @@ normalizePredWithNbSeqsPCSelMult<-function(pred, trace, wt, listMut, alpha, nbSe
 	    ratio = log(max(1,nbseqs[B,myPos[k]])/nbseqs[A,myPos[k]])
 	    val = pred[myAA[k],myPos[k]] / maxVal * log(1/maxRefVal) * (-1)
 	    if(is.na(val)){val=- log(1/maxRefVal)}
-	    predTMP=c(predTMP, alpha[i] * val - (1-alpha[i]) * ratio)
+	    predTMP=c(predTMP, alpha * val - (1-alpha) * ratio)
    }
    normPred[i] = sum(predTMP * trace[myPos])
  }