Commit 8be6ee36 by Mustafa Tekpinar

Corrected column names in full raw escott output.

Column names in raw escott full single point mutational files
(such as BLAT_normPred_evolCombi.txt,
BLAT_normPred_evolEpi.txt and BLAT_normPred_evolInd.txt)
were V1 V2.....Vn, where n is number of residues in the protein.
I replaced those V letters with correct single amino acid
identifiers.
parent c22a700a
# Copyright (c) 2018: Elodie Laine
# Copyright (c) 2022-2023: Mustafa Tekpinar
# This code is part of the gemme package and governed by its license.
# This code is part of the prescott package and governed by its license.
# Please see the LICENSE.txt file included as part of this package.
# Usage: Rscript --save computePred.R XXXX
......@@ -140,7 +140,23 @@ print(trace)
distTrace = binAli[2:N[1],] %*% trace^2
wt=read.fasta(paste(prot,".fasta",sep=""))
n = length(wt[[1]])
# # print(wt)
# for ( residue in wt){
# print(toupper(residue))
# }
# print(length(wt[[1]]))
myWTSequence <- wt[[1]]
#create empty list of length of the wt sequence
myColumnNames <- vector(mode='list', length=length(wt[[1]]))
# myColumnNames <- list()
for ( k in 1:length(wt[[1]])) {
# print(toupper())
myColumnNames[[k]] <-paste(toupper(myWTSequence[k]), k, sep = "")
}
# print(myColumnNames)
wt = wt[[1]][1:n]
#indicesQ = getIndicesAli(ali$seq,1)
......@@ -191,10 +207,10 @@ if(simple){
normPred=normalizePred(pred, trace, wt)
rownames(normPred)=aa
# output the normalized predicted mutational effects based on sequence counts (conservation at the bottom)
write.table(normPredInd,paste0(prot,"_normPred_evolInd.txt"))
write.table(normPredInd,paste0(prot,"_normPred_evolInd.txt"), col.names=myColumnNames)
# output the predicted normalized mutational effects based on evolutionary distance (conservation at the bottom)
write.table(normPred,paste0(prot,"_normPred_evolEpi.txt"))
write.table(normPred,paste0(prot,"_normPred_evolEpi.txt"), col.names=myColumnNames)
}else{
#Independent model normalization
normPredInd = normalizePredWithNbSeqsZeroSelMult(predInd, trace, wt, list(pos,subsaa))
......@@ -247,7 +263,7 @@ if(simple){
normPredCombi = normalizePredWithNbSeqsPC(pred,trace,wt,alpha,nbSeqs,alphabet)
rownames(normPredCombi)=aa
# output the predicted mutational effects based on sequence counts (conservation at the bottom)
write.table(normPredCombi,paste0(prot,"_normPred_evolCombi.txt"))
write.table(normPredCombi,paste0(prot,"_normPred_evolCombi.txt"), col.names=myColumnNames)
}else{
#normPredCombi = normalizePredWithNbSeqsPCSelMult(pred,trace,wt,list(pos,subsaa),alpha,nbSeqs[[1]],alphabet)
normPredCombi = normalizePredWithNbSeqsPCSelMult(pred,trace,wt,list(pos,subsaa),alpha,nbSeqs,alphabet)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment