Commit 4223d132 by Mustafa Tekpinar

Corrected a plotting bug for big proteins.

When a protein is bigger than 3276 aa's,
matplotlib can not plot it due to 2^16
horizontal pixel size limit. I changed
plotGEMMEmatrix function and the way I
call it to solve this problem. Now, the
program will divide big proteins into
500 aa chuncks and plot it in that way.
parent 898d9ad2
......@@ -582,9 +582,13 @@ def parseGEMMEoutput(inputFile, verbose):
# mutatedTransposed = mutationsData.T
# return mutatedTransposed
return mutationsData
def plotGEMMEmatrix(scanningMatrix, outFile, beg, end, \
colorMap = 'coolwarm', offSet=0, pixelType='square'):
colorMap = 'coolwarm', \
offSet=0, pixelType='square',
aaOrder="ACDEFGHIKLMNPQRSTVWY", \
sequence=None,\
interactive=False,\
isColorBarOn=False):
"""
A function to plot deep mutational scanning matrices.
......@@ -594,7 +598,16 @@ def plotGEMMEmatrix(scanningMatrix, outFile, beg, end, \
Data matrix to plot
outFile: string
Name of the output png image
Name of the output image without file extension.
Default file extension (png) is added by the program
beg: int
The first residue to use. It is used to select a subrange
of amino acids. It starts from 1.
end: int
The last residue to use. It is used to select a subrange
of amino acids.
colorMap: matplotlib cmap
Any colormap existing in matplotlib.
......@@ -609,6 +622,16 @@ def plotGEMMEmatrix(scanningMatrix, outFile, beg, end, \
It is a matter of taste but I added it as an option.
Default is 'square'
sequence: string
A fasta file of one letter amino acid codes from N terminal to C terminal.
interactive: bool
If True, it will plot the map interactively.
isColorBarOn: bool
If True, it will show a colorbar to show the numerical scale of
the colors. Default is False.
Returns
-------
Nothing
......@@ -619,16 +642,19 @@ def plotGEMMEmatrix(scanningMatrix, outFile, beg, end, \
if(end == None):
end = len(scanningMatrix[0])
# print("Beginning: "+str(beg))
# print("End : "+str(end))
print("Beginning: "+str(beg))
print("End : "+str(end))
#print(scanningMatrix)
#np.reshape(scanningMatrix.flatten(), (20, 286))
#print(len(scanningMatrix[0]))
# print(len(scanningMatrix[0]))
subMatrix = scanningMatrix[:, (beg-1):end]
#print(subMatrix)
##########################################################################
# Set plotting parameters
nres_shown = len(subMatrix[0])
# print(nres_shown)
fig_height=8
# figure proportions
fig_width = fig_height/2 # inches
......@@ -636,26 +662,26 @@ def plotGEMMEmatrix(scanningMatrix, outFile, beg, end, \
fig, ax = plt.subplots(figsize=(fig_width, fig_height))
if (nres_shown >=200):
if (nres_shown >150):
majorTics = 50
else:
majorTics = 25
majorTics = 20
major_nums_x = np.arange(majorTics, len(subMatrix[0]), majorTics, dtype=int)
major_nums_x = major_nums_x -1
#major_nums_x = major_nums_x -1
major_nums_x = np.insert(major_nums_x, 0, 0)
# print(major_nums_x)
#print(major_nums_x)
minor_nums_x = np.arange(10, len(subMatrix[0]), 10, dtype=int)
minor_nums_x = minor_nums_x - 1
#minor_nums_x = minor_nums_x - 1
minor_nums_x = np.insert(minor_nums_x, 0, 0)
# print(minor_nums_x)
#print(minor_nums_x)
major_labels_x = major_nums_x + 1 + offSet
major_nums_y = np.arange(0, 20, 1, dtype=int)
major_labels_y = alphabeticalAminoAcidsList
major_labels_y = list(aaOrder)
plt.xticks(major_nums_x, major_labels_x, size=28)
ax.set_xticks(minor_nums_x, minor=True)
......@@ -664,27 +690,49 @@ def plotGEMMEmatrix(scanningMatrix, outFile, beg, end, \
ax.set_yticklabels(major_labels_y, ha='left')
ax.tick_params(axis='y', which='major', pad=30)
#############################################################################
if(pixelType=='square'):
#For plotting square pixels
plt.imshow(subMatrix, cmap=colorMap)
img = plt.imshow(subMatrix, cmap=colorMap)
elif(pixelType=='rectangle'):
#For plotting rectangular pixels
plt.imshow(subMatrix, cmap=colorMap, aspect=3.0)
img = plt.imshow(subMatrix, cmap=colorMap, aspect=3.0)
else:
print("\nERROR: Unknown pixelType specified!\n")
sys.exit(-1)
#plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
#To make the colors consistent if there are submatrices.
plt.clim(np.min(scanningMatrix), np.max(scanningMatrix))
#plt.clim([-5.0, 5.0])
if(sequence!=None):
mySeqFile = SeqIO.read(sequence, 'fasta')
#print(mySeqFile)
#Convert aaOrder to a list.
aaOrderList = list(aaOrder)
for i in range (len(subMatrix[0])):
j = beg-1+i
# print(i, aaOrderList.index(sequence[i]))
plt.scatter(i, aaOrderList.index(mySeqFile.seq[j].upper()), s=5, c='black', marker='o')
if(isColorBarOn):
from mpl_toolkits.axes_grid1 import make_axes_locatable
divider = make_axes_locatable(ax)
# print("Here I am!")
# print(nres_shown)
cax = divider.append_axes("right", size=fig_width/nres_shown, pad=0.2)
# plt.clim([-10.0, 2.0])
plt.colorbar(img, cax=cax)
plt.tight_layout()
plt.savefig(outFile)
#plt.show()
plt.savefig(outFile+".png")
if(interactive):
plt.show()
#plt.imsave('output.png', subMatrix)
plt.close()
def check_argument_groups(parser, arg_dict, group, argument):
"""
Check for use of arguments.
......@@ -873,6 +921,16 @@ def parse_command_line():
"score files as well combined combined score file.",
required=False, default=False)
parser.add_argument('--offset', dest='offset', type=int, \
help="This argument is used when printing the output."+\
"If you start from amino acid 50 of the original gene,"+\
"specify --offset 49 to make the numbering start from 50.",
required=False, default=0)
parser.add_argument('--colormap', dest='colormap', type=str, \
help="Select a colormap from standard matplotlib colormaps",
required=False, default='Oranges_r')
args = parser.parse_args()
arg_dict = vars(args)
......@@ -1050,20 +1108,76 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
#Check if the normalized data files were created.
if(os.path.exists(prot+"_normPred_evolEpi.txt") and args.verbose==True):
gemmeData = parseGEMMEoutput(prot+"_normPred_evolEpi.txt", verbose=False)
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolEpi.png", 1, None,\
colorMap='Blues_r', offSet=0, pixelType='square')
sequenceLength = len(gemmeData[0])
beginning = 1
end = sequenceLength
if(sequenceLength>500):
rowLength = 500
numberOfImageChunks = int(int(sequenceLength)/int(rowLength))
for i in range(numberOfImageChunks):
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolEpi"+"_part_"+str(i+1), \
i*rowLength + beginning, \
(i+1)*rowLength + beginning -1,\
colorMap='Blues_r', offSet=i*rowLength + args.offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True)
if(sequenceLength%rowLength != 0):
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolEpi"+"_part_"+str(i+2), \
(i+1)*rowLength + beginning, \
end,\
colorMap='Blues_r', offSet=(i+1)*rowLength + args.offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True)
# plotGEMMEmatrix(gemmeData, prot+"_normPred_evolEpi.png", 1, None,\
# colorMap='Blues_r', offSet=0, pixelType='square')
#Check if the normalized data files were created.
if(os.path.exists(prot+"_normPred_evolInd.txt") and args.verbose==True):
gemmeData = parseGEMMEoutput(prot+"_normPred_evolInd.txt", verbose=False)
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolInd.png", 1, None,\
colorMap='Greens_r', offSet=0, pixelType='square')
sequenceLength = len(gemmeData[0])
beginning = 1
end = sequenceLength
if(sequenceLength>500):
rowLength = 500
numberOfImageChunks = int(int(sequenceLength)/int(rowLength))
for i in range(numberOfImageChunks):
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolInd"+"_part_"+str(i+1), \
i*rowLength + beginning, \
(i+1)*rowLength + beginning -1,\
colorMap='Greens_r', offSet=i*rowLength + args.offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True)
if(sequenceLength%rowLength != 0):
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolInd"+"_part_"+str(i+2), \
(i+1)*rowLength + beginning, \
end,\
colorMap='Greens_r', offSet=(i+1)*rowLength + args.offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True)
# plotGEMMEmatrix(gemmeData, prot+"_normPred_evolInd.png", 1, None,\
# colorMap='Greens_r', offSet=0, pixelType='square')
#Check if the normalized data files were created.
if(os.path.exists(prot+"_normPred_evolCombi.txt")):
gemmeData = parseGEMMEoutput(prot+"_normPred_evolCombi.txt", verbose=False)
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolCombi.png", 1, None,\
colorMap='Oranges_r', offSet=0, pixelType='square')
gemmeData = parseGEMMEoutput(prot+"_normPred_evolCombi.txt", verbose=True)
sequenceLength = len(gemmeData[0])
beginning = 1
end = sequenceLength
if(sequenceLength>500):
rowLength = 500
numberOfImageChunks = int(int(sequenceLength)/int(rowLength))
for i in range(numberOfImageChunks):
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolCombi"+"_part_"+str(i+1), \
i*rowLength + beginning, \
(i+1)*rowLength + beginning -1,\
colorMap=args.colormap, offSet=i*rowLength + args.offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True)
if(sequenceLength%rowLength != 0):
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolCombi"+"_part_"+str(i+2), \
(i+1)*rowLength + beginning, \
end,\
colorMap=args.colormap, offSet=(i+1)*rowLength + args.offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True)
# plotGEMMEmatrix(gemmeData, prot+"_normPred_evolCombi.png", 1, None,\
# colorMap='Oranges_r', offSet=0, pixelType='square')
#Convert standard combined output to a transposed format to increase
#legibility.
......@@ -1074,7 +1188,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
aaAndPosition = []
oldNamesList = gemmeDF.columns.tolist()
for i in range(len(list(seq))):
aaAndPosition.append(list(seq)[i]+str(i+1))
aaAndPosition.append(list(seq)[i]+str(i+1+args.offset))
# gemmeDFtrans = pd.DataFrame(gemmeDFtrans, index=aaAndPosition)
gemmeDFtrans.rename(index = dict(map(lambda i,j : (i,j) , oldNamesList,aaAndPosition)), inplace=True)
# gemmeDFtrans.set_axis(aaAndPosition)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment