Commit 54a3c39a by Mustafa Tekpinar

Moved extractQuerySeq from gemmeAnal.py to sgemme.py.

parent faafebd8
...@@ -133,7 +133,9 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl): ...@@ -133,7 +133,9 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
chainID = chains[0] chainID = chains[0]
#TODO: Remove Bash dependency here. Make the copying process in Python
subprocess.call("cp $SGEMME_PATH/default.conf .",shell=True) subprocess.call("cp $SGEMME_PATH/default.conf .",shell=True)
if retMet=="input": if retMet=="input":
if bFile!='': if bFile!='':
#TODO: I think these two lines must be here as well but I am not sure. #TODO: I think these two lines must be here as well but I am not sure.
......
...@@ -22,6 +22,30 @@ from gemmeAnal import * ...@@ -22,6 +22,30 @@ from gemmeAnal import *
import pandas as pd import pandas as pd
def extractQuerySeq(filename):
"""
# Extract the query sequence from the input alignment
"""
fIN = open(filename,"r")
lines = fIN.readlines()
fIN.close()
if lines[0][0]!=">":
raise Exception('bad FASTA format')
else:
prot = re.compile("[^A-Z0-9a-z]").split(lines[0][1:])[0]
fOUT = open(prot+".fasta","w")
fOUT.write(">"+prot+"\n")
seq=""
i = 1
while lines[i][0]!=">":
seq = seq + lines[i].strip().strip(".").strip("-")
fOUT.write(lines[i])
i = i + 1
fOUT.close()
return prot,seq,i-1
def rankSortProteinData(dataArray, inverted=True): def rankSortProteinData(dataArray, inverted=True):
""" """
This function ranksorts protein data and converts it This function ranksorts protein data and converts it
......
>BLAT
HPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVL
LCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSA
AITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPN
DERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSAL
PAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNR
QIAEIGASLIKHW
This source diff could not be displayed because it is too large. You can view the blob instead.
*****************************************
>BLAT:A
size 263 size of the sequence
numAli 43 Number of sequences in alignment
numMulti 43 Number of multiple alignment
partition 498;498;456;446 partition of sequences after PSI-BLAST+filtering
*****************************************
>SequenceRetrieving
method input change this parameter with -r option of JET. See usage of JET to obtain description of this parameter
format fasta fasta: fasta input file, fasta: fasta input file
*****************************************
>QBlast
eValue 1.0E-5 psiblast maximum expected value threshold
results 20000 maximum number of results
url http://www.ncbi.nlm.nih.gov/BLAST/Blast.cgi BlastQ server URL
database nr database used
matrix blosum62 matrix used to fetch homologs
gap_existence 11 BLOSUM62=11, PAM30=9, BLOSUM45=15, PAM70=BLOSUM80=10
gap_extension 1 BLOSUM62=1, PAM30=1, BLOSUM45=2, PAM70=BLOSUM80=1
max_iter 3 number of iteration for psi-blast
****************************************
>PDB
url http://www.rcsb.org/pdb/downloadFile.do URL of PDB server
*****************************************
>Filter
min_identity 0.20 min sequence identity
max_identity 0.98 max sequence identity
*****************************************
>Sample
length_cutoff 0.8 minimum sequence length expressed in number of residues
*****************************************
>Software
clustalW /usr/local/bin/clustalw2 clustalW system dependent command
muscle /usr/bin/muscle muscle system dependent command
naccess /home/tekpinar/research/carbone-lab-software/naccess2.1.1/naccess naccess system dependent command
psiblast /usr/bin/psiblast psiblast system dependent command
*****************************************
>Data
substMatrix /home/tekpinar/research/carbone-lab-software/JET2/matrix directory location of matrices used in JET (Blosum62, gonnet and hsdm)
blastDatabases /opt/blastdb directory location of databases used for local blast (nr{0-7})
*****************************************
>ET
coverage 0.95 maximum coverage percentage of trace
freq_cutoff 0.0 minimum frequency of trace residue
msaNumber -1 number of alignments (trees), -1 for JET computting
seqNumber -1 number of sequences in alignments, -1 for JET computting
*****************************************
>Access
probe_radius 1.4 radius of probe used for accessible surface detection
res_cutoff 0.05 minimum percentage accessible surface of a residu
atom_cutoff 0.01 minimum accessible surface of an atom
accessType chain change this parameter with -d option of JET. See usage of JET to obtain description of this parameter
*****************************************
>CV
max_dist 20.0 max distance
*****************************************
>Interface
cutoff 0 minimum percentage accessible surface variation of an interface residu
ligand no (yes|no) keep contact of ligand (SUBSTRATE, PRODUCT and COFACTOR of database ENZYME) to compute interface of protein
enzymeCpd /home/tekpinar/research/carbone-lab-software/JET2/jet/data/enzyme.txt location of file containing database ENZYME
homologousPDB no (yes|no) add interface residues of homologous structures (find in pdb database clustered at 95% of identities) to interface of protein
clusteredPDB /home/tekpinar/research/carbone-lab-software/JET2/jet/data/clusters95.txt location of pdb database clustered at 95% of identities
*****************************************
>Cluster
max_dist 5.0 max distance between atoms to aggregate
analysis 2 change this parameter with -a option of JET. See usage of JET to obtain description of this parameter
namePcCol pc name of the column in results file containing the phisical-chemical score of residues (do not change this parameter)
namePcLCol pcL name of the column in results file containing the residues propensities to be found at prot-lig interfaces (do not change this parameter)
nameTraceCol trace name of the column in results file containing the conservation score of residues (do not change this parameter)
coverage -1 change this parameter with -s option of JET. See usage of JET to obtain description of this parameter
#!/bin/bash #!/bin/bash
python $GEMME_PATH/gemme.py aliBLAT.fasta -r input -f aliBLAT.fasta python $SGEMME_PATH/sgemme.py ../example/aliBLAT.fasta -r input -f ../example/aliBLAT.fasta
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment