Commit 24e2a357 by DLA-Ranker

Updates

parent 26d5e924
<component name="ProjectCodeStyleConfiguration">
<state>
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
</state>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/venv" />
</content>
<orderEntry type="jdk" jdkName="Python 3.5" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="projectConfiguration" value="Twisted Trial" />
<option name="PROJECT_TEST_RUNNER" value="Twisted Trial" />
</component>
</module>
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPep8Inspection" enabled="false" level="WEAK WARNING" enabled_by_default="false">
<option name="ignoredErrors">
<list>
<option value="E111" />
<option value="E114" />
</list>
</option>
</inspection_tool>
</profile>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/deepScoring.iml" filepath="$PROJECT_DIR$/.idea/deepScoring.iml" />
</modules>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
</component>
</project>
python3 train_model_v2.py -c ~/save/conv_params/conv0.pkl --steps 20000 -t _95_2 --restore ~/save/tests_cluster/Model_95_1/
python3 evaluation.py -s gdt -d ~/data/CASP12_stage1/ -m ~/Temp/Model_95_2 -n _12stage_1
python3 evaluation.py -s cad -d ~/data/CASP12_stage1/ -m ~/Temp/Model_95_2 -n _12stage_1_cad
python3 evaluation.py -s gdt -d ~/data/CASP12_stage2/ -m ~/Temp/Model_95_2 -n _12stage_2
python3 evaluation.py -s cad -d ~/data/CASP12_stage2/ -m ~/Temp/Model_95_2 -n _12stage_2_cad
import numpy as np
import matplotlib.pyplot as plt
import os
import argparse
# Path to the directory where the losses files are located
directory_path = '/home/benoitch/save/tests_cluster/'
files = sorted(os.listdir(directory_path))
losses = []
indix = []
FLAGS = None
line_size = 0.9
def get_list():
"""
Format FLAGS.tests
:return: list of int, the test ids to plot
"""
ids = []
for l in FLAGS.tests:
id = ''
for i in l:
id += i
ids.append(int(id))
return ids
def recover_loss(s):
"""
Extract the raw loss from a smoothed loss list
:param s: 1D array of smoothed loss values, as saved during training of the model
:return: 1D array containing the raw loss
"""
alpha = 0.999
l = np.zeros(s.size)
l[0] = s[0]/1000
for n in range(l.size):
if n == 0:
continue
l[n] = (1/(1-alpha))*((s[n]*(1-alpha**n)/1000) - (alpha*s[n-1]*(1-alpha**(n-1))/1000))
return l
def smooth(l):
"""
Smooth the loss
:param l: 1D array of the raw loss values
:return: 1D array of the smoothed loss values
"""
s = np.zeros(l.size)
slid = 0
decay = 0.999
dln = 1
for i in range(l.size):
dln = dln*decay
slid = decay*slid + (1-decay)*l[i]
s[i] = 1000* slid / (1 - dln)
return s
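# Descriptive note (added for clarity, derived from the two functions above): recover_loss()
# assumes the stored curve was saved as a bias-corrected exponential moving average,
# s_n = 1000 * m_n / (1 - alpha**n) with m_n = alpha * m_{n-1} + (1 - alpha) * l_n,
# and inverts that relation term by term to recover the raw loss l_n.
# smooth() re-applies the same kind of moving average for plotting, with
# s_i = 1000 * m_i / (1 - decay**(i + 1)); its bias-correction exponent is shifted by one
# compared to recover_loss(), which only affects the first few points of the curve.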
def main(ids):
for f in files:
f = directory_path + f
# These two conditions are used to plot the reference loss (simply predicting the mean of what has been seen so far)
# (The files containing the reference loss are located in benoitch/save/tests_cluster/ and are called gt_losses.npy and gt_x_losses.npy)
if f.split('/')[-1][:4] == 'gt_l':
f__ = open(f, 'rb')
loss_gt = np.load(f__)
elif f.split('/')[-1][:4] == 'gt_x':
f__ = open(f, 'rb')
x_gt = np.load(f__)
# then collecting the ids of the tests in the directory
elif f[-3:] == 'npy':
f__ = open(f, 'rb')
loss = np.load(f__)
losses.append(loss)
# get the id of the network the file comes from; the id is an int
f = f[:-4]
indix.append(int(f.split('/')[-1].split('_')[1])) # the id is the second '_'-separated token of the file name
number = -1
for i,l in enumerate(losses):
# need to deal with losses from the same network training that were split across two files
if not indix[i] in ids:
continue
if number == -1:
number = indix[i]
y = recover_loss(l)
x = range(l.size)
elif indix[i] == number:
y = np.append(y,recover_loss(l))
x = np.append(x, [x[-1] + n for n in range(l.size)])
else:
s = smooth(y)
plt.plot(x,s, alpha = 0.8, linewidth=line_size)
number = indix[i]
y = recover_loss(l)
x = range(l.size)
s = smooth(y)
plt.plot(x,s, alpha = 0.8, linewidth=line_size)
plt.plot(x_gt,loss_gt, alpha = 0.5, color='grey', linewidth=line_size)
plt.show()
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'-t',
'--tests',
type=list,
required=True,
nargs='+',
help='List of test ids to plot'
)
FLAGS = parser.parse_args()
ids = get_list()
main(ids)
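# Usage sketch (hedged: the script name and exact file naming below are assumptions, not part of
# the original code). directory_path is expected to contain loss curves saved as numpy files named
# like '<prefix>_<test_id>_... .npy' (several files per test id when a training was restarted),
# plus the reference curves 'gt_losses.npy' and 'gt_x_losses.npy'. The script would then be run as
#   python3 plot_losses.py -t 95 96
# to plot the smoothed losses of tests 95 and 96 against the grey reference curve.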
import os
import shutil
class load_config:
def __init__(self):
# Must be in the same folder as three_dl
# loads the configuration from config file
f = open(os.path.join(os.path.dirname(os.path.abspath(__file__)),"config"), 'r')
config = f.read()
f.close()
config = config.split('\n')
for line in config:
if line != '' and line[0] != '#':
[name,var] = line.split('=')
name, var = name.replace(' ', ''), var.replace(' ', '')
self.__dict__[name] = var
# flushing the tensorboard repository
folder = self.TENSORBOARD_PATH
for the_file in os.listdir(folder):
file_path = os.path.join(folder, the_file)
try:
if os.path.isfile(file_path):
os.unlink(file_path)
except Exception as e:
print(e)
Ornate is a method for protein quality assessment.
To install Ornate:
-Install Python 3.5 or later
-Install Tensorflow (preferably with GPU support) from https://www.tensorflow.org/install
-Uncomment the line corresponding to your operating system in the "config" file
To run Ornate:
You can score one structure:
python score.py -s path/to/my_structure.pdb
or all structures in a directory:
python score.py -d path/to/my_directory
To interpret output:
The output is generally composed of multiple lines; each line corresponds to one residue.
A typical output line is:
RES 107 L 0.4041
which means that residue 107 from the input is a leucine and its score is 0.4041.
A high score should correspond to a well-folded residue.
Feel free to modify the script score.py to match your needs.
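If you need the scores programmatically, the per-residue lines are easy to parse. The snippet below is a minimal sketch (the helper name and the example file name are illustrative, not part of Ornate):

# parse an Ornate output file into a {residue_id: score} dictionary
def read_ornate_scores(path):
    scores = {}
    with open(path) as f:
        for line in f:
            parts = line.split()
            if len(parts) == 4 and parts[0] == 'RES':
                # line format: RES <residue id> <one-letter code> <score>
                scores[int(parts[1])] = float(parts[3])
    return scores

print(read_ornate_scores('my_structure.pdb.orn'))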
import config
import subprocess
import os
import numpy as np
import scipy.stats
from scipy.stats.stats import pearsonr
import random
import math
import tensorflow as tf
import load_data
import model
import argparse
from glob import glob
def getAverageScoreSco(filename) :
scoreList=[]
#print(filename)
with open(filename) as fp:
lines = fp.readlines()
for l in lines :
#print(l)
for k in l.split(" "):
#print(k)
try:
scoreList.append(float(k))
except ValueError:
continue
if scoreList == [] :
return -1
return np.average(scoreList)
def getAverageScoreOut(filename) :
scoreList=[]
#print(filename)
with open(filename) as fp:
lines = fp.readlines()
for l in lines :
#print(l)#print(k)
try:
scoreList.append(float(l.split(" ")[4]))
except ValueError:
continue
if scoreList == [] :
return -1
return np.average(scoreList)
def getAverageScore(filename) :
scoreList=[]
#print(filename)
with open(filename) as fp:
lines = fp.readlines()
for l in lines :
#print(l)
#print(k)
try:
scoreList.append(float(l[10:]))
except ValueError:
continue
if scoreList == [] :
return -1
return np.average(scoreList)
def getDictScore(filename) :
scoreDict={}
#print(filename)
with open(filename) as fp:
lines = fp.readlines()
for l in lines :
#print(l)
#print(k)
try:
resId = int(l[3:8])
score= float(l[10:])
scoreDict[resId] = score
except ValueError:
continue
return scoreDict
def getDictScoreSco(filename) :
scoreDict={}
#print(filename)
with open(filename) as fp:
lines = fp.readlines()
for l in lines :
#print(l)
#print(k)
try:
resId = int(l.split("r<")[1].split(">")[0])
score= float(l.split(" ")[-1])
scoreDict[resId] = score
except ValueError:
continue
return scoreDict
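# Descriptive note (added for clarity, inferred from the parsing code above): getDictScoreSco()
# expects per-residue ground-truth lines containing a residue tag such as 'r<107>' and ending with
# the score as the last whitespace-separated token, while getDictScore() expects the fixed-width
# 'RES  107 L 0.4041' lines written by score.py (residue id taken from l[3:8], score from l[10:]).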
def main():
gtfiles = glob(FLAGS.directory + '/**/*'+FLAGS.suffix1, recursive=True)
scoresGT = []
scoresRes = []
diffScores = []
for f in gtfiles :
resFile = f[:-len(FLAGS.suffix1)]+FLAGS.suffix2
if os.path.exists(resFile) :
#scoreGT = getAverageScoreOut(f)
#scoreRes = getAverageScore(resFile)
scoreSco = getDictScoreSco(f)
scoreRes = getDictScore(resFile)
for key, value in scoreSco.items():
if key in scoreRes :
scoresGT.append(scoreSco[key])
scoresRes.append(scoreRes[key])
diffScores.append((scoreSco[key] - scoreRes[key])*(scoreSco[key] - scoreRes[key]))
#print(f)
#print(scoreGT)
#print(scoreRes)
#if scoresGT == [] :
# return
print(len(scoresGT))
print(pearsonr(scoresGT, scoresRes))
print(np.mean(diffScores)*1000)
print(1000*(np.std(diffScores))/math.sqrt(len(diffScores)))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'-d',
'--directory',
type=str,
help='Path to the directory containing the score files'
)
parser.add_argument(
'-s',
'--suffix1',
default=".cad",
type=str,
help='suffix of the ground-truth score files'
)
parser.add_argument(
'-f',
'--suffix2',
default=".orn",
type=str,
help='suffix of the predicted score files'
)
FLAGS = parser.parse_args()
main()
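# Usage sketch (hedged: the script name is an assumption). Assuming this comparison script is saved
# as compare_scores.py, it could be run over a directory containing both kinds of score files, e.g.
#   python3 compare_scores.py -d ~/data/CASP12_stage1 -s .sco -f .orn
# It prints, in order: the number of residues compared, the Pearson correlation and its p-value,
# the mean of the squared score differences scaled by 1000, and the standard error of those squared
# differences, also scaled by 1000.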
# Config file for Ornate scoring function
#Please uncomment the line corresponding to your system
MAP_GENERATOR_PATH = bin/Linux/maps_generator
#MAP_GENERATOR_PATH = bin/Windows/maps_generator.exe
#MAP_GENERATOR_PATH = bin/MacOs/maps_generator
# Place to store temporary files
TEMP_PATH = /tmp/
# Place where the pre-trained model is located
MODEL_PATH = model/model.ckpt
import os
import shutil
class load_config:
def __init__(self):
# Must be in the same folder as three_dl
# loads the configuration from config file
f = open(os.path.join(os.path.dirname(os.path.abspath(__file__)),"config"), 'r')
config = f.read()
f.close()
config = config.split('\n')
for line in config:
if line != '' and line[0] != '#':
[name,var] = line.split('=')
name, var = name.replace(' ', ''), var.replace(' ', '')
self.__dict__[name] = var
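# Usage sketch (hedged, illustrative only): the class above turns every 'NAME = value' line of the
# "config" file into an attribute, so after
#   import config
#   CONF = config.load_config()
# the paths configured above are available as CONF.MAP_GENERATOR_PATH, CONF.TEMP_PATH and
# CONF.MODEL_PATH, which is how score.py reads them.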
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import gzip
import numpy
from six.moves import xrange
from tensorflow.contrib.learn.python.learn.datasets import base
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import random_seed
import tensorflow as tf
def _read32(bytestream):
dt = numpy.dtype(numpy.uint32)
return numpy.frombuffer(bytestream.read(4), dtype=dt)[0]
def check_dims(f, gridSize, nbDim):
print('Check dimensions ', f.name, flush = True)
with f as bytestream:
headerSize = _read32(bytestream)
magic = _read32(bytestream)
if magic != 7919:
raise ValueError('Invalid magic number %d in maps file: %s' %
(magic, f.name))
rows = _read32(bytestream)
cols = _read32(bytestream)
lays = _read32(bytestream)
assert(rows == gridSize)
assert(cols == gridSize)
assert(lays == gridSize)
chan = _read32(bytestream)
assert(chan == nbDim)
def extract_maps(f):
#print('Extracting', f.name, flush = True)
with f as bytestream:
headerSize = _read32(bytestream)
magic = _read32(bytestream)
if magic != 7919:
raise ValueError('Invalid magic number %d in maps file: %s' %
(magic, f.name))
rows = _read32(bytestream)
#print("rows "+str(rows))
cols = _read32(bytestream)
#print("cols "+str(cols))
lays = _read32(bytestream)
#print("lays "+str(lays))
chan = _read32(bytestream)
#print("chan "+str(chan))
metaSize = _read32(bytestream)
#print("metaSize "+str(metaSize))
num_maps = _read32(bytestream)
#print("num_maps "+str(num_maps))
header_end = bytestream.read(headerSize - 4*8)
if num_maps<=0 :
return None,None
size = int(rows) * int(cols) * int(lays) * int(chan) * int(num_maps)
size += int(metaSize) * int(num_maps)
try :
buf = bytestream.read(size)
except OverflowError :
return None, None
data = numpy.frombuffer(buf, dtype=numpy.uint8)
data = data.reshape(num_maps, -1)
meta = numpy.ascontiguousarray(data[:, -int(metaSize):]).view(dtype=numpy.int32)
data = data[:,:-int(metaSize)]
return data , meta
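# Layout of a maps file as read above (descriptive note, reconstructed from extract_maps): a header
# of eight uint32 values -- headerSize, magic (7919), rows, cols, lays, chan, metaSize, num_maps --
# padded out to headerSize bytes, followed by num_maps records of rows*cols*lays*chan uint8 density
# values plus metaSize bytes of per-residue metadata (read back as int32). The sketch below writes a
# tiny synthetic file in that layout; it is only an illustration (name, sizes and zero content are
# arbitrary) and assumes a little-endian machine, matching the native byte order used by _read32.
def _write_dummy_maps(path, num_maps=2, grid=24, chan=169, meta_size=16):
    import struct
    header = struct.pack('<8I', 8 * 4, 7919, grid, grid, grid, chan, meta_size, num_maps)
    record = numpy.zeros(grid * grid * grid * chan + meta_size, dtype=numpy.uint8).tobytes()
    with open(path, 'wb') as out:
        out.write(header)
        for _ in range(num_maps):
            out.write(record)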
class DataSet(object):
def __init__(self,
maps,
meta,
dtype=dtypes.float32,
seed=None,
prop = 1,
shuffle = False):
# prop is the fraction of the maps from the data that are kept in the dataset, useful to make the dataset lighter
# when subsampling like that, shuffle is useful to take different residues each time
seed1, seed2 = random_seed.get_seed(seed)
numpy.random.seed(seed1 if seed is None else seed2)
dtype = dtypes.as_dtype(dtype).base_dtype
if dtype not in (dtypes.uint8, dtypes.float32, dtypes.float16):
raise TypeError('Invalid map dtype %r, expected uint8 or float32 or float16' %
dtype)
if dtype == dtypes.float32:
maps = maps.astype(numpy.float32)
numpy.multiply(maps, 1.0 / 255.0, out = maps)
if dtype == dtypes.float16:
maps = maps.astype(numpy.float16)
numpy.multiply(maps, 1.0 / 255.0, out = maps)
if shuffle:
perm0 = numpy.arange(maps.shape[0])[:int(maps.shape[0]*prop)]
self._maps = maps[perm0]
self._meta = meta[perm0]
else:
self._maps = maps
self._meta = meta
self._epochs_completed = 0
self._index_in_epoch = 0
self._num_res = self._maps.shape[0]
@property
def maps(self):
return self._maps
@property
def meta(self):
return self._meta
@property
def num_res(self):
return self._num_res
@property
def epochs_completed(self):
return self._epochs_completed
def next_batch(self, batch_size, shuffle=True, select_residue = -1):
"""Return the next `batch_size` examples from this data set."""
# Select residue is not used anymore, just kept for compatibility purposes
start = self._index_in_epoch
# Shuffle for the first epoch
if self._epochs_completed == 0 and start == 0 and shuffle:
perm0 = numpy.arange(self._num_res)
numpy.random.shuffle(perm0)
self._maps = self.maps[perm0]
self._meta = self._meta[perm0] # Go to the next epoch
if start + batch_size > self._num_res:
# Finished epoch
self._epochs_completed += 1
# Get the rest examples in this epoch
rest_num_examples = self._num_res - start
maps_rest_part = self._maps[start:self._num_res]
meta_rest_part = self._meta[start:self._num_res]
# Shuffle the data
if shuffle:
perm = numpy.arange(self._num_res)
numpy.random.shuffle(perm)
self._maps = self.maps[perm]
self._meta = self.meta[perm]
# Start next epoch
start = 0
self._index_in_epoch = batch_size - rest_num_examples
end = self._index_in_epoch
maps_new_part = self._maps[start:end]
meta_new_part = self._meta[start:end]
return numpy.concatenate((maps_rest_part, maps_new_part), axis=0) , numpy.concatenate((meta_rest_part, meta_new_part), axis=0)
else:
self._index_in_epoch += batch_size
end = self._index_in_epoch
return self._maps[start:end], self._meta[start:end]
def append(self, dataSet_):
self._maps = numpy.concatenate((self._maps, dataSet_._maps))
self._meta = numpy.concatenate((self._meta, dataSet_._meta))
self._num_res += dataSet_._num_res
def is_res(self, index, res_code):
if index < self._num_res :
if self._meta[index, 1] == res_code:
return True
else:
print('index = num_res')
return False
def find_next_res(self, index, res_code):
i = index + 1
while (not self.is_res(i, res_code)) and i < self._num_res - 1:
i += 1
if self.is_res(i, res_code):
return i
return -1
def read_data_set(filename,
dtype=dtypes.float32,
seed=None,
shuffle = False,
prop = 1):
local_file = filename
try :
with open(local_file, 'rb') as f:
train_maps,train_meta = extract_maps(f)
if train_maps is None :
return None
train = DataSet(
train_maps, train_meta, dtype=dtype, seed=seed, shuffle = shuffle, prop = prop)
return train
except ValueError :
return None
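# Usage sketch (hedged; the file name below is illustrative): a maps file produced by the maps
# generator would typically be loaded and consumed in mini-batches like this:
#   dataset = read_data_set('residue_maps.bin', shuffle=True, prop=0.5)
#   if dataset is not None:
#       maps_batch, meta_batch = dataset.next_batch(10)
#       # maps_batch: float32 array of shape (10, rows*cols*lays*chan), values scaled to [0, 1]
#       # meta_batch: int32 metadata rows for the same residues (residue id, residue type, ...)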
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import numpy as np
import tensorflow as tf
NUM_RETYPE = 15
GRID_SIZE = 24
GRID_VOXELS = GRID_SIZE * GRID_SIZE * GRID_SIZE
NB_TYPE = 169
def _weight_variable(name, shape):
return tf.get_variable(name, shape, tf.float32, tf.truncated_normal_initializer(stddev=0.01))
def _bias_variable(name, shape):
return tf.get_variable(name, shape, tf.float32, tf.constant_initializer(0.1, dtype=tf.float32))
def scoringModel(num_retype, maps, isTraining, batch_norm = True, validation = 'softplus', final_activation = 'sigmoid'):
print("Create model start")
prev_layer = tf.reshape(maps,[-1,NB_TYPE, GRID_SIZE,GRID_SIZE,GRID_SIZE])
retyper = _weight_variable("retype"+"_"+str(num_retype), [NB_TYPE, num_retype])
with tf.name_scope('Retype'):
tf.summary.histogram("Weights_R", retyper)
prev_layer = tf.transpose(prev_layer, perm=[0, 2, 3, 4, 1])
map_shape = tf.gather(tf.shape(prev_layer), [0,1,2,3]) # Extract the first four dimensions (batch and the three spatial dims)
map_shape = tf.concat([map_shape, [num_retype]], axis=0)
prev_layer = tf.reshape(prev_layer,[-1,NB_TYPE])
prev_layer = tf.matmul(prev_layer,retyper);
retyped = tf.reshape(prev_layer, map_shape)
CONV1_OUT = 20
kernelConv1 = _weight_variable("weights_C1"+"_"+str(num_retype), [3,3,3, num_retype, CONV1_OUT])
prev_layer = tf.nn.conv3d(retyped, kernelConv1, [1, 1, 1, 1, 1], padding='VALID')
biasConv1 = _bias_variable("biases_C1"+"_"+str(num_retype), [CONV1_OUT])
with tf.name_scope('Conv1'):
tf.summary.histogram("weights_C1", kernelConv1)
tf.summary.histogram("bias_C1", biasConv1)
prev_layer = prev_layer + biasConv1;
if batch_norm :
prev_layer = tf.layers.batch_normalization(prev_layer, training = isTraining, name = "batchn1")
prev_layer = tf.nn.dropout(prev_layer, 1 - tf.cast(isTraining, dtype=tf.float32) * 0.5, name="dropout1")
if validation == 'softplus':
conv1 = tf.nn.softplus(prev_layer, name="softplus1")
else:
conv1 = tf.nn.elu(prev_layer, name="elu1")
CONV2_OUT = 30
kernelConv2 = _weight_variable("weights_C2"+"_"+str(num_retype), [4,4,4, CONV1_OUT, CONV2_OUT])
prev_layer = tf.nn.conv3d(conv1, kernelConv2, [1, 1, 1, 1, 1], padding='VALID')
biasConv2 = _bias_variable("biases_C2"+"_"+str(num_retype), [CONV2_OUT])
with tf.name_scope('Conv2'):
tf.summary.histogram("weights_C2", kernelConv2)
tf.summary.histogram("bias_C2", biasConv2)
prev_layer = prev_layer + biasConv2;
if batch_norm :
prev_layer = tf.layers.batch_normalization(prev_layer, training = isTraining, name = "batchn2")
if validation == 'softplus':
prev_layer = tf.nn.softplus(prev_layer, name="softplus2")
else:
prev_layer = tf.nn.elu(prev_layer, name="elu2")
CONV3_OUT = 20
kernelConv3 = _weight_variable("weights_C3"+"_"+str(num_retype), [4,4,4, CONV2_OUT, CONV3_OUT])
prev_layer = tf.nn.conv3d(prev_layer, kernelConv3, [1, 1, 1, 1, 1], padding='VALID')
biasConv3 = _bias_variable("biases_C3"+"_"+str(num_retype), [CONV3_OUT])
with tf.name_scope('Conv3'):
tf.summary.histogram("weights_C3", kernelConv3)
tf.summary.histogram("bias_C3", biasConv3)
prev_layer = prev_layer + biasConv3;
if batch_norm :
prev_layer = tf.layers.batch_normalization(prev_layer, training = isTraining, name = "batchn3")
if validation == 'softplus':
prev_layer = tf.nn.softplus(prev_layer, name="softplus3")
else:
prev_layer = tf.nn.elu(prev_layer, name="elu3")
POOL_SIZE = 4
prev_layer = tf.nn.avg_pool3d(
prev_layer,
[1,POOL_SIZE,POOL_SIZE,POOL_SIZE,1],
[1,POOL_SIZE,POOL_SIZE,POOL_SIZE,1],
padding='VALID')
NB_DIMOUT = 4*4*4*CONV3_OUT
flat0 = tf.reshape(prev_layer,[-1,NB_DIMOUT])
LINEAR1_OUT = 64
weightsLinear = _weight_variable("weights_L1"+"_"+str(num_retype), [NB_DIMOUT, LINEAR1_OUT])
prev_layer = tf.matmul(flat0, weightsLinear)
biasLinear1 = _bias_variable("biases_L1"+"_"+str(num_retype), [LINEAR1_OUT])
with tf.name_scope('Linear1'):
tf.summary.histogram("weights_L1", weightsLinear)
tf.summary.histogram("biases_L1", biasLinear1)
prev_layer = prev_layer + biasLinear1
if batch_norm:
prev_layer = tf.layers.batch_normalization(prev_layer, training = isTraining, name = "batchn4")
#prev_layer = tf.nn.l2_normalize(flat0,dim=1)
if validation == 'softplus':
flat1 = tf.nn.softplus(prev_layer, name="softplus3")
else:
flat1 = tf.nn.elu(prev_layer, name="elu1")
weightsLinear2 = _weight_variable("weights_L2"+"_"+str(num_retype), [LINEAR1_OUT,1])
with tf.name_scope('Linear2'):
tf.summary.histogram("weights_L2", weightsLinear2)
last = tf.matmul(flat1, weightsLinear2)
print("Create model end")
prev_layer = tf.squeeze(last)
if final_activation == 'tanh':
return tf.add(tf.tanh(prev_layer)*0.5, 0.5, name = "main_output"), flat1, last, weightsLinear2
else:
return tf.sigmoid(prev_layer, name = "main_output"), flat1, last, weightsLinear2
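# Shape walk-through of scoringModel for the default 24x24x24 grid (descriptive note, derived from
# the layer definitions above): the 169 input channels are retyped down to num_retype = 15,
# Conv1 (3x3x3, VALID) gives 22^3 x 20, Conv2 (4x4x4) gives 19^3 x 30, Conv3 (4x4x4) gives
# 16^3 x 20, the 4x4x4 average pooling gives 4^3 x 20 = 1280 values (NB_DIMOUT), which feed a
# 64-unit linear layer and a final 1-unit output squashed by a sigmoid (or a rescaled tanh).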
def loss(scores, cad_score):
#return tf.losses.mean_squared_error(scores,cad_score)
return tf.square(scores - cad_score)
def training(loss, learning_rate):
optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.9)
#optimizer = tf.train.RMSPropOptimizer(learning_rate, decay = 0.999)
global_step = tf.Variable(0, name='global_step', trainable=False)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
train_op = optimizer.minimize(loss, global_step=global_step)
return train_op
import config
import subprocess
import os
import numpy as np
import scipy.stats
import random
import tensorflow as tf
import load_data
import model
import argparse
CONF = config.load_config()
FLAGS = None
def predict(sess, maps_placeholder, is_training, logits, filenames, meta_pl = None,outSuffix = ".orn", mapOptions=[]):
print(mapOptions)
# mapping protein
mapFilename = CONF.TEMP_PATH + 'map_'+str(os.getpid())+ '_pred.bin'
subProcessList=[]
k = 0
nbTypes = int(maps_placeholder.shape[1].value/(24*24*24))
for filename in filenames :
print('# Scoring '+filename)
#subprocess.call([CONF.MAP_GENERATOR_PATH, "--mode", "map", "-i", filename, "--native", "-m", "24", "-v", "0.8", "-o", mapFilename])
subProcessList.append(subprocess.Popen([CONF.MAP_GENERATOR_PATH, "--mode", "map", "-i", filename, "--native", "-m", "24", "-v", "0.8", "-o", mapFilename+str(k)]+mapOptions))
k+=1
k = 0
for filename in filenames :
subProcessList[k].wait()
if not os.path.exists(mapFilename+str(k)):
print('# Mapping failed, ignoring protein')
k+=1
continue
predDataset = load_data.read_data_set(mapFilename+str(k))
if predDataset is None :
k+=1
continue
os.remove(mapFilename+str(k))
result_file = open(filename+outSuffix,"w")
preds = []
# compute prediction res by res
for i in range(predDataset.num_res):
f_map = np.reshape(predDataset.maps[i], (1, model.GRID_VOXELS * nbTypes))
if meta_pl is not None :
f_meta = np.reshape(predDataset.meta[i], (1, 16))
feed_dict = {maps_placeholder: f_map, meta_pl:f_meta, is_training: False}
else :
feed_dict = {maps_placeholder: f_map, is_training: False}
pred = sess.run(logits,feed_dict=feed_dict)
preds.append(pred)
outline='RES {:4d} {:c} {:5.4f}'.format(predDataset.meta[i][0], predDataset.meta[i][1], pred)
print(outline, file = result_file)
#print(predDataset.meta[i][0]+)
#print(pred)
result_file.close()
k+=1
def main():
print(FLAGS.options)
#exit()
sess = tf.Session()
print('Restore existing model: %s' % FLAGS.model)
saver = tf.train.import_meta_graph(FLAGS.model + '.meta')
saver.restore(sess, FLAGS.model)
graph = tf.get_default_graph()
# getting placeholder for input data and output
maps_placeholder = graph.get_tensor_by_name('main_input:0')
meta_placeholder = graph.get_tensor_by_name('meta_pl:0')
is_training = graph.get_tensor_by_name('is_training:0')
logits = graph.get_tensor_by_name("main_output:0")
if FLAGS.structure is not None :
predict(sess, maps_placeholder, is_training, logits, [FLAGS.structure], meta_pl = meta_placeholder, outSuffix = FLAGS.suffix, mapOptions=FLAGS.options.split()) # predict() expects a list of file names
if FLAGS.directory is not None :
bufferFiles = []
for filename in os.listdir(FLAGS.directory):
bufferFiles.append(FLAGS.directory+'/'+filename)
if len(bufferFiles) == FLAGS.buffer :
predict(sess, maps_placeholder, is_training, logits, bufferFiles, meta_pl = meta_placeholder, outSuffix = FLAGS.suffix, mapOptions=FLAGS.options.split())
bufferFiles = []
predict(sess, maps_placeholder, is_training, logits, bufferFiles, meta_pl = meta_placeholder, outSuffix = FLAGS.suffix, mapOptions=FLAGS.options.split())
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'-d',
'--directory',
type=str,
help='Path to a directory of structures to score'
)
parser.add_argument(
'-s',
'--structure',
type=str,
help='Path to the structure to score (in pdb format)'
)
parser.add_argument(
'-f',
'--suffix',
default=".orn",
type=str,
help='suffix to add to result file'
)
parser.add_argument(
'-b',
'--buffer',
default="8",
type=int,
help='number of files to buffer '
)
parser.add_argument(
'-o',
'--options',
default=["-m", "24", "-v", "0.8"],
type=str,
help='argument to map generator '
)
parser.add_argument(
'-m',
'--model',
default=CONF.MODEL_PATH,
type=str,
help='path to the model checkpoint to restore'
)
FLAGS = parser.parse_args()
#print(FLAGS.options)
main()
import pickle
import model_v2_routed as m
arg = m.layer_params(['conv_20_3_VALID', 'conv_30_3_VALID', 'conv_40_3_VALID','conv_50_5_VALID','conv_60_5_VALID','conv_70_5_VALID','avgpool_2'])
with open('convdeep.pkl', 'wb') as output:
pickle.dump(arg, output, pickle.HIGHEST_PROTOCOL)
arg = m.layer_params(['linear_512','linear_384','linear_256','linear_128','linear_64','linear_32','linear_16','linear_1'])
with open('fcdeep.pkl', 'wb') as output:
pickle.dump(arg, output, pickle.HIGHEST_PROTOCOL)
arg = m.layer_params(['linear_512','linear_384','linear_256','linear_128','linear_64','linear_20'])
with open('fcdeep_router.pkl', 'wb') as output:
pickle.dump(arg, output, pickle.HIGHEST_PROTOCOL)
arg = m.layer_params(['conv_20_3_VALID', 'conv_30_4_VALID', 'conv_20_4_VALID', 'avgpool_4'])
with open('/home/benoitch/save/conv_params/conv0.pkl', 'wb') as output:
pickle.dump(arg, output, pickle.HIGHEST_PROTOCOL)
arg = m.layer_params(['conv_20_3_VALID', 'conv_40_4_VALID', 'conv_60_4_VALID', 'avgpool_4'])
with open('/home/benoitch/save/conv_params/conv1.pkl', 'wb') as output:
pickle.dump(arg, output, pickle.HIGHEST_PROTOCOL)
arg = m.layer_params(['conv_20_3_VALID', 'conv_40_4_VALID', 'avgpool_2', 'conv_60_4_VALID', 'avgpool_2'])
with open('/home/benoitch/save/conv_params/conv2.pkl', 'wb') as output:
pickle.dump(arg, output, pickle.HIGHEST_PROTOCOL)
arg = m.layer_params(['conv_20_5_VALID', 'conv_30_4_VALID', 'conv_30_4_VALID', 'conv_40_4_VALID', 'conv_40_4_VALID','avgpool_2'])
with open('/home/benoitch/save/conv_params/conv3.pkl', 'wb') as output:
pickle.dump(arg, output, pickle.HIGHEST_PROTOCOL)
arg = m.layer_params(['conv_20_3_VALID', 'conv_30_4_VALID', 'conv_20_4_VALID','avgpool_4'])
with open('/home/benoitch/save/conv_params/conv4.pkl', 'wb') as output:
pickle.dump(arg, output, pickle.HIGHEST_PROTOCOL)
arg = m.layer_params(['conv_20_3_VALID', 'conv_30_4_VALID', 'conv_20_4_VALID','avgpool_2'])
with open('/home/benoitch/save/conv_params/conv4.pkl', 'wb') as output:
pickle.dump(arg, output, pickle.HIGHEST_PROTOCOL)
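# Note on the layer description strings above (an assumption inferred from the names, since
# model_v2_routed.layer_params itself is not shown in this commit): 'conv_<filters>_<kernel>_<padding>'
# is taken to describe a 3D convolution, 'avgpool_<size>' an average pooling layer and
# 'linear_<units>' a fully connected layer; each pickle simply stores one such architecture
# description to be passed to a training run (e.g. via the -c flag of train_model_v2.py).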
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import gzip
import numpy
from six.moves import xrange
from tensorflow.contrib.learn.python.learn.datasets import base
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import random_seed
import tensorflow as tf
def _read32(bytestream):
dt = numpy.dtype(numpy.uint32)
return numpy.frombuffer(bytestream.read(4), dtype=dt)[0]
def check_dims(f, gridSize, nbDim):
print('Check dimensions ', f.name, flush = True)
with f as bytestream:
headerSize = _read32(bytestream)
magic = _read32(bytestream)
if magic != 7919:
raise ValueError('Invalid magic number %d in maps file: %s' %
(magic, f.name))
rows = _read32(bytestream)
cols = _read32(bytestream)
lays = _read32(bytestream)
assert(rows == gridSize)
assert(cols == gridSize)
assert(lays == gridSize)
chan = _read32(bytestream)
assert(chan == nbDim)
def extract_maps(f):
print('Extracting', f.name, flush = True)
with f as bytestream:
headerSize = _read32(bytestream)
magic = _read32(bytestream)
if magic != 7919:
raise ValueError('Invalid magic number %d in maps file: %s' %
(magic, f.name))
rows = _read32(bytestream)
#print("rows "+str(rows))
cols = _read32(bytestream)
#print("cols "+str(cols))
lays = _read32(bytestream)
#print("lays "+str(lays))
chan = _read32(bytestream)
#print("chan "+str(chan))
metaSize = _read32(bytestream)
#print("metaSize "+str(metaSize))
num_maps = _read32(bytestream)
#print("num_maps "+str(num_maps))
header_end = bytestream.read(headerSize - 4*8)
if num_maps<=0 :
return None,None
size = int(rows) * int(cols) * int(lays) * int(chan) * int(num_maps)
size += int(metaSize) * int(num_maps)
try :
buf = bytestream.read(size)
except OverflowError :
return None, None
data = numpy.frombuffer(buf, dtype=numpy.uint8)
data = data.reshape(num_maps, -1)
meta = numpy.ascontiguousarray(data[:, -int(metaSize):]).view(dtype=numpy.int32)
ss_dict = {0: -1,
66:0,#B
98:0,#b
67:1,#C
69:2,#E
71:3,#G
72:4,#H
73:5,#I
84:6,#T
}
meta[:,3] = [ss_dict[x] for x in meta[:,3]]
res_dict = {0:-1,
65:0, #A
67:1, #C
68:2, #D
69:3, #E
70:4, #F
71:5, #G
72:6, #H
73:7, #I
75:8, #K
76:9, #L
77:10,#M
78:11,#N
80:12,#P
81:13,#Q
82:14,#R
83:15,#S
84:16,#T
86:17,#V
87:18,#W
89:19 #Y
}
meta[:,1] = [res_dict[x] for x in meta[:,1]]
#print(meta[:,3])
#print(meta[:,2])
data = data[:,:-int(metaSize)]
return data , meta
class DataSet(object):
def __init__(self,
maps,
meta,
dtype=dtypes.float32,
seed=None,
prop = 1,
shuffle = False):
# prop is the fraction of the maps from the data that are kept in the dataset, useful to make the dataset lighter
# when subsampling like that, shuffle is useful to take different residues each time
seed1, seed2 = random_seed.get_seed(seed)
numpy.random.seed(seed1 if seed is None else seed2)
dtype = dtypes.as_dtype(dtype).base_dtype
if dtype not in (dtypes.uint8, dtypes.float32, dtypes.float16):
raise TypeError('Invalid map dtype %r, expected uint8 or float32 or float16' %
dtype)
if dtype == dtypes.float32:
maps = maps.astype(numpy.float32)
numpy.multiply(maps, 1.0 / 255.0, out = maps)
if dtype == dtypes.float16:
maps = maps.astype(numpy.float16)
numpy.multiply(maps, 1.0 / 255.0, out = maps)
if shuffle:
perm0 = numpy.arange(maps.shape[0])[:int(maps.shape[0]*prop)]
self._maps = maps[perm0]
self._meta = meta[perm0]
else:
self._maps = maps
self._meta = meta
self._epochs_completed = 0
self._index_in_epoch = 0
self._num_res = self._maps.shape[0]
@property
def maps(self):
return self._maps
@property
def meta(self):
return self._meta
@property
def num_res(self):
return self._num_res
@property
def epochs_completed(self):
return self._epochs_completed
def next_batch(self, batch_size, shuffle=True, select_residue = -1):
"""Return the next `batch_size` examples from this data set."""
# Select residue is not used anymore, just kept for compatibility purposes
start = self._index_in_epoch
# Shuffle for the first epoch
if self._epochs_completed == 0 and start == 0 and shuffle:
perm0 = numpy.arange(self._num_res)
numpy.random.shuffle(perm0)
self._maps = self.maps[perm0]
self._meta = self._meta[perm0] # Go to the next epoch
if start + batch_size > self._num_res:
# Finished epoch
self._epochs_completed += 1
# Get the rest examples in this epoch
rest_num_examples = self._num_res - start
maps_rest_part = self._maps[start:self._num_res]
meta_rest_part = self._meta[start:self._num_res]
# Shuffle the data
if shuffle:
perm = numpy.arange(self._num_res)
numpy.random.shuffle(perm)
self._maps = self.maps[perm]
self._meta = self.meta[perm]
# Start next epoch
start = 0
self._index_in_epoch = batch_size - rest_num_examples
end = self._index_in_epoch
maps_new_part = self._maps[start:end]
meta_new_part = self._meta[start:end]
return numpy.concatenate((maps_rest_part, maps_new_part), axis=0) , numpy.concatenate((meta_rest_part, meta_new_part), axis=0)
else:
self._index_in_epoch += batch_size
end = self._index_in_epoch
return self._maps[start:end], self._meta[start:end]
def append(self, dataSet_):
self._maps = numpy.concatenate((self._maps, dataSet_._maps))
self._meta = numpy.concatenate((self._meta, dataSet_._meta))
self._num_res += dataSet_._num_res
def is_res(self, index, res_code):
if index < self._num_res :
if self._meta[index, 1] == res_code:
return True
else:
print('index = num_res')
return False
def find_next_res(self, index, res_code):
i = index + 1
while (not self.is_res(i, res_code)) and i < self._num_res - 1:
i += 1
if self.is_res(i, res_code):
return i
return -1
def read_data_set(filename,
dtype=dtypes.float32,
seed=None,
shuffle = False,
prop = 1):
local_file = filename
try :
with open(local_file, 'rb') as f:
train_maps,train_meta = extract_maps(f)
if train_maps is None :
return None
train = DataSet(
train_maps, train_meta, dtype=dtype, seed=seed, shuffle = shuffle, prop = prop)
return train
except ValueError :
return None
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import gzip
import numpy
from six.moves import xrange
#from tensorflow.contrib.learn.python.learn.datasets import base
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import random_seed
import tensorflow as tf
def _read32(bytestream):
dt = numpy.dtype(numpy.uint32)
return numpy.frombuffer(bytestream.read(4), dtype=dt)[0]
def check_dims(f, gridSize, nbDim):
print('Check dimensions ', f.name, flush = True)
with f as bytestream:
headerSize = _read32(bytestream)
magic = _read32(bytestream)
if magic != 7919:
raise ValueError('Invalid magic number %d in maps file: %s' %
(magic, f.name))
rows = _read32(bytestream)
cols = _read32(bytestream)
lays = _read32(bytestream)
assert(rows == gridSize)
assert(cols == gridSize)
assert(lays == gridSize)
chan = _read32(bytestream)
assert(chan == nbDim)
def extract_maps(f):
#print('Extracting', f.name, flush = True)
with f as bytestream:
headerSize = _read32(bytestream)
magic = _read32(bytestream)
if magic != 7919:
raise ValueError('Invalid magic number %d in maps file: %s' %
(magic, f.name))
rows = _read32(bytestream)
#print("rows "+str(rows))
cols = _read32(bytestream)
#print("cols "+str(cols))
lays = _read32(bytestream)
#print("lays "+str(lays))
chan = _read32(bytestream)
#print("chan "+str(chan))
metaSize = _read32(bytestream)
#print("metaSize "+str(metaSize))
num_maps = _read32(bytestream)
#print("num_maps "+str(num_maps))
header_end = bytestream.read(headerSize - 4*8)
if num_maps<=0 :
return None,None
size = int(rows) * int(cols) * int(lays) * int(chan) * int(num_maps)
size += int(metaSize) * int(num_maps)
try :
buf = bytestream.read(size)
except OverflowError :
return None, None
data = numpy.frombuffer(buf, dtype=numpy.uint8)
data = data.reshape(num_maps, -1)
meta = numpy.ascontiguousarray(data[:, -int(metaSize):]).view(dtype=numpy.int32)
ss_dict = {0: -1,
66:0,#B
98:0,#b
67:1,#C
69:2,#E
71:3,#G
72:4,#H
73:5,#I
84:6,#T
}
#meta[:,3] = [ss_dict[x] for x in meta[:,3]] #Y commented!
res_dict = {0:-1,
65:0, #A
67:1, #C
68:2, #D
69:3, #E
70:4, #F
71:5, #G
72:6, #H
73:7, #I
75:8, #K
76:9, #L
77:10,#M
78:11,#N
80:12,#P
81:13,#Q
82:14,#R
83:15,#S
84:16,#T
86:17,#V
87:18,#W
89:19 #Y
}
#meta[:,1] = [res_dict[x] for x in meta[:,1]]
#print(meta[:,3])
#print(meta[:,2])
data = data[:,:-int(metaSize)]
return data , meta
class DataSet(object):
def __init__(self,
maps,
meta,
dtype=dtypes.float32,
seed=None,
prop = 1,
shuffle = False):
# prop is the fraction of the maps from the data that are kept in the dataset, useful to make the dataset lighter
# when subsampling like that, shuffle is useful to take different residues each time
seed1, seed2 = random_seed.get_seed(seed)
numpy.random.seed(seed1 if seed is None else seed2)
dtype = dtypes.as_dtype(dtype).base_dtype
if dtype not in (dtypes.uint8, dtypes.float32, dtypes.float16):
raise TypeError('Invalid map dtype %r, expected uint8 or float32 or float16' %
dtype)
if dtype == dtypes.float32:
maps = maps.astype(numpy.float32)
numpy.multiply(maps, 1.0 / 255.0, out = maps)
if dtype == dtypes.float16:
maps = maps.astype(numpy.float16)
numpy.multiply(maps, 1.0 / 255.0, out = maps)
if shuffle:
perm0 = numpy.arange(maps.shape[0])[:int(maps.shape[0]*prop)]
self._maps = maps[perm0]
self._meta = meta[perm0]
else:
self._maps = maps
self._meta = meta
self._epochs_completed = 0
self._index_in_epoch = 0
self._num_res = self._maps.shape[0]
@property
def maps(self):
return self._maps
@property
def meta(self):
return self._meta
@property
def num_res(self):
return self._num_res
@property
def epochs_completed(self):
return self._epochs_completed
def next_batch(self, batch_size, shuffle=True, select_residue = -1):
"""Return the next `batch_size` examples from this data set."""
# Select residue is not used anymore, just kept for compatibility purposes
start = self._index_in_epoch
# Shuffle for the first epoch
if self._epochs_completed == 0 and start == 0 and shuffle:
perm0 = numpy.arange(self._num_res)
numpy.random.shuffle(perm0)
self._maps = self.maps[perm0]
self._meta = self._meta[perm0] # Go to the next epoch
if start + batch_size > self._num_res:
# Finished epoch
self._epochs_completed += 1
# Get the rest examples in this epoch
rest_num_examples = self._num_res - start
maps_rest_part = self._maps[start:self._num_res]
meta_rest_part = self._meta[start:self._num_res]
# Shuffle the data
if shuffle:
perm = numpy.arange(self._num_res)
numpy.random.shuffle(perm)
self._maps = self.maps[perm]
self._meta = self.meta[perm]
# Start next epoch
start = 0
self._index_in_epoch = batch_size - rest_num_examples
end = self._index_in_epoch
maps_new_part = self._maps[start:end]
meta_new_part = self._meta[start:end]
return numpy.concatenate((maps_rest_part, maps_new_part), axis=0) , numpy.concatenate((meta_rest_part, meta_new_part), axis=0)
else:
self._index_in_epoch += batch_size
end = self._index_in_epoch
return self._maps[start:end], self._meta[start:end]
def append(self, dataSet_):
self._maps = numpy.concatenate((self._maps, dataSet_._maps))
self._meta = numpy.concatenate((self._meta, dataSet_._meta))
self._num_res += dataSet_._num_res
def is_res(self, index, res_code):
if index < self._num_res :
if self._meta[index, 1] == res_code:
return True
else:
print('index = num_res')
return False
def find_next_res(self, index, res_code):
i = index + 1
while (not self.is_res(i, res_code)) and i < self._num_res - 1:
i += 1
if self.is_res(i, res_code):
return i
return -1
def read_data_set(filename,
dtype=dtypes.float32,
seed=None,
shuffle = False,
prop = 1):
local_file = filename
try :
with open(local_file, 'rb') as f:
train_maps,train_meta = extract_maps(f)
if train_maps is None :
return None
train = DataSet(
train_maps, train_meta, dtype=dtype, seed=seed, shuffle = shuffle, prop = prop)
return train
except ValueError :
return None
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import numpy as np
import tensorflow as tf
NUM_RETYPE = 15
GRID_SIZE = 24
GRID_VOXELS = GRID_SIZE * GRID_SIZE * GRID_SIZE
NB_TYPE = 169
def _weight_variable(name, shape):
return tf.get_variable(name, shape, tf.float32, tf.truncated_normal_initializer(stddev=0.01))
def _bias_variable(name, shape):
return tf.get_variable(name, shape, tf.float32, tf.constant_initializer(0.1, dtype=tf.float32))
def scoringModel(num_retype, maps, isTraining, batch_norm = True, validation = 'softplus', final_activation = 'sigmoid'):
print("Create model start")
prev_layer = tf.reshape(maps,[-1,NB_TYPE, GRID_SIZE,GRID_SIZE,GRID_SIZE])
retyper = _weight_variable("retype"+"_"+str(num_retype), [NB_TYPE, num_retype])
with tf.name_scope('Retype'):
tf.summary.histogram("Weights_R", retyper)
prev_layer = tf.transpose(prev_layer, perm=[0, 2, 3, 4, 1])
map_shape = tf.gather(tf.shape(prev_layer), [0,1,2,3]) # Extract the first four dimensions (batch and the three spatial dims)
map_shape = tf.concat([map_shape, [num_retype]], axis=0)
prev_layer = tf.reshape(prev_layer,[-1,NB_TYPE])
prev_layer = tf.matmul(prev_layer,retyper);
retyped = tf.reshape(prev_layer, map_shape)
CONV1_OUT = 20
kernelConv1 = _weight_variable("weights_C1"+"_"+str(num_retype), [3,3,3, num_retype, CONV1_OUT])
prev_layer = tf.nn.conv3d(retyped, kernelConv1, [1, 1, 1, 1, 1], padding='VALID')
biasConv1 = _bias_variable("biases_C1"+"_"+str(num_retype), [CONV1_OUT])
with tf.name_scope('Conv1'):
tf.summary.histogram("weights_C1", kernelConv1)
tf.summary.histogram("bias_C1", biasConv1)
prev_layer = prev_layer + biasConv1;
if batch_norm :
prev_layer = tf.layers.batch_normalization(prev_layer, training = isTraining, name = "batchn1")
prev_layer = tf.nn.dropout(prev_layer, 1 - tf.cast(isTraining, dtype=tf.float32) * 0.5, name="dropout1")
if validation == 'softplus':
conv1 = tf.nn.softplus(prev_layer, name="softplus1")
else:
conv1 = tf.nn.elu(prev_layer, name="elu1")
CONV2_OUT = 30
kernelConv2 = _weight_variable("weights_C2"+"_"+str(num_retype), [4,4,4, CONV1_OUT, CONV2_OUT])
prev_layer = tf.nn.conv3d(conv1, kernelConv2, [1, 1, 1, 1, 1], padding='VALID')
biasConv2 = _bias_variable("biases_C2"+"_"+str(num_retype), [CONV2_OUT])
with tf.name_scope('Conv2'):
tf.summary.histogram("weights_C2", kernelConv2)
tf.summary.histogram("bias_C2", biasConv2)
prev_layer = prev_layer + biasConv2;
if batch_norm :
prev_layer = tf.layers.batch_normalization(prev_layer, training = isTraining, name = "batchn2")
if validation == 'softplus':
prev_layer = tf.nn.softplus(prev_layer, name="softplus2")
else:
prev_layer = tf.nn.elu(prev_layer, name="elu2")
CONV3_OUT = 20
kernelConv3 = _weight_variable("weights_C3"+"_"+str(num_retype), [4,4,4, CONV2_OUT, CONV3_OUT])
prev_layer = tf.nn.conv3d(prev_layer, kernelConv3, [1, 1, 1, 1, 1], padding='VALID')
biasConv3 = _bias_variable("biases_C3"+"_"+str(num_retype), [CONV3_OUT])
with tf.name_scope('Conv3'):
tf.summary.histogram("weights_C3", kernelConv3)
tf.summary.histogram("bias_C3", biasConv3)
prev_layer = prev_layer + biasConv3;
if batch_norm :
prev_layer = tf.layers.batch_normalization(prev_layer, training = isTraining, name = "batchn3")
if validation == 'softplus':
prev_layer = tf.nn.softplus(prev_layer, name="softplus3")
else:
prev_layer = tf.nn.elu(prev_layer, name="elu3")
POOL_SIZE = 4
prev_layer = tf.nn.avg_pool3d(
prev_layer,
[1,POOL_SIZE,POOL_SIZE,POOL_SIZE,1],
[1,POOL_SIZE,POOL_SIZE,POOL_SIZE,1],
padding='VALID')
NB_DIMOUT = 4*4*4*CONV3_OUT
flat0 = tf.reshape(prev_layer,[-1,NB_DIMOUT])
LINEAR1_OUT = 64
weightsLinear = _weight_variable("weights_L1"+"_"+str(num_retype), [NB_DIMOUT, LINEAR1_OUT])
prev_layer = tf.matmul(flat0, weightsLinear)
biasLinear1 = _bias_variable("biases_L1"+"_"+str(num_retype), [LINEAR1_OUT])
with tf.name_scope('Linear1'):
tf.summary.histogram("weights_L1", weightsLinear)
tf.summary.histogram("biases_L1", biasLinear1)
prev_layer = prev_layer + biasLinear1
if batch_norm:
prev_layer = tf.layers.batch_normalization(prev_layer, training = isTraining, name = "batchn4")
#prev_layer = tf.nn.l2_normalize(flat0,dim=1)
if validation == 'softplus':
flat1 = tf.nn.softplus(prev_layer, name="softplus3")
else:
flat1 = tf.nn.elu(prev_layer, name="elu1")
weightsLinear2 = _weight_variable("weights_L2"+"_"+str(num_retype), [LINEAR1_OUT,1])
with tf.name_scope('Linear2'):
tf.summary.histogram("weights_L2", weightsLinear2)
last = tf.matmul(flat1, weightsLinear2)
print("Create model end")
prev_layer = tf.squeeze(last)
if final_activation == 'tanh':
return tf.add(tf.tanh(prev_layer)*0.5, 0.5, name = "main_output"), flat1, last, weightsLinear2
else:
return tf.sigmoid(prev_layer, name = "main_output"), flat1, last, weightsLinear2
def loss(scores, cad_score):
#return tf.losses.mean_squared_error(scores,cad_score)
return tf.square(scores - cad_score)
def training(loss, learning_rate):
optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.9)
#optimizer = tf.train.RMSPropOptimizer(learning_rate, decay = 0.999)
global_step = tf.Variable(0, name='global_step', trainable=False)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
train_op = optimizer.minimize(loss, global_step=global_step)
return train_op
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
# NUM_RETYPE = 15
# GRID_SIZE = 24
# GRID_VOXELS = GRID_SIZE * GRID_SIZE * GRID_SIZE
# NB_TYPE = 169
def _weight_variable(name, shape):
return tf.get_variable(name, shape, tf.float32, tf.truncated_normal_initializer(stddev=0.01))
def _bias_variable(name, shape):
return tf.get_variable(name, shape, tf.float32, tf.constant_initializer(0.1, dtype=tf.float32))
class ScoringModel:
def __init__(self,
num_retype=15,
GRID_SIZE=24,
NB_TYPE=169,
batch_norm=True,
validation='softplus',
final_activation='sigmoid'
):
self.num_retype = num_retype
self.GRID_SIZE = GRID_SIZE
self.GRID_VOXELS = self.GRID_SIZE * self.GRID_SIZE * self.GRID_SIZE
self.NB_TYPE = NB_TYPE
self.batch_norm = batch_norm
self.validation = validation
self.final_activation = final_activation
def get_pred(self,
maps,
isTraining):
print('Creating model...')
input_data = tf.reshape(maps, [-1, self.NB_TYPE, self.GRID_SIZE, self.GRID_SIZE, self.GRID_SIZE])
# First step reducing data dimensionality
retyped = self.retype_layer(input_data, self.num_retype, self.NB_TYPE, name='retype')
# First convolution
CONV1_OUT = 20
out_conv_1 = self.conv_layer(retyped, [3, 3, 3, self.num_retype, CONV1_OUT], name='CONV_1')
# batch norm and activation
out_conv_1 = self.activation_normalization_layer(out_conv_1, self.batch_norm, self.validation, isTraining, name = 'act_norm_1')
# Second convolution
CONV2_OUT = 30
out_conv_2 = self.conv_layer(out_conv_1, [4, 4, 4, CONV1_OUT, CONV2_OUT], name='CONV_2')
# Batch norm and activation
out_conv_2 = self.activation_normalization_layer(out_conv_2, self.batch_norm, self.validation, isTraining, name = 'act_norm_2')
# Third convolution
CONV3_OUT = 20
out_conv_3 = self.conv_layer(out_conv_2, [4, 4, 4, CONV2_OUT, CONV3_OUT], name='CONV_3')
out_conv_3 = self.activation_normalization_layer(out_conv_3, self.batch_norm, self.validation, isTraining, name = 'act_norm_3')
# pooling and flattening
POOL_SIZE = 4
prev_layer = tf.nn.avg_pool3d(
out_conv_3,
[1, POOL_SIZE, POOL_SIZE, POOL_SIZE, 1],
[1, POOL_SIZE, POOL_SIZE, POOL_SIZE, 1],
padding='VALID')
NB_DIMOUT = 4 * 4 * 4 * CONV3_OUT
flat0 = tf.reshape(prev_layer, [-1, NB_DIMOUT])
# Fully connected layer 1
LINEAR1_OUT = 64
out_l1 = self.fc_layer(flat0, NB_DIMOUT, LINEAR1_OUT, bias=True, name='linear_1', num_retype=self.num_retype)
out_l1 = self.activation_normalization_layer(out_l1, self.batch_norm, self.validation, isTraining, name = 'act_norm_4')
out = self.fc_layer(out_l1, LINEAR1_OUT, 1, False, 'Linear_2', self.num_retype)
out = tf.squeeze(out)
if self.final_activation == 'tanh':
return tf.add(tf.tanh(out) * 0.5, 0.5, name="main_output")
else:
return tf.sigmoid(out, name="main_output")
def retype_layer(self, prev_layer, num_retype_, input_, name='retype'):
retyper = _weight_variable(name + "_" + str(num_retype_), [input_, num_retype_])
with tf.name_scope(name):
tf.summary.histogram(name, retyper)
prev_layer = tf.transpose(prev_layer, perm=[0, 2, 3, 4, 1])
map_shape = tf.gather(tf.shape(prev_layer), [0, 1, 2, 3]) # Extract the first four dimensions (batch and the three spatial dims)
map_shape = tf.concat([map_shape, [self.num_retype]], axis=0)
prev_layer = tf.reshape(prev_layer, [-1, self.NB_TYPE])
prev_layer = tf.matmul(prev_layer, retyper)
return tf.reshape(prev_layer, map_shape)
def conv_layer(self, prev_layer, kernel_size, name='CONV'):
kernelConv = _weight_variable("weights_" + name + "_" + str(self.num_retype), kernel_size)
prev_layer = tf.nn.conv3d(prev_layer, kernelConv, [1, 1, 1, 1, 1], padding='VALID', name = name)
biasConv = _bias_variable("biases_" + name + "_" + str(kernel_size[3]), kernel_size[-1])
with tf.name_scope(name):
tf.summary.histogram("weights_" + name, kernelConv)
tf.summary.histogram("biases_" + name, biasConv)
return prev_layer + biasConv;
def activation_normalization_layer(self, input_vector, batch_norm, validation, isTraining, name='act_norm_'):
if batch_norm:
input_vector = tf.layers.batch_normalization(input_vector, training=isTraining, name = name)
if validation == 'softplus':
return tf.nn.softplus(input_vector, name="softplus")
else:
return tf.nn.elu(input_vector, name="elu")
def fc_layer(self, input_vector, input_size, output_size, bias, name, num_retype):
weightsLinear = _weight_variable("weights_" + name + "_" + str(num_retype), [input_size, output_size])
prev_layer = tf.matmul(input_vector, weightsLinear)
if bias:
biasLinear = _bias_variable("biases_" + name + "_" + str(num_retype), [output_size])
with tf.name_scope(name):
tf.summary.histogram("weights_" + name, weightsLinear)
if bias:
tf.summary.histogram("biases_" + name, biasLinear)
if bias:
return prev_layer + biasLinear
else:
return prev_layer
def compute_loss(self, scores, cad_score):
return tf.square(scores - cad_score, name='loss')
def train(self, loss, learning_rate):
optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.9)
# optimizer = tf.train.RMSPropOptimizer(learning_rate, decay = 0.999)
global_step = tf.Variable(0, name='global_step', trainable=False)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
train_op = optimizer.minimize(loss, global_step=global_step, name='train_op')
return train_op
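# Minimal graph-construction sketch (hedged: the placeholder shapes and names below are assumptions
# chosen to match how score.py looks tensors up, not something defined in this file).
if __name__ == '__main__':
    scoring_model = ScoringModel(num_retype=15, GRID_SIZE=24, NB_TYPE=169)
    maps_pl = tf.placeholder(tf.float32,
                             [None, scoring_model.GRID_VOXELS * scoring_model.NB_TYPE],
                             name='main_input')
    is_training_pl = tf.placeholder(tf.bool, name='is_training')
    cad_pl = tf.placeholder(tf.float32, [None])
    scores = scoring_model.get_pred(maps_pl, is_training_pl)      # per-residue predictions
    loss_op = scoring_model.compute_loss(scores, cad_pl)          # squared error vs ground-truth CAD-score
    train_op = scoring_model.train(loss_op, learning_rate=1e-4)   # Adam step with batch-norm updates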
import numpy as np
import os
def stat_file(path_to_file):
f = open(path_to_file, 'r')
lines = f.readlines()
scores = []
for l in lines:
[_, sco] = l.split(' ')
sco = sco.strip()
if sco == '0':
scores.append(0)
else:
scores.append(float(sco))
f.close()
if len(scores) == 0:
return [0]
return np.mean(scores)
def stat_prot(path_to_prot):
file_ls = os.listdir(path_to_prot)
scores_p = np.array([])
for f in file_ls:
if f[-4:] == '.sco':
sco_f = stat_file(path_to_prot + '/' + f)
scores_p = np.append(scores_p, sco_f)
if scores_p.size == 0:
return None
return scores_p
def stat_casp(path_to_casp):
file_ls = os.listdir(path_to_casp)
scores_c = np.array([])
for f in file_ls:
stats_prot = stat_prot(path_to_casp + '/' + f)
if not stats_prot is None:
scores_c = np.append(scores_c, stats_prot)
scores_c = np.reshape(scores_c,(-1,))
return scores_c
def stat_full(path_to_data, casp_choice):
file_ls = casp_choice
scores_full = np.array([])
for f in file_ls:
print('Stats : ' + f)
scores_c = stat_casp(path_to_data + '/' + f + '/MODELS/')
scores_full = np.append(scores_full, scores_c)
print('Done')
scores_full = np.reshape(scores_full,(-1,))
return scores_full
def main():
print(stat_full('/home/benoitch/data/', ['CASP7','CASP8']))
if __name__ == '__main__':
main()
Namespace(batch_size=10, conv_size_list=[], data_variability='all', dropout=0.5, elementSize=1, evaluate=False, full_size_list=[], groupSize=12, learning_rate=0.0001, log_dir='/tmp/tensorflow/mnist/logs/fully_connected_feed2', log_file='tmp.log', max_steps=25000, restore='', test_file='', training_data_path='')