Source: model.py
import tensorflow as tf
import numpy as np
import random


class S2VT_model():

    def __init__(self, frame_steps=80, frame_feat_dim=4096, caption_steps=45, vocab_size=3000, dim_hidden=300):
        ## frame_steps must be an int, since it drives range() below (the listing had 80.0)

        self.frame_steps = frame_steps
        self.frame_feat_dim = frame_feat_dim
        self.caption_steps = caption_steps
        self.vocab_size = vocab_size
        self.dim_hidden = dim_hidden

        ## Graph input
        self.frame = tf.placeholder(tf.float32, [None, frame_steps, frame_feat_dim])
        self.caption = tf.placeholder(tf.int32, [None, caption_steps+1])
        self.caption_mask = tf.placeholder(tf.float32, [None, caption_steps+1])
        ## kept for interface parity with the other models; unused in this graph
        self.scheduled_sampling_prob = tf.placeholder(tf.float32, [], name='scheduled_sampling_prob')
        batch_frame = tf.shape(self.frame)[0]
        batch_caption = tf.shape(self.caption)[0]
        tf.Assert(tf.equal(batch_frame, batch_caption), [batch_frame, batch_caption])
        batch_size = batch_frame
        self.train_state = tf.placeholder(tf.bool)

        ## frame embedding params
        with tf.variable_scope("frame_embedding"):
            w_frame_embed = tf.get_variable("w_frame_embed", [frame_feat_dim, dim_hidden], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
            b_frame_embed = tf.get_variable("b_frame_embed", [dim_hidden], initializer=tf.constant_initializer(0.0))

        ## word embedding param
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, dim_hidden], dtype=tf.float32)

        ## word embedding to vocabulary-logit params
        w_word_onehot = tf.get_variable("w_word_onehot", [dim_hidden, vocab_size], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
        b_word_onehot = tf.get_variable("b_word_onehot", [vocab_size], initializer=tf.constant_initializer(0.0))

        ## two lstm params
        with tf.variable_scope("att_lstm"):
            att_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
        with tf.variable_scope("cap_lstm"):
            cap_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)

        att_state = (tf.zeros([batch_size, dim_hidden]), tf.zeros([batch_size, dim_hidden]))
        cap_state = (tf.zeros([batch_size, dim_hidden]), tf.zeros([batch_size, dim_hidden]))

        padding = tf.zeros([batch_size, dim_hidden])

        ##################### Computing Graph ########################

        frame_flat = tf.reshape(self.frame, [-1, frame_feat_dim])
        frame_embedding = tf.nn.xw_plus_b(frame_flat, w_frame_embed, b_frame_embed)
        frame_embedding = tf.reshape(frame_embedding, [batch_size, frame_steps, dim_hidden])

        cap_lstm_outputs = []

        ## Encoding stage
        for i in range(frame_steps):
            with tf.variable_scope('att_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output1, att_state = att_lstm(frame_embedding[:, i, :], att_state)
            ## input shape of cap_lstm: [batch_size, 2*dim_hidden]
            with tf.variable_scope('cap_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output2, cap_state = cap_lstm(tf.concat([padding, output1], 1), cap_state)

        ## Decoding stage
        ## Training util: teacher forcing feeds the ground-truth word id of step i.
        ## The listing passed self.caption[:i] (a row slice) and argmaxed it, which
        ## is not a word index; taking the column of word ids directly fixes that.
        def train_cap(prev_layer_output, word_index, prev_state):
            with tf.device('/cpu:0'):
                word_embed = tf.nn.embedding_lookup(embedding, word_index)
                output, state = cap_lstm(
                    tf.concat([word_embed, prev_layer_output], 1), prev_state)
                m_state, c_state = state
                return output, m_state, c_state

        def test_cap(prev_layer_output, prev_word_logits, prev_state):
            ##  TODO: beam search
            word_index = tf.argmax(prev_word_logits, axis=1)
            word_embed = tf.nn.embedding_lookup(embedding, word_index)
            output, state = cap_lstm(
                tf.concat([word_embed, prev_layer_output], 1), prev_state)
            m_state, c_state = state
            return output, m_state, c_state

        ## token id 4 serves as <BOS>
        prev_word_logits = tf.tile(tf.one_hot([4], vocab_size), [batch_size, 1])
        for i in range(caption_steps):

            with tf.variable_scope('att_lstm'):
                tf.get_variable_scope().reuse_variables()
                output1, att_state = att_lstm(padding, att_state)

            with tf.variable_scope('cap_lstm'):
                tf.get_variable_scope().reuse_variables()
                output2, m_state, c_state = tf.cond(self.train_state,
                                                    lambda: train_cap(output1, self.caption[:, i], cap_state),
                                                    lambda: test_cap(output1, prev_word_logits, cap_state))
                cap_state = (m_state, c_state)
                cap_lstm_outputs.append(output2)
                ## project the hidden output to vocabulary logits so the next step's
                ## argmax feedback is over words; the listing argmaxed the raw
                ## dim_hidden output, which is not a word distribution
                prev_word_logits = tf.nn.xw_plus_b(output2, w_word_onehot, b_word_onehot)

        output = tf.reshape(tf.concat(cap_lstm_outputs, 1), [-1, dim_hidden])

        ## shape (batch_size*caption_steps, vocab_size)
        onehot_word_logits = tf.nn.xw_plus_b(output, w_word_onehot, b_word_onehot)
        self.predict_result = tf.reshape(tf.argmax(onehot_word_logits[:, 2:], 1)+2, [batch_size, caption_steps])

        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example([onehot_word_logits],
                                                                  [tf.reshape(self.caption[:, 1:], [-1])],
                                                                  [tf.reshape(self.caption_mask[:, 1:], [-1])])

        self.cost = tf.reduce_mean(loss)
        self.global_step = tf.Variable(0, trainable=False)
        self.train_op = tf.train.AdamOptimizer().minimize(self.cost, global_step=self.global_step)

        config = tf.ConfigProto(log_device_placement=True)
        config.gpu_options.allow_growth = True

        self.sess = tf.Session(config=config)

    def train(self, input_frame, input_caption, input_caption_mask, keep_prob=0.5):
        _, cost = self.sess.run([self.train_op, self.cost],
                                feed_dict={self.frame: input_frame,
                                           self.caption: input_caption,
                                           self.caption_mask: input_caption_mask,
                                           self.train_state: True})
        return cost

    def predict(self, input_frame):
        padding = np.zeros([input_frame.shape[0], self.caption_steps + 1])
        words = self.sess.run([self.predict_result],
                              feed_dict={self.frame: input_frame,
                                         self.caption: padding,
                                         self.train_state: False,
                                         self.scheduled_sampling_prob: 0.0})
        return words

    def initialize(self):
        self.sess.run(tf.global_variables_initializer())

    def schedule_sampling(self):
        ## self.schedule_sampling_converge is never set in this file; the caller
        ## must assign it before use. global_step is fetched with sess.run so the
        ## comparison is between Python numbers, not a float and a tensor.
        prob = self.sess.run(self.global_step) / float(self.schedule_sampling_converge)
        return random.random() > prob


class S2VT_attention_model():

    def __init__(self, frame_steps=20, frame_feat_dim=4096, caption_steps=45, vocab_size=3000, dim_hidden=300):

        self.frame_steps = frame_steps
        self.frame_feat_dim = frame_feat_dim
        self.caption_steps = caption_steps
        self.vocab_size = vocab_size
        self.dim_hidden = dim_hidden

        ## Graph input
        self.frame = tf.placeholder(tf.float32, [None, frame_steps, frame_feat_dim])
        self.caption = tf.placeholder(tf.int64, [None, caption_steps+1])
        self.caption_mask = tf.placeholder(tf.float32, [None, caption_steps+1])
        self.scheduled_sampling_prob = tf.placeholder(
            tf.float32, [], name='scheduled_sampling_prob')
        batch_frame = tf.shape(self.frame)[0]
        batch_caption = tf.shape(self.caption)[0]
        tf.Assert(tf.equal(batch_frame, batch_caption), [batch_frame, batch_caption])
        self.batch_size = batch_frame
        self.train_state = tf.placeholder(tf.bool)
        self.keep_prob = tf.placeholder(tf.float32)

        self.global_step = tf.Variable(0, trainable=False)
        ## frame embedding params
        with tf.variable_scope("frame_embedding"):
            w_frame_embed = tf.get_variable("w_frame_embed", [frame_feat_dim, dim_hidden], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
            b_frame_embed = tf.get_variable("b_frame_embed", [dim_hidden], initializer=tf.constant_initializer(0.0))

        ## word embedding param
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, dim_hidden], dtype=tf.float32)

        ## word embedding to vocabulary-logit params
        w_word_onehot = tf.get_variable("w_word_onehot", [dim_hidden, vocab_size], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
        b_word_onehot = tf.get_variable("b_word_onehot", [vocab_size], initializer=tf.constant_initializer(0.0))

        ## attention position embedding
        wp = tf.get_variable("w_position_emb_1", [self.dim_hidden, self.dim_hidden], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
        vp = tf.get_variable("w_position_emb_2", [1, self.dim_hidden], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))

        ## attention align embedding
        wa = tf.get_variable("w_align_emb", [self.dim_hidden, self.dim_hidden], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))

        ## attention output embedding
        wc = tf.get_variable("w_attention_emb", [2*self.dim_hidden, self.dim_hidden], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))

        ## two lstm params
        with tf.variable_scope("att_lstm"):
            att_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
            # att_lstm = tf.contrib.rnn.DropoutWrapper(att_lstm, input_keep_prob=self.keep_prob, output_keep_prob=self.keep_prob)
        with tf.variable_scope("cap_lstm"):
            cap_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
            # cap_lstm = tf.contrib.rnn.DropoutWrapper(cap_lstm, input_keep_prob=self.keep_prob, output_keep_prob=self.keep_prob)

        att_state = (tf.zeros([self.batch_size, dim_hidden]), tf.zeros([self.batch_size, dim_hidden]))
        cap_state = (tf.zeros([self.batch_size, dim_hidden]), tf.zeros([self.batch_size, dim_hidden]))

        padding = tf.zeros([self.batch_size, dim_hidden])

        ##################### Computing Graph ########################

        frame_flat = tf.reshape(self.frame, [-1, frame_feat_dim])
        frame_embedding = tf.nn.xw_plus_b(frame_flat, w_frame_embed, b_frame_embed)
        frame_embedding = tf.reshape(frame_embedding, [self.batch_size, frame_steps, dim_hidden])

        enc_lstm_outputs = []
        dec_lstm_outputs = []
        ## Encoding stage
        for i in range(frame_steps):

            with tf.variable_scope('att_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output1, att_state = att_lstm(frame_embedding[:, i, :], att_state)
            ## input shape of cap_lstm: [batch_size, 2*dim_hidden]
            with tf.variable_scope('cap_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output2, cap_state = cap_lstm(tf.concat([padding, output1], 1), cap_state)
            enc_lstm_outputs.append(output2)

        ## (batch_size, frame_steps, dim_hidden)
        enc_lstm_outputs = tf.reshape(tf.concat(enc_lstm_outputs, 1), [self.batch_size, self.frame_steps, self.dim_hidden])

        ## Decoding stage
        ## Training util: with this comparison, scheduled_sampling_prob is the
        ## probability of feeding back the model's own prediction instead of the
        ## ground-truth word
        def train_cap(input_lstm, prev_encoder_output, real_ans, prev_decoder_output, global_step, prev_state):
            word_index = tf.cond(self.scheduled_sampling_prob <= tf.random_uniform([], 0, 1),
                                 lambda: real_ans,
                                 lambda: tf.argmax(prev_decoder_output, axis=1))
            with tf.device('/cpu:0'):
                word_embed = tf.nn.embedding_lookup(embedding, word_index)
                output, state = input_lstm(
                    tf.concat([word_embed, prev_encoder_output], 1), prev_state)
                m_state, c_state = state
            return output, m_state, c_state

        def test_cap(input_lstm, prev_encoder_output, prev_decoder_output, prev_state):
            ##  TODO: beam search
            with tf.device('/cpu:0'):
                word_index = tf.argmax(prev_decoder_output, axis=1)
                word_embed = tf.nn.embedding_lookup(embedding, word_index)
                output, state = input_lstm(
                    tf.concat([word_embed, prev_encoder_output], 1), prev_state)
                m_state, c_state = state
            return output, m_state, c_state

        ## Decoding stage (token id 4 serves as <BOS>)
        prev_step_word = tf.tile(tf.one_hot([4], vocab_size), [self.batch_size, 1])
        for i in range(caption_steps):

            with tf.variable_scope('att_lstm'):
                tf.get_variable_scope().reuse_variables()
                output1, att_state = att_lstm(padding, att_state)

            with tf.variable_scope('cap_lstm'):
                tf.get_variable_scope().reuse_variables()
                output2, m_state, c_state = tf.cond(self.train_state,
                                                    lambda: train_cap(cap_lstm, output1, self.caption[:, i], prev_step_word, self.global_step, cap_state),
                                                    lambda: test_cap(cap_lstm, output1, prev_step_word, cap_state))
                cap_state = (m_state, c_state)
                prev_step_word = tf.nn.xw_plus_b(
                    output2, w_word_onehot, b_word_onehot)
            ## Attention
            # output2 = self.local_attention(output2, enc_lstm_outputs, wp, vp, wa)
            # concat_output = tf.concat([attention_output, output2], 1)
            # output2 = tf.tanh(tf.matmul(concat_output, wc))
            dec_lstm_outputs.append(prev_step_word)

        onehot_word_logits = tf.reshape(tf.concat(dec_lstm_outputs, 1), [-1, vocab_size])

        self.predict_result = tf.reshape(tf.argmax(onehot_word_logits[:, 2:], 1)+2, [self.batch_size, caption_steps])

        onehot_word_logits = tf.unstack(tf.reshape(onehot_word_logits, [self.batch_size, caption_steps, vocab_size]), axis=1)

        caption_ans = tf.unstack(self.caption[:, 1:], axis=1)
        caption_ans_mask = tf.unstack(self.caption_mask[:, 1:], axis=1)
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(onehot_word_logits,
                                                                  caption_ans,
                                                                  caption_ans_mask)

        self.cost = tf.reduce_mean(loss)
        self.train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(self.cost, global_step=self.global_step)

        config = tf.ConfigProto(log_device_placement=True)
        config.gpu_options.allow_growth = True

        self.sess = tf.Session(config=config)

    def train(self, input_frame, input_caption, input_caption_mask, keep_prob=0.5, scheduled_sampling_prob=0.0):
        _, cost = self.sess.run([self.train_op, self.cost],
                                feed_dict={self.frame: input_frame,
                                           self.caption: input_caption,
                                           self.caption_mask: input_caption_mask,
                                           self.train_state: True,
                                           self.scheduled_sampling_prob: scheduled_sampling_prob,
                                           self.keep_prob: keep_prob})
        return cost

    def predict(self, input_frame):
        padding = np.zeros([input_frame.shape[0], self.caption_steps + 1])
        words = self.sess.run([self.predict_result],
                              feed_dict={self.frame: input_frame,
                                         self.caption: padding,
                                         self.train_state: False,
                                         self.scheduled_sampling_prob: 1.0,
                                         self.keep_prob: 1.0})
        return words

    def initialize(self):
        self.sess.run(tf.global_variables_initializer())

    def local_attention(self, decode_vec, encode_vecs, wp, vp, wa):

        ## (batch_size, frame_steps)
        score = self.align(decode_vec, encode_vecs, wa)
        ## (dim_hidden, batch_size)
        decode_vec_t = tf.transpose(decode_vec, [1, 0])
        ## (1, batch_size)
        pos_feature = tf.matmul(vp, tf.tanh(tf.matmul(wp, decode_vec_t)))
        ## (batch_size,)
        pt = tf.reshape(self.frame_steps*tf.sigmoid(pos_feature), [self.batch_size])
        local_center = tf.round(pt)

        half_window = 2  # tf.constant(4, shape=[1])
        delta = half_window/2

        def index_frame(ele):
            frames, center, pt, score = ele
            s = tf.range(self.frame_steps, dtype=tf.float32)
            score = score*tf.exp(-tf.square(s-pt)/(2*delta*delta))
            right = tf.minimum(center+half_window, self.frame_steps)
            left = tf.maximum(center-half_window, 0)
            right = tf.cast(right, tf.int32)
            left = tf.cast(left, tf.int32)
            score = tf.expand_dims(score, 0)
            attention_vec = tf.matmul(score[:, left:right], frames[left:right, :])
            attention_vec = tf.reshape(attention_vec, [self.dim_hidden])
            return attention_vec

        ## (batch_size, dim_hidden)
        attention_vec = tf.map_fn(index_frame, [encode_vecs, local_center, pt, score], dtype=tf.float32)
        return attention_vec+decode_vec

    def align(self, decode_vec, encode_vecs, wa):
        ## (batch_size, dim_hidden, frame_steps)
        encode_vecs_t = tf.transpose(encode_vecs, [0, 2, 1])
        ## (batch_size, 1, dim_hidden) x (batch_size, dim_hidden, frame_steps)
        score = tf.matmul(tf.expand_dims(tf.matmul(decode_vec, wa), 1), encode_vecs_t)
        score = tf.reshape(score, [self.batch_size, self.frame_steps])
        ## (batch_size, frame_steps)
        return score

    def saveModel(self, filepath):
        global_step = self.sess.run(self.global_step)
        saver = tf.train.Saver()
        saver.save(self.sess, './'+filepath+'_para/model_%d.ckpt' % (global_step))

    def loadModel(self, model_path):
        saver = tf.train.Saver(restore_sequentially=True)
        saver.restore(self.sess, model_path)


class Effective_attention_model():

    def __init__(self, frame_steps=20, frame_feat_dim=4096, caption_steps=45, vocab_size=3000, dim_hidden=200):

        self.frame_steps = frame_steps
        self.frame_feat_dim = frame_feat_dim
        self.caption_steps = caption_steps
        self.vocab_size = vocab_size
        self.dim_hidden = dim_hidden

        ## Graph input
        self.frame = tf.placeholder(tf.float32, [None, frame_steps, frame_feat_dim])
        self.caption = tf.placeholder(tf.int64, [None, caption_steps+1])
        self.caption_mask = tf.placeholder(tf.float32, [None, caption_steps+1])
        batch_frame = tf.shape(self.frame)[0]
        batch_caption = tf.shape(self.caption)[0]
        tf.Assert(tf.equal(batch_frame, batch_caption), [batch_frame, batch_caption])
        self.batch_size = batch_frame
        self.train_state = tf.placeholder(tf.bool)
        self.scheduled_sampling_prob = tf.placeholder(
            tf.float32, [], name='scheduled_sampling_prob')
        self.keep_prob = tf.placeholder(tf.float32)

        self.global_step = tf.Variable(0, trainable=False)
        ## frame embedding params
        with tf.variable_scope("frame_embedding"):
            w_frame_embed = tf.get_variable("w_frame_embed", [frame_feat_dim, 2*dim_hidden], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
            b_frame_embed = tf.get_variable("b_frame_embed", [2*dim_hidden], initializer=tf.constant_initializer(0.0))

        ## word embedding param
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, dim_hidden], dtype=tf.float32)

        ## word embedding to vocabulary-logit params
        w_word_onehot = tf.get_variable("w_word_onehot", [dim_hidden, vocab_size], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
        b_word_onehot = tf.get_variable("b_word_onehot", [vocab_size], initializer=tf.constant_initializer(0.0))

        ## attention position embedding
        wp = tf.get_variable("w_position_emb_1", [self.dim_hidden, self.dim_hidden], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
        vp = tf.get_variable("w_position_emb_2", [1, self.dim_hidden], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))

        ## attention align embedding
        wa = tf.get_variable("w_align_emb", [self.dim_hidden, self.dim_hidden], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))

        ## attention output embedding
        wc = tf.get_variable("w_attention_emb", [2*self.dim_hidden, self.dim_hidden], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))

        ## two lstm params (dropout enabled in this model)
        with tf.variable_scope("att_lstm"):
            att_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
            att_lstm = tf.contrib.rnn.DropoutWrapper(att_lstm, input_keep_prob=self.keep_prob, output_keep_prob=self.keep_prob)
        with tf.variable_scope("cap_lstm"):
            cap_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
            cap_lstm = tf.contrib.rnn.DropoutWrapper(cap_lstm, input_keep_prob=self.keep_prob, output_keep_prob=self.keep_prob)

        att_state = (tf.zeros([self.batch_size, dim_hidden]), tf.zeros([self.batch_size, dim_hidden]))
        cap_state = (tf.zeros([self.batch_size, dim_hidden]), tf.zeros([self.batch_size, dim_hidden]))

        padding = tf.zeros([self.batch_size, dim_hidden])

        ##################### Computing Graph ########################

        frame_flat = tf.reshape(self.frame, [-1, frame_feat_dim])
        frame_embedding = tf.nn.xw_plus_b(frame_flat, w_frame_embed, b_frame_embed)
        frame_embedding = tf.reshape(frame_embedding, [self.batch_size, frame_steps, 2*dim_hidden])

        enc_lstm_outputs = []
        dec_lstm_outputs = []
        ## Encoding stage
        for i in range(frame_steps):

            with tf.variable_scope('att_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output1, att_state = att_lstm(frame_embedding[:, i, :], att_state)
            with tf.variable_scope('cap_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output2, cap_state = cap_lstm(output1, cap_state)
            enc_lstm_outputs.append(output2)

        ## (batch_size, frame_steps, dim_hidden)
        enc_lstm_outputs = tf.reshape(tf.concat(enc_lstm_outputs, 1), [self.batch_size, self.frame_steps, self.dim_hidden])

        ## Decoding stage
        ## Training util: note the comparison direction differs from
        ## S2VT_attention_model; here scheduled_sampling_prob is the probability
        ## of feeding the ground-truth word
        def train_cap(input_lstm, real_ans, prev_decoder_output, prev_attention_output, global_step, prev_state):

            with tf.device('/cpu:0'):
                word_index = tf.cond(self.scheduled_sampling_prob >= tf.random_uniform([], 0, 1),
                                     lambda: real_ans,
                                     lambda: tf.argmax(prev_decoder_output, axis=1))
                # word_index = tf.argmax(real_ans, axis=1)
                word_embed = tf.nn.embedding_lookup(embedding, word_index)
            output, state = input_lstm(tf.concat([word_embed, prev_attention_output], 1), prev_state)
            m_state, c_state = state
            return output, m_state, c_state

        def test_cap(input_lstm, prev_decoder_output, prev_attention_output, prev_state):
            ##  TODO: beam search
            with tf.device('/cpu:0'):
                word_index = tf.argmax(prev_decoder_output, axis=1)
                word_embed = tf.nn.embedding_lookup(embedding, word_index)
                output, state = input_lstm(
                    tf.concat([word_embed, prev_attention_output], 1), prev_state)
                m_state, c_state = state
            return output, m_state, c_state

        ## Decoding stage (token id 4 serves as <BOS>)
        prev_step_word = tf.tile(tf.one_hot([4], vocab_size), [self.batch_size, 1])
        attention_output = tf.zeros(shape=[self.batch_size, dim_hidden])
        for i in range(caption_steps):

            with tf.variable_scope('att_lstm'):
                tf.get_variable_scope().reuse_variables()
                output1, m_state, c_state = tf.cond(self.train_state,
                                                    lambda: train_cap(att_lstm, self.caption[:, i], prev_step_word, attention_output, self.global_step, att_state),
                                                    lambda: test_cap(att_lstm, prev_step_word, attention_output, att_state))
                att_state = (m_state, c_state)

            with tf.variable_scope('cap_lstm'):
                tf.get_variable_scope().reuse_variables()
                output2, cap_state = cap_lstm(output1, cap_state)
            ## Attention
            attention_output = self.global_attention(output2, enc_lstm_outputs, wa)
            # attention_output = self.local_attention(output2, enc_lstm_outputs, wp, vp, wa)
            concat_output = tf.concat([attention_output, output2], 1)
            attention_output = tf.tanh(tf.matmul(concat_output, wc))
            prev_step_word = tf.nn.xw_plus_b(attention_output, w_word_onehot, b_word_onehot)
            dec_lstm_outputs.append(prev_step_word)

        onehot_word_logits = tf.reshape(tf.concat(dec_lstm_outputs, 1), [-1, vocab_size])

        self.predict_result = tf.reshape(tf.argmax(onehot_word_logits[:, 2:], 1)+2, [self.batch_size, caption_steps])

        onehot_word_logits = tf.unstack(tf.reshape(onehot_word_logits, [self.batch_size, caption_steps, vocab_size]), axis=1)

        caption_ans = tf.unstack(self.caption[:, 1:], axis=1)
        caption_ans_mask = tf.unstack(self.caption_mask[:, 1:], axis=1)
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(onehot_word_logits,
                                                                  caption_ans,
                                                                  caption_ans_mask)

        self.cost = tf.reduce_mean(loss)
        self.train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(self.cost, global_step=self.global_step)

        config = tf.ConfigProto(log_device_placement=True)
        config.gpu_options.allow_growth = True

        self.sess = tf.Session(config=config)

    def train(self, input_frame, input_caption, input_caption_mask, keep_prob=0.5, scheduled_sampling_prob=0.0):
        _, cost = self.sess.run([self.train_op, self.cost],
                                feed_dict={self.frame: input_frame,
                                           self.caption: input_caption,
                                           self.caption_mask: input_caption_mask,
                                           self.train_state: True,
                                           self.scheduled_sampling_prob: scheduled_sampling_prob,
                                           self.keep_prob: keep_prob})
        return cost

    def predict(self, input_frame):
        padding = np.zeros([input_frame.shape[0], self.caption_steps + 1])
        words = self.sess.run([self.predict_result],
                              feed_dict={self.frame: input_frame,
                                         self.caption: padding,
                                         self.train_state: False,
                                         self.scheduled_sampling_prob: 1.0,
                                         self.keep_prob: 1.0})
        return words

    def initialize(self):
        self.sess.run(tf.global_variables_initializer())

    def global_attention(self, decode_vec, encode_vecs, wa):
        ## (batch_size, frame_steps)
        score = tf.nn.softmax(self.score(decode_vec, encode_vecs, wa))
        attention_vec = tf.reduce_sum(encode_vecs*tf.tile(tf.expand_dims(score, 2), [1, 1, self.dim_hidden]), 1)

        return attention_vec

    def local_attention(self, decode_vec, encode_vecs, wp, vp, wa):

        ## (batch_size, frame_steps)
        score = self.score(decode_vec, encode_vecs, wa)
        ## (dim_hidden, batch_size)
        decode_vec_t = tf.transpose(decode_vec, [1, 0])
        ## (1, batch_size)
        pos_feature = tf.matmul(vp, tf.tanh(tf.matmul(wp, decode_vec_t)))
        ## (batch_size,)
        pt = tf.reshape(self.frame_steps*tf.sigmoid(pos_feature), [self.batch_size])
        local_center = tf.round(pt)

        half_window = 2  # tf.constant(4, shape=[1])
        delta = half_window/2

        def index_frame(ele):
            frames_ind, center_ind, pt_ind, score_ind = ele
            right = tf.minimum(center_ind+half_window+1, self.frame_steps)
            left = tf.maximum(center_ind-half_window, 0)
            right = tf.cast(right, tf.int32)
            left = tf.cast(left, tf.int32)
            frames_ind = frames_ind[left:right, :]
            score_ind = tf.nn.softmax(score_ind[left:right])
            s = tf.range(self.frame_steps, dtype=tf.float32)
            s = s[left:right]
            score_ind = score_ind*tf.exp(-tf.square(s-pt_ind)/(2*delta*delta))
            score_ind = tf.expand_dims(score_ind, 0)
            attention_vec = tf.matmul(score_ind, frames_ind)
            attention_vec = tf.reshape(attention_vec, [self.dim_hidden])
            return attention_vec

        ## (batch_size, dim_hidden)
        attention_vec = tf.map_fn(index_frame, [encode_vecs, local_center, pt, score], dtype=tf.float32)
        return attention_vec

    def saveModel(self, filepath):
        global_step = self.sess.run(self.global_step)
        saver = tf.train.Saver()
        saver.save(self.sess, './'+filepath+'_para/model_%d.ckpt' % (global_step))

    def loadModel(self, model_path):
        saver = tf.train.Saver(restore_sequentially=True)
        saver.restore(self.sess, model_path)

    def score(self, decode_vec, encode_vecs, wa):
        ## (batch_size, dim_hidden, frame_steps)
        encode_vecs_t = tf.transpose(encode_vecs, [0, 2, 1])
        ## (batch_size, 1, dim_hidden) x (batch_size, dim_hidden, frame_steps)
        score = tf.matmul(tf.expand_dims(tf.matmul(decode_vec, wa), 1), encode_vecs_t)
        score = tf.reshape(score, [self.batch_size, self.frame_steps])
        ## (batch_size, frame_steps)
        return score


## The class below is disabled: the author left it inside a module-level string.
## The closing quotes were cut off in the listing and are restored here.
"""
class Adversary_S2VT_model():

    def __init__(self, frame_steps=20, frame_feat_dim=4096, caption_steps=45, vocab_size=3000, dim_hidden=200):
        self.frame_steps = frame_steps
        self.frame_feat_dim = frame_feat_dim
        self.caption_steps = caption_steps
        self.vocab_size = vocab_size
        self.dim_hidden = dim_hidden

        ## Graph input
        self.frame = tf.placeholder(tf.float32, [None, frame_steps, frame_feat_dim])
        self.caption = tf.placeholder(tf.int64, [None, caption_steps+1])
        self.caption_mask = tf.placeholder(tf.float32, [None, caption_steps+1])
        self.scheduled_sampling_prob = tf.placeholder(
            tf.float32, [], name='scheduled_sampling_prob')
        batch_frame = tf.shape(self.frame)[0]
        batch_caption = tf.shape(self.caption)[0]
        tf.Assert(tf.equal(batch_frame, batch_caption), [batch_frame, batch_caption])
        self.batch_size = batch_frame
        self.train_state = tf.placeholder(tf.bool)
        self.keep_prob = tf.placeholder(tf.float32)

        self.global_step = tf.Variable(0, trainable=False)
        ## frame embedding params
        with tf.variable_scope("frame_embedding"):
            w_frame_embed = tf.get_variable("w_frame_embed", [frame_feat_dim, dim_hidden], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
            b_frame_embed = tf.get_variable("b_frame_embed", [dim_hidden], initializer=tf.constant_initializer(0.0))

        ## frame reconstruction params
        with tf.variable_scope("reframe_embedding"):
            w_reframe_embed = tf.get_variable("w_reframe_embed", [dim_hidden, frame_feat_dim], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
            b_reframe_embed = tf.get_variable("b_reframe_embed", [frame_feat_dim], initializer=tf.constant_initializer(0.0))

        ## word embedding param
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, dim_hidden], dtype=tf.float32)

        ## word embedding to vocabulary-logit params
        w_word_onehot = tf.get_variable("w_word_onehot", [dim_hidden, vocab_size], initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
        b_word_onehot = tf.get_variable("b_word_onehot", [vocab_size], initializer=tf.constant_initializer(0.0))

        ## two lstm params
        with tf.variable_scope("att_lstm"):
            att_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
            att_lstm = tf.contrib.rnn.DropoutWrapper(att_lstm, input_keep_prob=self.keep_prob, output_keep_prob=self.keep_prob)
        with tf.variable_scope("cap_lstm"):
            cap_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
            cap_lstm = tf.contrib.rnn.DropoutWrapper(cap_lstm, input_keep_prob=self.keep_prob, output_keep_prob=self.keep_prob)

        att_state = (tf.zeros([self.batch_size, dim_hidden]), tf.zeros([self.batch_size, dim_hidden]))
        cap_state = (tf.zeros([self.batch_size, dim_hidden]), tf.zeros([self.batch_size, dim_hidden]))

        padding = tf.nn.embedding_lookup(embedding, tf.zeros(shape=[self.batch_size], dtype=tf.int32))

        ##################### Computing Graph ########################

        frame_flat = tf.reshape(self.frame, [-1, frame_feat_dim])
        frame_embedding = tf.nn.xw_plus_b(frame_flat, w_frame_embed, b_frame_embed)
        frame_embedding = tf.reshape(frame_embedding, [self.batch_size, frame_steps, dim_hidden])

        enc_lstm_outputs = []
        dec_lstm_outputs = []
        ## Encoding stage
        for i in range(frame_steps):

            with tf.variable_scope('att_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output1, att_state = att_lstm(frame_embedding[:, i, :], att_state)
            ## input shape of cap_lstm: [batch_size, 2*dim_hidden]
            with tf.variable_scope('cap_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output2, cap_state = cap_lstm(tf.concat([padding, output1], 1), cap_state)
            enc_lstm_outputs.append(output2)

        ## (batch_size, frame_steps, dim_hidden)
        enc_lstm_outputs = tf.reshape(tf.concat(enc_lstm_outputs, 1), [self.batch_size, self.frame_steps, self.dim_hidden])

        ## Decoding stage
        ## Training util: here scheduled_sampling_prob is the probability of
        ## feeding the ground-truth word
        def train_cap(input_lstm, prev_encoder_output, real_ans, prev_decoder_output, global_step, prev_state):
            word_index = tf.cond(self.scheduled_sampling_prob >= tf.random_uniform([], 0, 1),
                                 lambda: real_ans,
                                 lambda: tf.argmax(prev_decoder_output, axis=1))
            with tf.device('/cpu:0'):
                word_embed = tf.nn.embedding_lookup(embedding, word_index)
                output, state = input_lstm(
                    tf.concat([word_embed, prev_encoder_output], 1), prev_state)
                m_state, c_state = state
            return output, m_state, c_state

        def test_cap(input_lstm, prev_encoder_output, prev_decoder_output, prev_state):
            ##  TODO: beam search
            with tf.device('/cpu:0'):
                word_index = tf.argmax(prev_decoder_output, axis=1)
                word_embed = tf.nn.embedding_lookup(embedding, word_index)
                output, state = input_lstm(
                    tf.concat([word_embed, prev_encoder_output], 1), prev_state)
                m_state, c_state = state
            return output, m_state, c_state

        ## Decoding stage (token id 4 serves as <BOS>)
        prev_step_word = tf.tile(tf.one_hot([4], vocab_size), [self.batch_size, 1])
        for i in range(caption_steps):

            with tf.variable_scope('att_lstm'):
                tf.get_variable_scope().reuse_variables()
                output1, att_state = att_lstm(padding, att_state)

            with tf.variable_scope('cap_lstm'):
                tf.get_variable_scope().reuse_variables()
                output2, m_state, c_state = tf.cond(self.train_state,
                                                    lambda: train_cap(cap_lstm, output1, self.caption[:, i], prev_step_word, self.global_step, cap_state),
                                                    lambda: test_cap(cap_lstm, output1, prev_step_word, cap_state))
                cap_state = (m_state, c_state)
                prev_step_word = tf.nn.xw_plus_b(output2, w_word_onehot, b_word_onehot)
            dec_lstm_outputs.append(prev_step_word)

        onehot_word_logits = tf.reshape(tf.concat(dec_lstm_outputs, 1), [-1, vocab_size])

        self.predict_result = tf.reshape(tf.argmax(onehot_word_logits[:, 2:], 1)+2, [self.batch_size, caption_steps])

        onehot_word_logits = tf.unstack(tf.reshape(onehot_word_logits, [self.batch_size, caption_steps, vocab_size]), axis=1)

        caption_ans = tf.unstack(self.caption[:, 1:], axis=1)
        caption_ans_mask = tf.unstack(self.caption_mask[:, 1:], axis=1)
        caption_loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(onehot_word_logits,
                                                                          caption_ans,
                                                                          caption_ans_mask)

        #################### second stage #######################

        with tf.variable_scope("second_att_lstm"):
            second_att_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
            second_att_lstm = tf.contrib.rnn.DropoutWrapper(second_att_lstm, input_keep_prob=self.keep_prob, output_keep_prob=self.keep_prob)
        with tf.variable_scope("second_cap_lstm"):
            second_cap_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
            ## the listing wrapped second_att_lstm here by mistake
            second_cap_lstm = tf.contrib.rnn.DropoutWrapper(second_cap_lstm, input_keep_prob=self.keep_prob, output_keep_prob=self.keep_prob)
        second_att_state = (tf.zeros([self.batch_size, dim_hidden]), tf.zeros([self.batch_size, dim_hidden]))
        second_cap_state = (tf.zeros([self.batch_size, dim_hidden]), tf.zeros([self.batch_size, dim_hidden]))

        ##################### Computing Graph ########################

        frame_flat = tf.reshape(self.frame, [-1, frame_feat_dim])
        frame_embedding = tf.nn.xw_plus_b(frame_flat, w_frame_embed, b_frame_embed)
        frame_embedding = tf.reshape(frame_embedding, [self.batch_size, frame_steps, dim_hidden])

        second_enc_lstm_outputs = []
        second_dec_lstm_outputs = []
        ## Encoding stage
        for i in range(caption_steps):

            with tf.variable_scope('second_att_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                word_index = tf.cond(self.scheduled_sampling_prob >= tf.random_uniform([], 0, 1),
                                     lambda: self.caption[:, i+1],
                                     lambda: tf.argmax(dec_lstm_outputs[i], axis=1)*tf.cast(self.caption_mask[:, i+1], tf.int64))
                with tf.device('/cpu:0'):
                    word_embed = tf.nn.embedding_lookup(embedding, word_index)
                output1, second_att_state = second_att_lstm(word_embed, second_att_state)
            with tf.variable_scope('second_cap_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                ## the listing called cap_lstm and assigned cap_state here by mistake
                output2, second_cap_state = second_cap_lstm(tf.concat([padding, output1], 1), second_cap_state)
            second_enc_lstm_outputs.append(output2)

        ## second decoding stage: reconstruct the frame features from the caption
        prev_step_word = second_enc_lstm_outputs[-1]
        for i in range(self.frame_steps):
            with tf.variable_scope('second_att_lstm'):
                tf.get_variable_scope().reuse_variables()
                output1, second_att_state = second_att_lstm(padding, second_att_state)

            with tf.variable_scope('second_cap_lstm'):
                tf.get_variable_scope().reuse_variables()
                output2, second_cap_state = second_cap_lstm(tf.concat([prev_step_word, output1], 1), second_cap_state)
                ## teacher forcing with the ground-truth frame embedding
                prev_step_word = tf.nn.xw_plus_b(self.frame[:, i, :], w_frame_embed, b_frame_embed)
            second_dec_lstm_outputs.append(tf.nn.xw_plus_b(output2, w_reframe_embed, b_reframe_embed))

        second_dec_lstm_outputs = tf.reshape(tf.concat(second_dec_lstm_outputs, 1), [-1, frame_feat_dim])
        frame_loss = tf.reduce_mean(tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(second_dec_lstm_outputs, frame_flat)), -1)))

        ratio = 0.7
        self.cost = ratio*tf.reduce_mean(caption_loss)+(1-ratio)*frame_loss
        self.train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(self.cost, global_step=self.global_step)
        self.train_conv_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(self.cost, var_list=[w_frame_embed, b_frame_embed, w_reframe_embed, b_reframe_embed, embedding, w_word_onehot, b_word_onehot], global_step=self.global_step)

        config = tf.ConfigProto(log_device_placement=True)
        config.gpu_options.allow_growth = True

        self.sess = tf.Session(config=config)

    def train(self, input_frame, input_caption, input_caption_mask, keep_prob=0.5, scheduled_sampling_prob=0.0):
        if scheduled_sampling_prob == 1:
            _, cost = self.sess.run([self.train_conv_op, self.cost],
                                    feed_dict={self.frame: input_frame,
                                               self.caption: input_caption,
                                               self.caption_mask: input_caption_mask,
                                               self.train_state: True,
                                               self.scheduled_sampling_prob: scheduled_sampling_prob,
                                               self.keep_prob: keep_prob})
        else:
            _, cost = self.sess.run([self.train_op, self.cost],
                                    feed_dict={self.frame: input_frame,
                                               self.caption: input_caption,
                                               self.caption_mask: input_caption_mask,
                                               self.train_state: True,
                                               self.scheduled_sampling_prob: scheduled_sampling_prob,
                                               self.keep_prob: keep_prob})

        return cost

    def predict(self, input_frame):
        padding = np.zeros([input_frame.shape[0], self.caption_steps + 1])
        words = self.sess.run([self.predict_result],
                              feed_dict={self.frame: input_frame,
                                         self.caption: padding,
                                         self.train_state: False,
                                         self.scheduled_sampling_prob: 1.0,
                                         self.keep_prob: 1.0})
        return words

    def initialize(self):
        self.sess.run(tf.global_variables_initializer())

    def saveModel(self, filepath):
        global_step = self.sess.run(self.global_step)
        saver = tf.train.Saver()
        saver.save(self.sess, './'+filepath+'_para/model_%d.ckpt' % (global_step))

    def loadModel(self, model_path):
        saver = tf.train.Saver(restore_sequentially=True)
        saver.restore(self.sess, model_path)
"""
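A quick way to sanity-check the listing above is to drive S2VT_attention_model with random data. The sketch below is not part of the original file; it assumes a TF 1.x environment with tf.contrib available and the listing saved as model.py, and it only verifies that the graph builds and the shapes line up, nothing about caption quality.

# Hedged usage sketch (not from the original repo); assumes TF 1.x and model.py as above.
import numpy as np
from model import S2VT_attention_model

batch, frame_steps, feat_dim, caption_steps = 8, 20, 4096, 45

model = S2VT_attention_model(frame_steps=frame_steps, frame_feat_dim=feat_dim,
                             caption_steps=caption_steps)
model.initialize()

frames = np.random.randn(batch, frame_steps, feat_dim).astype(np.float32)
captions = np.random.randint(0, 3000, size=(batch, caption_steps + 1))  # vocab_size defaults to 3000
mask = np.ones((batch, caption_steps + 1), dtype=np.float32)

for step in range(3):
    # scheduled_sampling_prob=0.0 keeps pure teacher forcing under this class's convention
    cost = model.train(frames, captions, mask, keep_prob=0.5, scheduled_sampling_prob=0.0)
    print('step %d  cost %.4f' % (step, cost))

words = model.predict(frames)  # list holding one [batch, caption_steps] array of word ids
print(words[0].shape)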
Source: layers.py
1import tensorflow as tf2import numpy as np3def lrelu(x):4    return tf.maximum(x*0.2,x)5def upsample_and_concat_c(x1, x2, output_channels, in_channels, scope,reuse=False):6    with tf.variable_scope(scope,reuse=reuse):7        pool_size = 28        deconv_filter = tf.get_variable(shape= [pool_size, pool_size, output_channels, in_channels],initializer=tf.truncated_normal_initializer(stddev=0.02),name='dcf')9        deconv = tf.nn.conv2d_transpose(x1, deconv_filter, tf.shape(x2) , strides=[1, pool_size, pool_size, 1] )10        deconv_output =  tf.concat([deconv, x2],3)11   #     deconv_output.set_shape([None, None, None, output_channels*2])12    return deconv_output13def affine_mapping(x,in_chan,out_chan,scope,reuse=False):14    with tf.variable_scope(scope,reuse=reuse):15        mapping=tf.get_variable(shape=[in_chan+1,out_chan],initializer=tf.truncated_normal_initializer(0.0,1.0),dtype=tf.float32,name='mapping')16        x_pixels = tf.reshape(x, [-1, in_chan])17        bias = tf.ones_like(x_pixels[:,0:1])18        x_pixels = tf.concat([x_pixels,bias],1)19        x_pixels = tf.matmul(x_pixels, mapping)20        shape = tf.shape(x)21    return tf.reshape(x_pixels, [shape[0],shape[1],shape[2],out_chan])22def coeff_estimate(x,chan,scope,reuse=False):23    with tf.variable_scope(scope,reuse=reuse):24        shape = tf.shape(x)25        conv0 = slim.conv2d(current,chan,[3,3], activation_fn=lrelu,scope='g_conv0',reuse=reuse)26        pool0=slim.max_pool2d(conv0, [2, 2], padding='SAME',scope='pool0')27        conv1 = slim.conv2d(pool0,chan,[3,3], activation_fn=lrelu,scope='g_conv1',reuse=reuse)28        pool1=slim.max_pool2d(conv1, [2, 2], padding='SAME',scope='pool1')29        conv2 = slim.conv2d(pool1,chan,[3,3], activation_fn=lrelu,scope='g_conv2',reuse=reuse)30        pool2=slim.max_pool2d(conv2, [2, 2], padding='SAME',scope='pool2')31        conv3 = slim.conv2d(pool3,chan,[3,3], activation_fn=lrelu,scope='conv0',reuse=reuse)32        conv3 = slim.conv2d(conv3,chan,[3,3], activation_fn=lrelu,scope='conv1',reuse=reuse)33        dconv2 =  upsample_and_concat_c( conv3, conv2, chan, chan, scope='uac2',reuse=reuse )	34        dconv2 = slim.conv2d(dconv2,chan,[3,3], activation_fn=lrelu,scope='d_conv2',reuse=reuse)35        dconv1 =  upsample_and_concat_c( dconv2, conv1, chan, chan, scope='uac1',reuse=reuse )	36        dconv1 = slim.conv2d(dconv1,chan,[3,3], activation_fn=lrelu,scope='d_conv1',reuse=reuse)37        dconv0 =  upsample_and_concat_c( dconv1, conv0, chan, chan, scope='uac0',reuse=reuse )	38        dconv0 = slim.conv2d(dconv0,chan,[3,3], activation_fn=lrelu,scope='d_conv0',reuse=reuse)39    return dconv040'''41def perpixel_conv(fp,coef,chan,scope,reuse=False):42    with tf.variable_scope(scope,reuse=reuse):43	padding = 144	paddings = [[0,0],[padding,padding],[padding,padding],[0,0]]45	fp = tf.pad(fp,paddings,name='pad')46	result0 = fp[:,0:-2,0:-2,:]*coef[:,:,:,:chan]47	result1 = fp[:,1:-1,0:-2,:]*coef[:,:,:,chan:chan*2]48	result2 = fp[:,2:,0:-2,:]*coef[:,:,:,chan*2:chan*3]49	result3 = fp[:,0:-2,1:-1,:]*coef[:,:,:,chan*3:chan*4]50	result4 = fp[:,1:-1,1:-1,:]*coef[:,:,:,chan*4:chan*5]51	result5 = fp[:,2:,1:-1,:]*coef[:,:,:,chan*5:chan*6]52	result6 = fp[:,0:-2,2:,:]*coef[:,:,:,chan*6:chan*7]53	result7 = fp[:,1:-1,2:,:]*coef[:,:,:,chan*7:chan*8]54	result8 = fp[:,2:,2:,:]*coef[:,:,:,chan*8:chan*9]55	result = result0 + result1 + result2 + result3 + result4 + result5 + result6 + result7 + result8 + coef[:,:,:,chan*9:chan*10]56    return lrelu(result)57'''58def 
perpixel_affine(fp,coef,chan,scope,reuse=False):59    with tf.variable_scope(scope,reuse=reuse):60        padding = 161        paddings = [[0,0],[padding,padding],[padding,padding],[0,0]]62        fp = tf.pad(fp,paddings,name='pad')63        result0 = fp[:,0:-2,0:-2,:]*coef[:,:,:,:chan]64        result1 = fp[:,1:-1,0:-2,:]*coef[:,:,:,chan:chan*2]65        result2 = fp[:,2:,0:-2,:]*coef[:,:,:,chan*2:chan*3]66        result3 = fp[:,0:-2,1:-1,:]*coef[:,:,:,chan*3:chan*4]67        result4 = fp[:,1:-1,1:-1,:]*coef[:,:,:,chan*4:chan*5]68        result5 = fp[:,2:,1:-1,:]*coef[:,:,:,chan*5:chan*6]69        result6 = fp[:,0:-2,2:,:]*coef[:,:,:,chan*6:chan*7]70        result7 = fp[:,1:-1,2:,:]*coef[:,:,:,chan*7:chan*8]71        result8 = fp[:,2:,2:,:]*coef[:,:,:,chan*8:chan*9]72        result = result0 + result1 + result2 + result3 + result4 + result5 + result6 + result7 + result8 + coef[:,:,:,chan*9:chan*10]73    return lrelu(result)74def guided_upsampling(input_ftmp,guide_ftmp):75    # input_ftmp must be a [Batch,H,W,Intensity,Channel] shaped feature map76    # guide_ftmp must be a [Batch,H*factor,W*factor,1] shaped feature map77    def get_pixel_value(img, x, y, z):78        ## Getting parameters79        batch_size = tf.shape(img)[0]80        height = tf.shape(x)[0]81        width = tf.shape(x)[1]82        ## Preprocessing83        x = tf.cast(x,dtype=tf.int32)84        y = tf.cast(y,dtype=tf.int32)85        z = tf.cast(z,dtype=tf.int32)86        x = tf.expand_dims(x,0)87        y = tf.expand_dims(y,0)88        z = tf.expand_dims(z,0)89        x = tf.tile(x,[batch_size,1,1])90        y = tf.tile(y,[batch_size,1,1]) # x,y,z = [b,h,w]91        z = tf.tile(z,[batch_size,1,1])92        # Then b93        batch_idx = tf.range(0, batch_size)94        batch_idx = tf.reshape(batch_idx, (batch_size,1,1))95        b = tf.tile(batch_idx, (1, height, width)) # b = [b,h,w]96    97        indices = tf.stack([b, y, x, z], 3)   98    return tf.gather_nd(img, indices)99    100    ##### Do the job101    shape = tf.shape(input_ftmp)102    height = shape[1]103    width = shape[2]104    intensity = shape[3]105    height_s,width_s,intensity_s = tf.cast(height,dtype=tf.float32),tf.cast(width,dtype=tf.float32),tf.cast(intensity,dtype=tf.float32)106    new_shape = tf.shape(guide_ftmp)107    new_height = new_shape[1]108    new_width = new_shape[2]109    # create meshgrid110    x = tf.linspace(0.0, 1.0, new_width)     111    y = tf.linspace(0.0, 1.0, new_height)112    x_t, y_t = tf.meshgrid(x, y)113    z_t = guide_ftmp[0,:,:,0]114    # Transform the coords115    x_te = x_t*(width_s-1.0)116    y_te = y_t*(height_s-1.0)117    z_te = z_t*(intensity_s-1.0)118    # 8 neighborhood119    x0 = tf.floor(x_te)120    x1 = x0 + 1.0121    y0 = tf.floor(y_te)122    y1 = y0 + 1.0123    z0 = tf.floor(z_te)124    z1 = z0 + 1.0125    x0 = tf.clip_by_value(x0, 0.0, width_s-1.0)126    x1 = tf.clip_by_value(x1, 0.0, width_s-1.0)127    y0 = tf.clip_by_value(y0, 0.0, height_s-1.0)128    y1 = tf.clip_by_value(y1, 0.0, height_s-1.0)129    z0 = tf.clip_by_value(z0, 0.0, intensity_s-1.0)130    z1 = tf.clip_by_value(z1, 0.0, intensity_s-1.0)131    Ia = get_pixel_value(input_ftmp, x0, y0,z0)132    Ib = get_pixel_value(input_ftmp, x0, y0,z1)133    Ic = get_pixel_value(input_ftmp, x1, y0,z0)134    Id = get_pixel_value(input_ftmp, x1, y0,z1)135    Ie = get_pixel_value(input_ftmp, x0, y1,z0)136    If = get_pixel_value(input_ftmp, x0, y1,z1)137    Ig = get_pixel_value(input_ftmp, x1, y1,z0)138    Ih = get_pixel_value(input_ftmp, x1, y1,z1)139    wa = 
tf.maximum(1.0-tf.abs(x0-x_te),0.0) * tf.maximum(1.0-tf.abs(y0-y_te),0.0) * tf.maximum(1.0-tf.abs(z0-z_te),0.0)140    wb = tf.maximum(1.0-tf.abs(x0-x_te),0.0) * tf.maximum(1.0-tf.abs(y0-y_te),0.0) * tf.maximum(1.0-tf.abs(z1-z_te),0.0)141    wc = tf.maximum(1.0-tf.abs(x1-x_te),0.0) * tf.maximum(1.0-tf.abs(y0-y_te),0.0) * tf.maximum(1.0-tf.abs(z0-z_te),0.0)142    wd = tf.maximum(1.0-tf.abs(x1-x_te),0.0) * tf.maximum(1.0-tf.abs(y0-y_te),0.0) * tf.maximum(1.0-tf.abs(z1-z_te),0.0)143    we = tf.maximum(1.0-tf.abs(x0-x_te),0.0) * tf.maximum(1.0-tf.abs(y1-y_te),0.0) * tf.maximum(1.0-tf.abs(z0-z_te),0.0)144    wf = tf.maximum(1.0-tf.abs(x0-x_te),0.0) * tf.maximum(1.0-tf.abs(y1-y_te),0.0) * tf.maximum(1.0-tf.abs(z1-z_te),0.0)145    wg = tf.maximum(1.0-tf.abs(x1-x_te),0.0) * tf.maximum(1.0-tf.abs(y1-y_te),0.0) * tf.maximum(1.0-tf.abs(z0-z_te),0.0)146    wh = tf.maximum(1.0-tf.abs(x1-x_te),0.0) * tf.maximum(1.0-tf.abs(y1-y_te),0.0) * tf.maximum(1.0-tf.abs(z1-z_te),0.0)147    wa = tf.expand_dims(tf.expand_dims(wa, axis=0),3)148    wb = tf.expand_dims(tf.expand_dims(wb, axis=0),3)149    wc = tf.expand_dims(tf.expand_dims(wc, axis=0),3)150    wd = tf.expand_dims(tf.expand_dims(wd, axis=0),3)151    we = tf.expand_dims(tf.expand_dims(we, axis=0),3)152    wf = tf.expand_dims(tf.expand_dims(wf, axis=0),3)153    wg = tf.expand_dims(tf.expand_dims(wg, axis=0),3)154    wh = tf.expand_dims(tf.expand_dims(wh, axis=0),3)155    out = tf.add_n([wa*Ia, wb*Ib, wc*Ic, wd*Id, we*Ie, wf*If, wg*Ig, wh*Ih])156    return out157def gaussian_func(x1,x2,sigma):158    return tf.exp(-1.0*((x1-x2)**2.0)/(2.0*(sigma**2.0)))159def bilateral_joint_upsampling(input_ftmp,guide_ftmp,factor_g=0.2,factor_s=1.0,scope=None,reuse=False):160    with tf.variable_scope(scope,reuse=reuse):161        shape = tf.shape(input_ftmp)162        batchsize = shape[0]163        height = shape[1]164        width = shape[2]165        height_s,width_s = tf.cast(height,dtype=tf.float32),tf.cast(width,dtype=tf.float32)166        new_shape = tf.shape(guide_ftmp)167        new_height = new_shape[1]168        new_width = new_shape[2]169        new_height_s,new_width_s = tf.cast(new_height,dtype=tf.float32),tf.cast(new_width,dtype=tf.float32)170        x = tf.linspace(0.0, 1.0, new_width)     171        y = tf.linspace(0.0, 1.0, new_height)172        xt, yt = tf.meshgrid(x, y)173        xt = tf.tile(tf.expand_dims(tf.expand_dims(xt,0),3),[batchsize,1,1,1])174        yt = tf.tile(tf.expand_dims(tf.expand_dims(yt,0),3),[batchsize,1,1,1])175    176        ## Spatial 177        xd = tf.clip_by_value((width_s-1.0)*xt, 0.0, width_s-1.0)178        yd = tf.clip_by_value((height_s-1.0)*yt, 0.0, height_s-1.0)179        xd0 = tf.floor(xd)180        xd1 = xd0 + 1.0181        yd0 = tf.floor(yd)182        yd1 = yd0 + 1.0183        xd0 = tf.clip_by_value(xd0,0.0, width_s-1.0)184        xd1 = tf.clip_by_value(xd1,0.0, width_s-1.0)185        yd0 = tf.clip_by_value(yd0,0.0, height_s-1.0)186        yd1 = tf.clip_by_value(yd1,0.0, height_s-1.0)187        batch_idx = tf.range(0, batchsize)188        batch_idx = tf.reshape(batch_idx, (batchsize,1,1,1))189        bd = tf.tile(batch_idx, (1, new_height, new_width,1))190    191        indices00 = tf.concat([bd,tf.cast(yd0,tf.int32),tf.cast(xd0,tf.int32)],3)192        indices01 = tf.concat([bd,tf.cast(yd0,tf.int32),tf.cast(xd1,tf.int32)],3)193        indices10 = tf.concat([bd,tf.cast(yd1,tf.int32),tf.cast(xd0,tf.int32)],3)194        indices11 = tf.concat([bd,tf.cast(yd1,tf.int32),tf.cast(xd1,tf.int32)],3)195    196        I00 = 
def gaussian_func(x1, x2, sigma):
    # Unnormalized Gaussian kernel, used below as a range weight on the guide.
    return tf.exp(-1.0 * ((x1 - x2) ** 2.0) / (2.0 * (sigma ** 2.0)))

def bilateral_joint_upsampling(input_ftmp, guide_ftmp, factor_g=0.2, factor_s=1.0, scope=None, reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        shape = tf.shape(input_ftmp)
        batchsize = shape[0]
        height = shape[1]
        width = shape[2]
        height_s, width_s = tf.cast(height, dtype=tf.float32), tf.cast(width, dtype=tf.float32)
        new_shape = tf.shape(guide_ftmp)
        new_height = new_shape[1]
        new_width = new_shape[2]
        new_height_s, new_width_s = tf.cast(new_height, dtype=tf.float32), tf.cast(new_width, dtype=tf.float32)
        x = tf.linspace(0.0, 1.0, new_width)
        y = tf.linspace(0.0, 1.0, new_height)
        xt, yt = tf.meshgrid(x, y)
        xt = tf.tile(tf.expand_dims(tf.expand_dims(xt, 0), 3), [batchsize, 1, 1, 1])
        yt = tf.tile(tf.expand_dims(tf.expand_dims(yt, 0), 3), [batchsize, 1, 1, 1])

        ## Spatial: the four nearest low-res neighbors of each output pixel
        xd = tf.clip_by_value((width_s - 1.0) * xt, 0.0, width_s - 1.0)
        yd = tf.clip_by_value((height_s - 1.0) * yt, 0.0, height_s - 1.0)
        xd0 = tf.floor(xd)
        xd1 = xd0 + 1.0
        yd0 = tf.floor(yd)
        yd1 = yd0 + 1.0
        xd0 = tf.clip_by_value(xd0, 0.0, width_s - 1.0)
        xd1 = tf.clip_by_value(xd1, 0.0, width_s - 1.0)
        yd0 = tf.clip_by_value(yd0, 0.0, height_s - 1.0)
        yd1 = tf.clip_by_value(yd1, 0.0, height_s - 1.0)
        batch_idx = tf.range(0, batchsize)
        batch_idx = tf.reshape(batch_idx, (batchsize, 1, 1, 1))
        bd = tf.tile(batch_idx, (1, new_height, new_width, 1))

        indices00 = tf.concat([bd, tf.cast(yd0, tf.int32), tf.cast(xd0, tf.int32)], 3)
        indices01 = tf.concat([bd, tf.cast(yd0, tf.int32), tf.cast(xd1, tf.int32)], 3)
        indices10 = tf.concat([bd, tf.cast(yd1, tf.int32), tf.cast(xd0, tf.int32)], 3)
        indices11 = tf.concat([bd, tf.cast(yd1, tf.int32), tf.cast(xd1, tf.int32)], 3)

        I00 = tf.gather_nd(input_ftmp, indices00)
        I01 = tf.gather_nd(input_ftmp, indices01)
        I10 = tf.gather_nd(input_ftmp, indices10)
        I11 = tf.gather_nd(input_ftmp, indices11)

        # Spatial tent weights (a Gaussian alternative is kept for reference):
        # ws00 = gaussian_func(yd0, yd, factor_s) * gaussian_func(xd0, xd, factor_s)
        # ws01 = gaussian_func(yd0, yd, factor_s) * gaussian_func(xd1, xd, factor_s)
        # ws10 = gaussian_func(yd1, yd, factor_s) * gaussian_func(xd0, xd, factor_s)
        # ws11 = gaussian_func(yd1, yd, factor_s) * gaussian_func(xd1, xd, factor_s)
        ws00 = tf.maximum(1.0 - factor_s*tf.abs(yd0 - yd), 0.0) * tf.maximum(1.0 - factor_s*tf.abs(xd0 - xd), 0.0)
        ws01 = tf.maximum(1.0 - factor_s*tf.abs(yd0 - yd), 0.0) * tf.maximum(1.0 - factor_s*tf.abs(xd1 - xd), 0.0)
        ws10 = tf.maximum(1.0 - factor_s*tf.abs(yd1 - yd), 0.0) * tf.maximum(1.0 - factor_s*tf.abs(xd0 - xd), 0.0)
        ws11 = tf.maximum(1.0 - factor_s*tf.abs(yd1 - yd), 0.0) * tf.maximum(1.0 - factor_s*tf.abs(xd1 - xd), 0.0)

        ## Guide: sample the guide at the projected neighbor locations
        xu = tf.clip_by_value((new_width_s - 1.0) * xt, 0.0, new_width_s - 1.0)
        yu = tf.clip_by_value((new_height_s - 1.0) * yt, 0.0, new_height_s - 1.0)
        xu0 = tf.clip_by_value((new_width_s - 1.0) * xd0 / (width_s - 1.0), 0.0, new_width_s - 1.0)
        xu1 = tf.clip_by_value((new_width_s - 1.0) * xd1 / (width_s - 1.0), 0.0, new_width_s - 1.0)
        yu0 = tf.clip_by_value((new_height_s - 1.0) * yd0 / (height_s - 1.0), 0.0, new_height_s - 1.0)
        yu1 = tf.clip_by_value((new_height_s - 1.0) * yd1 / (height_s - 1.0), 0.0, new_height_s - 1.0)
        bu = tf.tile(batch_idx, (1, new_height, new_width, 1))

        indices00 = tf.concat([bu, tf.cast(yu0, tf.int32), tf.cast(xu0, tf.int32)], 3)
        indices01 = tf.concat([bu, tf.cast(yu0, tf.int32), tf.cast(xu1, tf.int32)], 3)
        indices10 = tf.concat([bu, tf.cast(yu1, tf.int32), tf.cast(xu0, tf.int32)], 3)
        indices11 = tf.concat([bu, tf.cast(yu1, tf.int32), tf.cast(xu1, tf.int32)], 3)
        indicestt = tf.concat([bu, tf.cast(yu, tf.int32), tf.cast(xu, tf.int32)], 3)

        guide00 = tf.gather_nd(guide_ftmp, indices00)
        guide01 = tf.gather_nd(guide_ftmp, indices01)
        guide10 = tf.gather_nd(guide_ftmp, indices10)
        guide11 = tf.gather_nd(guide_ftmp, indices11)
        guidett = tf.gather_nd(guide_ftmp, indicestt)

        # factor_g = tf.get_variable(shape=[], initializer=tf.constant_initializer(factor_g), dtype=tf.float32, name='factor_g')
        wg00 = gaussian_func(guide00, guidett, factor_g)
        wg01 = gaussian_func(guide01, guidett, factor_g)
        wg10 = gaussian_func(guide10, guidett, factor_g)
        wg11 = gaussian_func(guide11, guidett, factor_g)

        ## Final merged weights
        weight00 = ws00 * wg00
        weight01 = ws01 * wg01
        weight10 = ws10 * wg10
        weight11 = ws11 * wg11
        weight_sum = weight00 + weight01 + weight10 + weight11 + 0.01  # epsilon avoids division by zero
        I00 = I00 * weight00
        I01 = I01 * weight01
        I10 = I10 * weight10
        I11 = I11 * weight11
    return tf.add_n([I00, I01, I10, I11]) / weight_sum
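A hedged usage sketch (names and shapes below are assumed): the function upsamples a low-resolution feature map to the guide's resolution, weighting each of the four nearest low-res samples by a spatial tent kernel times a Gaussian range kernel on the guide; the 0.01 added to weight_sum guards against division by zero where all four weights vanish.

import tensorflow as tf

low = tf.placeholder(tf.float32, [None, 64, 64, 3])          # low-res features
hi_guide = tf.placeholder(tf.float32, [None, 256, 256, 1])   # full-res guide
up = bilateral_joint_upsampling(low, hi_guide, factor_g=0.2, factor_s=1.0,
                                scope='bju_demo')             # -> [B, 256, 256, 3]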
def bilateral_joint_upsampling_lin(input_ftmp, guide_ftmp, factor_g=5.0, factor_s=1.0, scope=None, reuse=False):
    # Same joint upsampling as above, but with a tent (linear) range kernel
    # on the guide in place of the Gaussian.
    with tf.variable_scope(scope, reuse=reuse):
        shape = tf.shape(input_ftmp)
        batchsize = shape[0]
        height = shape[1]
        width = shape[2]
        height_s, width_s = tf.cast(height, dtype=tf.float32), tf.cast(width, dtype=tf.float32)
        new_shape = tf.shape(guide_ftmp)
        new_height = new_shape[1]
        new_width = new_shape[2]
        new_height_s, new_width_s = tf.cast(new_height, dtype=tf.float32), tf.cast(new_width, dtype=tf.float32)
        x = tf.linspace(0.0, 1.0, new_width)
        y = tf.linspace(0.0, 1.0, new_height)
        xt, yt = tf.meshgrid(x, y)
        xt = tf.tile(tf.expand_dims(tf.expand_dims(xt, 0), 3), [batchsize, 1, 1, 1])
        yt = tf.tile(tf.expand_dims(tf.expand_dims(yt, 0), 3), [batchsize, 1, 1, 1])

        ## Spatial
        xd = tf.clip_by_value((width_s - 1.0) * xt, 0.0, width_s - 1.0)
        yd = tf.clip_by_value((height_s - 1.0) * yt, 0.0, height_s - 1.0)
        xd0 = tf.floor(xd)
        xd1 = xd0 + 1.0
        yd0 = tf.floor(yd)
        yd1 = yd0 + 1.0
        xd0 = tf.clip_by_value(xd0, 0.0, width_s - 1.0)
        xd1 = tf.clip_by_value(xd1, 0.0, width_s - 1.0)
        yd0 = tf.clip_by_value(yd0, 0.0, height_s - 1.0)
        yd1 = tf.clip_by_value(yd1, 0.0, height_s - 1.0)
        batch_idx = tf.range(0, batchsize)
        batch_idx = tf.reshape(batch_idx, (batchsize, 1, 1, 1))
        bd = tf.tile(batch_idx, (1, new_height, new_width, 1))

        indices00 = tf.concat([bd, tf.cast(yd0, tf.int32), tf.cast(xd0, tf.int32)], 3)
        indices01 = tf.concat([bd, tf.cast(yd0, tf.int32), tf.cast(xd1, tf.int32)], 3)
        indices10 = tf.concat([bd, tf.cast(yd1, tf.int32), tf.cast(xd0, tf.int32)], 3)
        indices11 = tf.concat([bd, tf.cast(yd1, tf.int32), tf.cast(xd1, tf.int32)], 3)

        I00 = tf.gather_nd(input_ftmp, indices00)
        I01 = tf.gather_nd(input_ftmp, indices01)
        I10 = tf.gather_nd(input_ftmp, indices10)
        I11 = tf.gather_nd(input_ftmp, indices11)

        ws00 = tf.maximum(1.0 - factor_s*tf.abs(yd0 - yd), 0.0) * tf.maximum(1.0 - factor_s*tf.abs(xd0 - xd), 0.0)
        ws01 = tf.maximum(1.0 - factor_s*tf.abs(yd0 - yd), 0.0) * tf.maximum(1.0 - factor_s*tf.abs(xd1 - xd), 0.0)
        ws10 = tf.maximum(1.0 - factor_s*tf.abs(yd1 - yd), 0.0) * tf.maximum(1.0 - factor_s*tf.abs(xd0 - xd), 0.0)
        ws11 = tf.maximum(1.0 - factor_s*tf.abs(yd1 - yd), 0.0) * tf.maximum(1.0 - factor_s*tf.abs(xd1 - xd), 0.0)

        ## Guide
        xu = tf.clip_by_value((new_width_s - 1.0) * xt, 0.0, new_width_s - 1.0)
        yu = tf.clip_by_value((new_height_s - 1.0) * yt, 0.0, new_height_s - 1.0)
        xu0 = tf.clip_by_value((new_width_s - 1.0) * xd0 / (width_s - 1.0), 0.0, new_width_s - 1.0)
        xu1 = tf.clip_by_value((new_width_s - 1.0) * xd1 / (width_s - 1.0), 0.0, new_width_s - 1.0)
        yu0 = tf.clip_by_value((new_height_s - 1.0) * yd0 / (height_s - 1.0), 0.0, new_height_s - 1.0)
        yu1 = tf.clip_by_value((new_height_s - 1.0) * yd1 / (height_s - 1.0), 0.0, new_height_s - 1.0)
        bu = tf.tile(batch_idx, (1, new_height, new_width, 1))

        indices00 = tf.concat([bu, tf.cast(yu0, tf.int32), tf.cast(xu0, tf.int32)], 3)
        indices01 = tf.concat([bu, tf.cast(yu0, tf.int32), tf.cast(xu1, tf.int32)], 3)
        indices10 = tf.concat([bu, tf.cast(yu1, tf.int32), tf.cast(xu0, tf.int32)], 3)
        indices11 = tf.concat([bu, tf.cast(yu1, tf.int32), tf.cast(xu1, tf.int32)], 3)
        indicestt = tf.concat([bu, tf.cast(yu, tf.int32), tf.cast(xu, tf.int32)], 3)

        guide00 = tf.gather_nd(guide_ftmp, indices00)
        guide01 = tf.gather_nd(guide_ftmp, indices01)
        guide10 = tf.gather_nd(guide_ftmp, indices10)
        guide11 = tf.gather_nd(guide_ftmp, indices11)
        guidett = tf.gather_nd(guide_ftmp, indicestt)

        # factor_g = tf.get_variable(shape=[], initializer=tf.constant_initializer(factor_g), dtype=tf.float32, name='factor_g')
        wg00 = tf.maximum(1.0 - factor_g*tf.abs(guide00 - guidett), 0.0)
        wg01 = tf.maximum(1.0 - factor_g*tf.abs(guide01 - guidett), 0.0)
        wg10 = tf.maximum(1.0 - factor_g*tf.abs(guide10 - guidett), 0.0)
        wg11 = tf.maximum(1.0 - factor_g*tf.abs(guide11 - guidett), 0.0)

        ## Final merged weights
        weight00 = ws00 * wg00
        weight01 = ws01 * wg01
        weight10 = ws10 * wg10
        weight11 = ws11 * wg11
        I00 = I00 * weight00
        I01 = I01 * weight01
        I10 = I10 * weight10
        I11 = I11 * weight11
        weight_sum = weight00 + weight01 + weight10 + weight11 + 0.0001  # epsilon avoids division by zero
    return tf.add_n([I00, I01, I10, I11]) / weight_sum
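The _lin variant swaps the Gaussian range kernel for a tent (hat) kernel, which is cheaper and assigns hard zero weight once the guide difference exceeds 1/factor_g; the default factor_g rises from 0.2 to 5.0 accordingly. An illustrative comparison of the two range weights at a guide difference of 0.1 (values computed here, not from the original file):

import numpy as np

d = 0.1                                      # example guide difference
gauss_w = np.exp(-d**2 / (2.0 * 0.2**2))     # Gaussian kernel, sigma=0.2 -> ~0.8825
hat_w = max(1.0 - 5.0 * d, 0.0)              # tent kernel, factor_g=5.0 -> 0.5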
def spatial_conv(x, coef):
    # Per-pixel 5-tap plus-shaped filter on the first channel of x, with
    # reflect padding so the output keeps the input's spatial size.
    x_pad = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]], mode="REFLECT")
    inner0 = x_pad[:, 0:-2, 1:-1, 0:1] * coef[:, :, :, 0:1]  # up
    inner1 = x_pad[:, 1:-1, 0:-2, 0:1] * coef[:, :, :, 1:2]  # left
    inner2 = x_pad[:, 1:-1, 1:-1, 0:1] * coef[:, :, :, 2:3]  # center
    inner3 = x_pad[:, 1:-1, 2:, 0:1] * coef[:, :, :, 3:4]    # right
    inner4 = x_pad[:, 2:, 1:-1, 0:1] * coef[:, :, :, 4:5]    # down
    return inner0 + inner1 + inner2 + inner3 + inner4
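Finally, a usage sketch for spatial_conv (shapes assumed for illustration): it applies a per-pixel, spatially varying 5-tap filter to the first channel of x, so coef must carry five channels of tap weights in up/left/center/right/down order.

import tensorflow as tf

img = tf.placeholder(tf.float32, [None, 128, 128, 1])
taps = tf.placeholder(tf.float32, [None, 128, 128, 5])  # up, left, center, right, down
filtered = spatial_conv(img, taps)                      # -> [B, 128, 128, 1]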
