Hi, thanks for your code sharing, but because your code is written in theano, and I wa

<div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clip

The output is <div class="snippet-clipboard-content notranslate position-relative

<a class="user-mention notranslate" data-hovercard-type="user" data-hovercard-url="/us

Hi, I have a question when using tensorflow about deep-prior HOT 10 OPEN

WeihongM commented on August 31, 2024

Hi, I have a question when using tensorflow

from deep-prior.

Comments (10)

WeihongM commented on August 31, 2024

import ipdb
import numpy
import matplotlib
matplotlib.use('Agg')  # plot to file
import matplotlib.pyplot as plt
from net.scalenet import ScaleNetParams, ScaleNet
from trainer.scalenettrainer import ScaleNetTrainerParams, ScaleNetTrainer
from util.handdetector import HandDetector

import theano
import os
import cPickle
import sys
from data.importers import ICVLImporter
from data.dataset import ICVLDataset
from util.handpose_evaluation import ICVLHandposeEvaluation
import cv2
import tensorflow as tf

# Ignore all GPUs, tf random forest does not benefit from it.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

def conv2d(x, W, b, stride=1):
    x = tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='VALID')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)		

def maxpool2d(x, k=2):
    return tf.nn.max_pool(
        x,
        ksize = [1, k, k, 1],
        strides = [1, k, k, 1],
        padding='VALID')

def create_model(x0, x1, x2, weights, biases):
    
    x0 = tf.reshape(x0, shape=[-1, 128, 128, 1])
    conv1_1 = conv2d(x0, weights['wc1_1'], biases['bc1_1'])
    conv1_1 = tf.nn.relu(maxpool2d(conv1_1, k=4))
    conv1_2 = conv2d(conv1_1, weights['wc1_2'], biases['bc1_2'])
    conv1_2 = tf.nn.relu(maxpool2d(conv1_2, k=2))
    conv1_3 = conv2d(conv1_2, weights['wc1_3'], biases['bc1_3'])
    conv1_3 = tf.nn.relu(maxpool2d(conv1_3, k=1))
    flat_1_3 = tf.contrib.layers.flatten(conv1_3)

    x1 = tf.reshape(x1, shape=[-1, 64, 64, 1])
    conv2_1 = conv2d(x1, weights['wc2_1'], biases['bc2_1'])
    conv2_1 = tf.nn.relu(maxpool2d(conv2_1, k=2))
    conv2_2 = conv2d(conv2_1, weights['wc2_2'], biases['bc2_2'])
    conv2_2 = tf.nn.relu(maxpool2d(conv2_2, k=2))
    conv2_3 = conv2d(conv2_2, weights['wc2_3'], biases['bc2_3'])
    conv2_3 = tf.nn.relu(maxpool2d(conv2_3, k=1))
    flat_2_3 = tf.contrib.layers.flatten(conv2_3)

    x2 = tf.reshape(x2, shape=[-1, 32, 32, 1])
    conv3_1 = conv2d(x2, weights['wc3_1'], biases['bc3_1'])
    conv3_1 = tf.nn.relu(maxpool2d(conv3_1, k=2))
    conv3_2 = conv2d(conv3_1, weights['wc3_2'], biases['bc3_2'])
    conv3_2 = tf.nn.relu(maxpool2d(conv3_2, k=1))
    conv3_3 = conv2d(conv3_2, weights['wc3_3'], biases['bc3_3'])
    conv3_3 = tf.nn.relu(maxpool2d(conv3_3, k=1))
    flat_3_3 = tf.contrib.layers.flatten(conv3_3)
    

    fc1 = tf.concat([flat_1_3, flat_2_3, flat_3_3], 1)
    fc1 = tf.add(tf.matmul(fc1, weights['fc1']), biases['fc1'])
    fc2 = tf.nn.relu(fc1)

    fc2 = tf.add(tf.matmul(fc2, weights['fc2']), biases['fc2'])
    out = tf.nn.relu(fc2)

    out = tf.add(tf.matmul(out, weights['out']), biases['out'])

    return out

 

eval_prefix = 'ICVL_COM'
if not os.path.exists('./eval/'+eval_prefix+'/'):
    os.makedirs('./eval/'+eval_prefix+'/')

floatX = theano.config.floatX  # @UndefinedVariable

rng = numpy.random.RandomState(23455)
print("create data")

di = ICVLImporter('../data/ICVL/')
Seq1 = di.loadSequence('train', ['0'],shuffle=True,rng=rng,docom=True)
trainSeqs = [Seq1]

Seq2 = di.loadSequence('test_seq_1',docom=True)
testSeqs = [Seq2]

# create training data
trainDataSet = ICVLDataset(trainSeqs)
train_data, train_gt3D = trainDataSet.imgStackDepthOnly('train')

mb = (train_data.nbytes) / (1024 * 1024)
print("data size: {}Mb".format(mb))

testDataSet = ICVLDataset(testSeqs)
test_data, test_gt3D = testDataSet.imgStackDepthOnly('test_seq_1')

val_data = test_data
val_gt3D = test_gt3D

####################################
# resize data
dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2))
xstart = int(train_data.shape[2]/2-dsize[0]/2)
xend = xstart + dsize[0]
ystart = int(train_data.shape[3]/2-dsize[1]/2)
yend = ystart + dsize[1]
train_data2 = train_data[:, :, ystart:yend, xstart:xend]

dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4))
xstart = int(train_data.shape[2]/2-dsize[0]/2)
xend = xstart + dsize[0]
ystart = int(train_data.shape[3]/2-dsize[1]/2)
yend = ystart + dsize[1]
train_data4 = train_data[:, :, ystart:yend, xstart:xend]

dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2))
xstart = int(train_data.shape[2]/2-dsize[0]/2)
xend = xstart + dsize[0]
ystart = int(train_data.shape[3]/2-dsize[1]/2)
yend = ystart + dsize[1]
val_data2 = val_data[:, :, ystart:yend, xstart:xend]

dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4))
xstart = int(train_data.shape[2]/2-dsize[0]/2)
xend = xstart + dsize[0]
ystart = int(train_data.shape[3]/2-dsize[1]/2)
yend = ystart + dsize[1]
val_data4 = val_data[:, :, ystart:yend, xstart:xend]

dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2))
xstart = int(train_data.shape[2]/2-dsize[0]/2)
xend = xstart + dsize[0]
ystart = int(train_data.shape[3]/2-dsize[1]/2)
yend = ystart + dsize[1]
test_data2 = test_data[:, :, ystart:yend, xstart:xend]

dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4))
xstart = int(train_data.shape[2]/2-dsize[0]/2)
xend = xstart + dsize[0]
ystart = int(train_data.shape[3]/2-dsize[1]/2)
yend = ystart + dsize[1]
test_data4 = test_data[:, :, ystart:yend, xstart:xend]

print train_gt3D.max(), test_gt3D.max(), train_gt3D.min(), test_gt3D.min()
print train_data.max(), test_data.max(), train_data.min(), test_data.min()

imgSizeW = train_data.shape[3]
imgSizeH = train_data.shape[2]
nChannels = train_data.shape[1]


X0 = tf.placeholder(tf.float32, shape=(None, 1, 128, 128))
X1 = tf.placeholder(tf.float32, shape=(None, 1, 64, 64))
X2 = tf.placeholder(tf.float32, shape=(None, 1, 32, 32))
Y = tf.placeholder(tf.float32, shape=(None, 3))

weights = {
# 5x5 conv, 1 input, 8 outputs
'wc1_1': tf.Variable(tf.random_normal([5, 5, 1, 8])),
'wc1_2': tf.Variable(tf.random_normal([5, 5, 8, 8])),
'wc1_3': tf.Variable(tf.random_normal([3, 3, 8, 8])),

# 5x5 conv, 32 inputs, 64 outputs
'wc2_1': tf.Variable(tf.random_normal([5, 5, 1, 8])),
'wc2_2': tf.Variable(tf.random_normal([5, 5, 8, 8])),
'wc2_3': tf.Variable(tf.random_normal([3, 3, 8, 8])),

'wc3_1': tf.Variable(tf.random_normal([5, 5, 1, 8])),
'wc3_2': tf.Variable(tf.random_normal([5, 5, 8, 8])),
'wc3_3': tf.Variable(tf.random_normal([3, 3, 8, 8])),

'fc1': tf.Variable(tf.random_normal([2448, 1024])),
'fc2': tf.Variable(tf.random_normal([1024, 1024])),
'out': tf.Variable(tf.random_normal([1024, 3]))
}

biases = {
'bc1_1': tf.Variable(tf.random_normal([8])),
'bc1_2': tf.Variable(tf.random_normal([8])),
'bc1_3': tf.Variable(tf.random_normal([8])),

'bc2_1': tf.Variable(tf.random_normal([8])),
'bc2_2': tf.Variable(tf.random_normal([8])),
'bc2_3': tf.Variable(tf.random_normal([8])),

'bc3_1': tf.Variable(tf.random_normal([8])),
'bc3_2': tf.Variable(tf.random_normal([8])),
'bc3_3': tf.Variable(tf.random_normal([8])),

'fc1': tf.Variable(tf.random_normal([1024])),
'fc2': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([3]))
}   

# the parameter restore
batch_size = 64
learning_rate = 0.0005
weightreg_factor = 0.1  # regularization on the weights

num_epochs = 1
momentum = 0.9

use_early_stopping = True
#lr_of_ep = lambda ep: self.learning_rate/(1+0.2*ep) 
logs_path = './eval/tensorflow_logs/example/'
if not os.path.exists('./eval/'+eval_prefix+'/'):
    os.makedirs(logs_path)


out = create_model(X0, X1, X2, weights, biases)
cost = tf.sqrt(tf.reduce_sum(tf.square(out - Y), 1))  
# define l2 loss   
regularizer = 0
for w in weights:
    regularizer += tf.nn.l2_loss(weights[w])


cost = tf.reduce_mean(cost + weightreg_factor * regularizer)
#optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum = momentum)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(cost)

init = tf.global_variables_initializer()

# Create a summary to monitor cost tensor
tf.summary.scalar("loss", cost)
# Merge all summaries into a single op
merged_summary_op = tf.summary.merge_all()


with tf.Session() as sess:
    #Run the initializer
    sess.run(init)
    data_size = (train_data).shape[0]
    num_batches_per_epoch = int( data_size / batch_size) + 1

    # op to write logs to Tensorboard
    summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

    for epoch in range(num_epochs):
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1)* batch_size, data_size)
            data0 = train_data[start_index: end_index]
            data1 = train_data2[start_index: end_index]
            data2 = train_data4[start_index: end_index]
            label = train_gt3D[start_index: end_index, 0, :]
            _, loss, summary, output = sess.run([train_op, cost, merged_summary_op, out], feed_dict={X0: data0, X1: data1, X2: data2, Y: label})

            summary_writer.add_summary(summary, epoch * num_batches_per_epoch + batch_num)
            print("batch_num " + str(batch_num) + ", Minibatch Loss= " + \
              "{:.4f}".format(loss) )
            
            print("the first output is {}".format(output[0,:]) + '/n' + "the true label is {}".format(label[0,:]))

    # Test model
    print("Testing...")
    gt3D = [j.gt3Dorig[0].reshape(1, 3) for j in testSeqs[0].data]
    jts = sess.run(out, feed_dict={X0: test_data, X1: test_data2, X2: test_data4})
    joints = []
    for i in xrange(test_data.shape[0]):
        joints.append(jts[i].reshape(1, 3)*(testSeqs[0].config['cube'][2]/2.) + testSeqs[0].data[i].com)

    hpe = ICVLHandposeEvaluation(gt3D, joints)
    mean_error = hpe.getMeanError()
    max_error = hpe.getMaxError()
    print("Mean error: {}mm, max error: {}mm".format(mean_error, max_error))

from deep-prior.

WeihongM commented on August 31, 2024

The output is

batch_num 0, Minibatch Loss= 2870740.5000
the first output is [ 988596.125    351112.15625 -661840.     ]/nthe true label is [ 0.00355011 -0.06579497 -0.09092724]
batch_num 1, Minibatch Loss= 2343037.0000
the first output is [  996007.5  2312775.   1401645. ]/nthe true label is [ 0.06551268 -0.05326205  0.00721582]
batch_num 2, Minibatch Loss= 1728728.7500
the first output is [-385689.0625  -400038.65625  991194.125  ]/nthe true label is [ 0.02013716 -0.17601474  0.18787402]
batch_num 3, Minibatch Loss= 1772206.0000
the first output is [ 1333414.      -913495.25     457021.1875]/nthe true label is [-0.06101697 -0.01881293 -0.0493584 ]
batch_num 4, Minibatch Loss= 1891400.3750
the first output is [-1273810.5        -901493.8125      -41111.3671875]/nthe true label is [ 0.15115368  0.04643478  0.21591993]
batch_num 5, Minibatch Loss= 1808405.3750
the first output is [   83370.6796875 -2205300.          979629.6875   ]/nthe true label is [ 0.1319444   0.10498183  0.14687036]
batch_num 6, Minibatch Loss= 1979900.7500

from deep-prior.

WeihongM commented on August 31, 2024

@moberweger

from deep-prior.

moberweger commented on August 31, 2024

@WeihongM
since the errors start that high, did you try using a different initialization for the network weights?

from deep-prior.

WeihongM commented on August 31, 2024

@moberweger The code I made is all above, can you give me some advice?

from deep-prior.

WeihongM commented on August 31, 2024

@moberweger Hello,
The problem I mentioned above I have solved, and now I want to implement the second stage part.
However, now I am confused whether the second stage network is the same as the first stage? It is said
the second stage trained on the new network ? or whether to use the first stage network to fine tuning?

from deep-prior.

moberweger commented on August 31, 2024

@WeihongM
I am not sure what you mean by second stage, I guess the refinement network? They are trained independently, one network for each joint. The refinement network takes a crop around the initial location (predicted from the first network) and predicts an offset for the joint location.

from deep-prior.

WeihongM commented on August 31, 2024

@moberweger Hi,
yeap, I mean the refinement stage.
In your words, "predicts an offset for the joint location", Can I directly predict the joint location (using the ground truth label)
Last question, the refinement network which refine the all joints on the hand using the same network? Or different joints using different refinement network? And is the network architecture same as the first stage?
Thanks !

from deep-prior.

moberweger commented on August 31, 2024

@WeihongM
It will not work when predicting the joint location directly, because the network input lacks the context, so you can only predict the difference: groundtruth-predicted
It uses a different network for each joint, the architecture is the same, though.

from deep-prior.

WeihongM commented on August 31, 2024

@moberweger
Sorry, I do not quite catch you about your explanation. You said the network input lacks the context, I think this is because our input is a overlap input (different scales). Comparing with this, our first stage is also a multi-scale input ( sometimes other joints are not included for scaling), but in this stage, we ditectly predict the joint location.(seeing from the main_icvl_com_refine.py)

Thanks, hope for your explanation.

from deep-prior.

Hi, I have a question when using tensorflow about deep-prior HOT 10 OPEN

Comments (10)

Related Issues (20)

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent