Giter Club home page Giter Club logo

Comments (10)

WeihongM avatar WeihongM commented on August 31, 2024
import ipdb
import numpy
import matplotlib
matplotlib.use('Agg')  # plot to file
import matplotlib.pyplot as plt
from net.scalenet import ScaleNetParams, ScaleNet
from trainer.scalenettrainer import ScaleNetTrainerParams, ScaleNetTrainer
from util.handdetector import HandDetector

import theano
import os
import cPickle
import sys
from data.importers import ICVLImporter
from data.dataset import ICVLDataset
from util.handpose_evaluation import ICVLHandposeEvaluation
import cv2
import tensorflow as tf

# Ignore all GPUs, tf random forest does not benefit from it.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

def conv2d(x, W, b, stride=1):
    x = tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='VALID')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)		

def maxpool2d(x, k=2):
    return tf.nn.max_pool(
        x,
        ksize = [1, k, k, 1],
        strides = [1, k, k, 1],
        padding='VALID')

def create_model(x0, x1, x2, weights, biases):
    
    x0 = tf.reshape(x0, shape=[-1, 128, 128, 1])
    conv1_1 = conv2d(x0, weights['wc1_1'], biases['bc1_1'])
    conv1_1 = tf.nn.relu(maxpool2d(conv1_1, k=4))
    conv1_2 = conv2d(conv1_1, weights['wc1_2'], biases['bc1_2'])
    conv1_2 = tf.nn.relu(maxpool2d(conv1_2, k=2))
    conv1_3 = conv2d(conv1_2, weights['wc1_3'], biases['bc1_3'])
    conv1_3 = tf.nn.relu(maxpool2d(conv1_3, k=1))
    flat_1_3 = tf.contrib.layers.flatten(conv1_3)

    x1 = tf.reshape(x1, shape=[-1, 64, 64, 1])
    conv2_1 = conv2d(x1, weights['wc2_1'], biases['bc2_1'])
    conv2_1 = tf.nn.relu(maxpool2d(conv2_1, k=2))
    conv2_2 = conv2d(conv2_1, weights['wc2_2'], biases['bc2_2'])
    conv2_2 = tf.nn.relu(maxpool2d(conv2_2, k=2))
    conv2_3 = conv2d(conv2_2, weights['wc2_3'], biases['bc2_3'])
    conv2_3 = tf.nn.relu(maxpool2d(conv2_3, k=1))
    flat_2_3 = tf.contrib.layers.flatten(conv2_3)

    x2 = tf.reshape(x2, shape=[-1, 32, 32, 1])
    conv3_1 = conv2d(x2, weights['wc3_1'], biases['bc3_1'])
    conv3_1 = tf.nn.relu(maxpool2d(conv3_1, k=2))
    conv3_2 = conv2d(conv3_1, weights['wc3_2'], biases['bc3_2'])
    conv3_2 = tf.nn.relu(maxpool2d(conv3_2, k=1))
    conv3_3 = conv2d(conv3_2, weights['wc3_3'], biases['bc3_3'])
    conv3_3 = tf.nn.relu(maxpool2d(conv3_3, k=1))
    flat_3_3 = tf.contrib.layers.flatten(conv3_3)
    

    fc1 = tf.concat([flat_1_3, flat_2_3, flat_3_3], 1)
    fc1 = tf.add(tf.matmul(fc1, weights['fc1']), biases['fc1'])
    fc2 = tf.nn.relu(fc1)

    fc2 = tf.add(tf.matmul(fc2, weights['fc2']), biases['fc2'])
    out = tf.nn.relu(fc2)

    out = tf.add(tf.matmul(out, weights['out']), biases['out'])

    return out

 

eval_prefix = 'ICVL_COM'
if not os.path.exists('./eval/'+eval_prefix+'/'):
    os.makedirs('./eval/'+eval_prefix+'/')

floatX = theano.config.floatX  # @UndefinedVariable

rng = numpy.random.RandomState(23455)
print("create data")

di = ICVLImporter('../data/ICVL/')
Seq1 = di.loadSequence('train', ['0'],shuffle=True,rng=rng,docom=True)
trainSeqs = [Seq1]

Seq2 = di.loadSequence('test_seq_1',docom=True)
testSeqs = [Seq2]

# create training data
trainDataSet = ICVLDataset(trainSeqs)
train_data, train_gt3D = trainDataSet.imgStackDepthOnly('train')

mb = (train_data.nbytes) / (1024 * 1024)
print("data size: {}Mb".format(mb))

testDataSet = ICVLDataset(testSeqs)
test_data, test_gt3D = testDataSet.imgStackDepthOnly('test_seq_1')

val_data = test_data
val_gt3D = test_gt3D

####################################
# resize data
dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2))
xstart = int(train_data.shape[2]/2-dsize[0]/2)
xend = xstart + dsize[0]
ystart = int(train_data.shape[3]/2-dsize[1]/2)
yend = ystart + dsize[1]
train_data2 = train_data[:, :, ystart:yend, xstart:xend]

dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4))
xstart = int(train_data.shape[2]/2-dsize[0]/2)
xend = xstart + dsize[0]
ystart = int(train_data.shape[3]/2-dsize[1]/2)
yend = ystart + dsize[1]
train_data4 = train_data[:, :, ystart:yend, xstart:xend]

dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2))
xstart = int(train_data.shape[2]/2-dsize[0]/2)
xend = xstart + dsize[0]
ystart = int(train_data.shape[3]/2-dsize[1]/2)
yend = ystart + dsize[1]
val_data2 = val_data[:, :, ystart:yend, xstart:xend]

dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4))
xstart = int(train_data.shape[2]/2-dsize[0]/2)
xend = xstart + dsize[0]
ystart = int(train_data.shape[3]/2-dsize[1]/2)
yend = ystart + dsize[1]
val_data4 = val_data[:, :, ystart:yend, xstart:xend]

dsize = (int(train_data.shape[2]//2), int(train_data.shape[3]//2))
xstart = int(train_data.shape[2]/2-dsize[0]/2)
xend = xstart + dsize[0]
ystart = int(train_data.shape[3]/2-dsize[1]/2)
yend = ystart + dsize[1]
test_data2 = test_data[:, :, ystart:yend, xstart:xend]

dsize = (int(train_data.shape[2]//4), int(train_data.shape[3]//4))
xstart = int(train_data.shape[2]/2-dsize[0]/2)
xend = xstart + dsize[0]
ystart = int(train_data.shape[3]/2-dsize[1]/2)
yend = ystart + dsize[1]
test_data4 = test_data[:, :, ystart:yend, xstart:xend]

print train_gt3D.max(), test_gt3D.max(), train_gt3D.min(), test_gt3D.min()
print train_data.max(), test_data.max(), train_data.min(), test_data.min()

imgSizeW = train_data.shape[3]
imgSizeH = train_data.shape[2]
nChannels = train_data.shape[1]


X0 = tf.placeholder(tf.float32, shape=(None, 1, 128, 128))
X1 = tf.placeholder(tf.float32, shape=(None, 1, 64, 64))
X2 = tf.placeholder(tf.float32, shape=(None, 1, 32, 32))
Y = tf.placeholder(tf.float32, shape=(None, 3))

weights = {
# 5x5 conv, 1 input, 8 outputs
'wc1_1': tf.Variable(tf.random_normal([5, 5, 1, 8])),
'wc1_2': tf.Variable(tf.random_normal([5, 5, 8, 8])),
'wc1_3': tf.Variable(tf.random_normal([3, 3, 8, 8])),

# 5x5 conv, 32 inputs, 64 outputs
'wc2_1': tf.Variable(tf.random_normal([5, 5, 1, 8])),
'wc2_2': tf.Variable(tf.random_normal([5, 5, 8, 8])),
'wc2_3': tf.Variable(tf.random_normal([3, 3, 8, 8])),

'wc3_1': tf.Variable(tf.random_normal([5, 5, 1, 8])),
'wc3_2': tf.Variable(tf.random_normal([5, 5, 8, 8])),
'wc3_3': tf.Variable(tf.random_normal([3, 3, 8, 8])),

'fc1': tf.Variable(tf.random_normal([2448, 1024])),
'fc2': tf.Variable(tf.random_normal([1024, 1024])),
'out': tf.Variable(tf.random_normal([1024, 3]))
}

biases = {
'bc1_1': tf.Variable(tf.random_normal([8])),
'bc1_2': tf.Variable(tf.random_normal([8])),
'bc1_3': tf.Variable(tf.random_normal([8])),

'bc2_1': tf.Variable(tf.random_normal([8])),
'bc2_2': tf.Variable(tf.random_normal([8])),
'bc2_3': tf.Variable(tf.random_normal([8])),

'bc3_1': tf.Variable(tf.random_normal([8])),
'bc3_2': tf.Variable(tf.random_normal([8])),
'bc3_3': tf.Variable(tf.random_normal([8])),

'fc1': tf.Variable(tf.random_normal([1024])),
'fc2': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([3]))
}   

# the parameter restore
batch_size = 64
learning_rate = 0.0005
weightreg_factor = 0.1  # regularization on the weights

num_epochs = 1
momentum = 0.9

use_early_stopping = True
#lr_of_ep = lambda ep: self.learning_rate/(1+0.2*ep) 
logs_path = './eval/tensorflow_logs/example/'
if not os.path.exists('./eval/'+eval_prefix+'/'):
    os.makedirs(logs_path)


out = create_model(X0, X1, X2, weights, biases)
cost = tf.sqrt(tf.reduce_sum(tf.square(out - Y), 1))  
# define l2 loss   
regularizer = 0
for w in weights:
    regularizer += tf.nn.l2_loss(weights[w])


cost = tf.reduce_mean(cost + weightreg_factor * regularizer)
#optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum = momentum)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(cost)

init = tf.global_variables_initializer()

# Create a summary to monitor cost tensor
tf.summary.scalar("loss", cost)
# Merge all summaries into a single op
merged_summary_op = tf.summary.merge_all()


with tf.Session() as sess:
    #Run the initializer
    sess.run(init)
    data_size = (train_data).shape[0]
    num_batches_per_epoch = int( data_size / batch_size) + 1

    # op to write logs to Tensorboard
    summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

    for epoch in range(num_epochs):
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1)* batch_size, data_size)
            data0 = train_data[start_index: end_index]
            data1 = train_data2[start_index: end_index]
            data2 = train_data4[start_index: end_index]
            label = train_gt3D[start_index: end_index, 0, :]
            _, loss, summary, output = sess.run([train_op, cost, merged_summary_op, out], feed_dict={X0: data0, X1: data1, X2: data2, Y: label})

            summary_writer.add_summary(summary, epoch * num_batches_per_epoch + batch_num)
            print("batch_num " + str(batch_num) + ", Minibatch Loss= " + \
              "{:.4f}".format(loss) )
            
            print("the first output is {}".format(output[0,:]) + '/n' + "the true label is {}".format(label[0,:]))

    # Test model
    print("Testing...")
    gt3D = [j.gt3Dorig[0].reshape(1, 3) for j in testSeqs[0].data]
    jts = sess.run(out, feed_dict={X0: test_data, X1: test_data2, X2: test_data4})
    joints = []
    for i in xrange(test_data.shape[0]):
        joints.append(jts[i].reshape(1, 3)*(testSeqs[0].config['cube'][2]/2.) + testSeqs[0].data[i].com)

    hpe = ICVLHandposeEvaluation(gt3D, joints)
    mean_error = hpe.getMeanError()
    max_error = hpe.getMaxError()
    print("Mean error: {}mm, max error: {}mm".format(mean_error, max_error))
    

from deep-prior.

WeihongM avatar WeihongM commented on August 31, 2024

The output is

batch_num 0, Minibatch Loss= 2870740.5000
the first output is [ 988596.125    351112.15625 -661840.     ]/nthe true label is [ 0.00355011 -0.06579497 -0.09092724]
batch_num 1, Minibatch Loss= 2343037.0000
the first output is [  996007.5  2312775.   1401645. ]/nthe true label is [ 0.06551268 -0.05326205  0.00721582]
batch_num 2, Minibatch Loss= 1728728.7500
the first output is [-385689.0625  -400038.65625  991194.125  ]/nthe true label is [ 0.02013716 -0.17601474  0.18787402]
batch_num 3, Minibatch Loss= 1772206.0000
the first output is [ 1333414.      -913495.25     457021.1875]/nthe true label is [-0.06101697 -0.01881293 -0.0493584 ]
batch_num 4, Minibatch Loss= 1891400.3750
the first output is [-1273810.5        -901493.8125      -41111.3671875]/nthe true label is [ 0.15115368  0.04643478  0.21591993]
batch_num 5, Minibatch Loss= 1808405.3750
the first output is [   83370.6796875 -2205300.          979629.6875   ]/nthe true label is [ 0.1319444   0.10498183  0.14687036]
batch_num 6, Minibatch Loss= 1979900.7500

from deep-prior.

WeihongM avatar WeihongM commented on August 31, 2024

@moberweger

from deep-prior.

moberweger avatar moberweger commented on August 31, 2024

@WeihongM
since the errors start that high, did you try using a different initialization for the network weights?

from deep-prior.

WeihongM avatar WeihongM commented on August 31, 2024

@moberweger The code I made is all above, can you give me some advice?

from deep-prior.

WeihongM avatar WeihongM commented on August 31, 2024

@moberweger Hello,
The problem I mentioned above I have solved, and now I want to implement the second stage part.
However, now I am confused whether the second stage network is the same as the first stage? It is said
the second stage trained on the new network ? or whether to use the first stage network to fine tuning?

from deep-prior.

moberweger avatar moberweger commented on August 31, 2024

@WeihongM
I am not sure what you mean by second stage, I guess the refinement network? They are trained independently, one network for each joint. The refinement network takes a crop around the initial location (predicted from the first network) and predicts an offset for the joint location.

from deep-prior.

WeihongM avatar WeihongM commented on August 31, 2024

@moberweger Hi,
yeap, I mean the refinement stage.
In your words, "predicts an offset for the joint location", Can I directly predict the joint location (using the ground truth label)
Last question, the refinement network which refine the all joints on the hand using the same network? Or different joints using different refinement network? And is the network architecture same as the first stage?
Thanks !

from deep-prior.

moberweger avatar moberweger commented on August 31, 2024

@WeihongM
It will not work when predicting the joint location directly, because the network input lacks the context, so you can only predict the difference: groundtruth-predicted
It uses a different network for each joint, the architecture is the same, though.

from deep-prior.

WeihongM avatar WeihongM commented on August 31, 2024

@moberweger
Sorry, I do not quite catch you about your explanation. You said the network input lacks the context, I think this is because our input is a overlap input (different scales). Comparing with this, our first stage is also a multi-scale input ( sometimes other joints are not included for scaling), but in this stage, we ditectly predict the joint location.(seeing from the main_icvl_com_refine.py)

Thanks, hope for your explanation.

from deep-prior.

Related Issues (20)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.