Comments (12)
Hi @zeroXscorpion7. You can try inference without the bndbox, but the performance cannot be guaranteed; some skeleton distortion ("flex") is very likely to happen because of the mean/std shift.
from a2j.
I want to test the efficacy in real time. How do I run the model on one picture at a time?
from a2j.
def main():
# Single-frame inference attempt: load the A2J network, run it on one depth
# frame read from a .mat file, map the prediction back to image coordinates
# and draw a 15-joint skeleton with OpenCV.
# NOTE(review): indentation was lost when this snippet was pasted; every line
# below presumably belongs inside main() (the two `for` bodies one level deeper).
net = model.A2J_model(num_classes = keypointsNumber)
net.load_state_dict(torch.load(model_dir))
net = net.cuda()
net.eval()
post_precess = anchor.post_process(shape=[cropHeight//16,cropWidth//16],stride=16,P_h=None, P_w=None)
output = torch.FloatTensor()
data4DTemp = scio.loadmat(testingImageDir + str(1) + '.mat')['DepthNormal']
depthTemp = data4DTemp[:,:,3]
img=depthTemp
# NOTE(review): `img` is handed to the network exactly as sliced from the .mat
# array; presumably it must first be resized to (cropHeight, cropWidth),
# normalised, converted to a [1, 1, H, W] torch tensor and moved to the GPU --
# confirm against the preprocessing used for training.
heads = net(img)
pred_keypoints = post_precess(heads,voting=False)
output = torch.cat([output,pred_keypoints.data.cpu()], 0)
result = output.cpu().data.numpy()
# NOTE(review): this assert compares the shape of `result` with itself, so it
# can never fail.
assert np.shape(result)==np.shape(result), "source has different shape with target"
# NOTE(review): the indexing below treats `result` as 2-D ([:, k]); if the
# network output is [batch, joints, 3] the coordinate axis is the last one --
# TODO confirm the shape returned by post_process.
Test1_ = np.zeros(result.shape)
Test1_[:, 0] = result[:,1]
Test1_[:, 1] = result[:,0]
Test1_[:, 2] = result[:,2]
Test1 = Test1_
# NOTE(review): neither `Bndbox` nor `i` is defined in this snippet before the
# next two lines (`i` is only bound by the loop near the end), and
# `depthFactor` is not defined here either.
Test1[:,0] = Test1_[:,0]*(Bndbox[i,2]-Bndbox[i,0])/cropWidth + Bndbox[i,0] # x
Test1[:,1] = Test1_[:,1]*(Bndbox[i,3]-Bndbox[i,1])/cropHeight + Bndbox[i,1] # y
Test1[:,2] = Test1_[:,2]/depthFactor
# NOTE(review): TestWorld is filled with ones and never receives the values in
# TestWorld_tuple, so the world2pixel calls below only ever see ones.
TestWorld = np.ones((len(Test1),keypointsNumber,3))
TestWorld_tuple = pixel2world(Test1[:,0],Test1[:,1],Test1[:,2])
# NOTE(review): uint8 holds 0..255 only, while the canvas below is 320 pixels
# wide, so larger x coordinates cannot be stored in X.
X=np.zeros((15),np.uint8)
Y=np.zeros((15),np.uint8)
for j in range(keypointsNumber):
X[j],Y[j]=world2pixel(TestWorld[0,j,0],TestWorld[0,j,1],TestWorld[0,j,2])
# Black 240x320 BGR canvas; each cv2.line call joins two joint indices with a
# red segment of thickness 2.
IMGX=np.zeros((240,320,3),np.uint8)
cv2.line(IMGX,(X[0],Y[0]),(X[1],Y[1]),(0,0,255),2)
cv2.line(IMGX,(X[1],Y[1]),(X[2],Y[2]),(0,0,255),2)
cv2.line(IMGX,(X[1],Y[1]),(X[3],Y[3]),(0,0,255),2)
cv2.line(IMGX,(X[1],Y[1]),(X[8],Y[8]),(0,0,255),2)
cv2.line(IMGX,(X[2],Y[2]),(X[4],Y[4]),(0,0,255),2)
cv2.line(IMGX,(X[4],Y[4]),(X[6],Y[6]),(0,0,255),2)
cv2.line(IMGX,(X[3],Y[3]),(X[5],Y[5]),(0,0,255),2)
cv2.line(IMGX,(X[5],Y[5]),(X[7],Y[7]),(0,0,255),2)
cv2.line(IMGX,(X[8],Y[8]),(X[9],Y[9]),(0,0,255),2)
cv2.line(IMGX,(X[8],Y[8]),(X[10],Y[10]),(0,0,255),2)
cv2.line(IMGX,(X[9],Y[9]),(X[11],Y[11]),(0,0,255),2)
cv2.line(IMGX,(X[11],Y[11]),(X[13],Y[13]),(0,0,255),2)
cv2.line(IMGX,(X[10],Y[10]),(X[12],Y[12]),(0,0,255),2)
cv2.line(IMGX,(X[12],Y[12]),(X[14],Y[14]),(0,0,255),2)
# Mark every joint with a filled white circle of radius 4.
for i in range(keypointsNumber):
cv2.circle(IMGX,(X[i],Y[i]),4,(255,255,255),-1)
# Show the canvas and block until a key is pressed.
cv2.imshow('img',IMGX)
cv2.waitKey(0)
cv2.destroyAllWindows()
This is the code I edited, but it has some problems.
How do I edit it?
from a2j.
def main():
net = model.A2J_model(num_classes = keypointsNumber) net.load_state_dict(torch.load(model_dir)) net = net.cuda() net.eval() post_precess = anchor.post_process(shape=[cropHeight//16,cropWidth//16],stride=16,P_h=None, P_w=None) output = torch.FloatTensor() data4DTemp = scio.loadmat(testingImageDir + str(1) + '.mat')['DepthNormal'] depthTemp = data4DTemp[:,:,3] img=depthTemp heads = net(img) pred_keypoints = post_precess(heads,voting=False) output = torch.cat([output,pred_keypoints.data.cpu()], 0) result = output.cpu().data.numpy() assert np.shape(result)==np.shape(result), "source has different shape with target" Test1_ = np.zeros(result.shape) Test1_[:, 0] = result[:,1] Test1_[:, 1] = result[:,0] Test1_[:, 2] = result[:,2] Test1 = Test1_ Test1[:,0] = Test1_[:,0]*(Bndbox[i,2]-Bndbox[i,0])/cropWidth + Bndbox[i,0] # x Test1[:,1] = Test1_[:,1]*(Bndbox[i,3]-Bndbox[i,1])/cropHeight + Bndbox[i,1] # y Test1[:,2] = Test1_[:,2]/depthFactor TestWorld = np.ones((len(Test1),keypointsNumber,3)) TestWorld_tuple = pixel2world(Test1[:,0],Test1[:,1],Test1[:,2]) X=np.zeros((15),np.uint8) Y=np.zeros((15),np.uint8) for j in range(keypointsNumber): X[j],Y[j]=world2pixel(TestWorld[0,j,0],TestWorld[0,j,1],TestWorld[0,j,2]) IMGX=np.zeros((240,320,3),np.uint8) cv2.line(IMGX,(X[0],Y[0]),(X[1],Y[1]),(0,0,255),2) cv2.line(IMGX,(X[1],Y[1]),(X[2],Y[2]),(0,0,255),2) cv2.line(IMGX,(X[1],Y[1]),(X[3],Y[3]),(0,0,255),2) cv2.line(IMGX,(X[1],Y[1]),(X[8],Y[8]),(0,0,255),2) cv2.line(IMGX,(X[2],Y[2]),(X[4],Y[4]),(0,0,255),2) cv2.line(IMGX,(X[4],Y[4]),(X[6],Y[6]),(0,0,255),2) cv2.line(IMGX,(X[3],Y[3]),(X[5],Y[5]),(0,0,255),2) cv2.line(IMGX,(X[5],Y[5]),(X[7],Y[7]),(0,0,255),2) cv2.line(IMGX,(X[8],Y[8]),(X[9],Y[9]),(0,0,255),2) cv2.line(IMGX,(X[8],Y[8]),(X[10],Y[10]),(0,0,255),2) cv2.line(IMGX,(X[9],Y[9]),(X[11],Y[11]),(0,0,255),2) cv2.line(IMGX,(X[11],Y[11]),(X[13],Y[13]),(0,0,255),2) cv2.line(IMGX,(X[10],Y[10]),(X[12],Y[12]),(0,0,255),2) cv2.line(IMGX,(X[12],Y[12]),(X[14],Y[14]),(0,0,255),2) for i in 
range(keypointsNumber): cv2.circle(IMGX,(X[i],Y[i]),4,(255,255,255),-1) cv2.imshow('img',IMGX) cv2.waitKey(0) cv2.destroyAllWindows()
This is the code I edited, but it has some problems.
How do I edit it?
Did you solve it?
from a2j.
I removed my_dataloader and passed depthTemp into dataPreprocess, then used torch.from_numpy to convert it to a tensor.
def dataPreprocess(img, depth_thres=0.4):
    """Turn one depth frame into a float32 tensor of shape [1, cropHeight, cropWidth].

    The frame is copied, resized to (cropWidth, cropHeight) with
    nearest-neighbour interpolation, cast to float32 and divided by 5.

    Args:
        img: 2-D depth image (anything ``cv2.resize`` accepts).
        depth_thres: accepted for signature compatibility; not used here.

    Returns:
        ``torch.Tensor`` in channel-first layout with a single channel.
    """
    frame = img.copy()[:, :]
    resized = cv2.resize(frame, (cropWidth, cropHeight), interpolation=cv2.INTER_NEAREST)
    scaled = np.asarray(resized, dtype='float32') / 5

    # Build the H x W x 1 image first, then move the channel axis to the front.
    hwc = np.ones((cropHeight, cropWidth, 1), dtype='float32')
    hwc[:, :, 0] = scaled
    chw = hwc.transpose(2, 0, 1)  # [H, W, C] -> [C, H, W]
    return torch.from_numpy(chw)
# Wrap the single preprocessed frame in a batch of one: [N=1, C=1, 288, 288].
frame_batch = np.zeros((1, 1, 288, 288), dtype=np.float32)
frame_batch[0, :, :, :] = dataPreprocess(depth_map, 0.4)
img = torch.from_numpy(frame_batch)

# Empty tensor that later predictions are concatenated onto.
output = torch.FloatTensor()
# Decoder that turns the network heads into keypoint predictions.
post_precess = anchor.post_process(shape=[cropHeight//16,cropWidth//16],stride=16,P_h=None, P_w=None)
Like these.
from a2j.
I removed my_dataloader and passed depthTemp into dataPreprocess, then used torch.from_numpy to convert it to a tensor.
def dataPreprocess(img, depth_thres=0.4):
imageOutputs = np.ones((cropHeight, cropWidth, 1), dtype='float32') imCrop = img.copy()[:, :] imgResize = cv2.resize(imCrop, (cropWidth, cropHeight), interpolation=cv2.INTER_NEAREST) imgResize = np.asarray(imgResize,dtype = 'float32') # H*W*C imgResize = imgResize /5 imageOutputs[:,:,0] = imgResize imageOutputs = np.asarray(imageOutputs) imageNCHWOut = imageOutputs.transpose(2, 0, 1) # [H, W, C] --->>> [C, H, W] imageNCHWOut = np.asarray(imageNCHWOut) data = torch.from_numpy(imageNCHWOut) return data
img=np.zeros((1,1,288,288),np.float32)
img[0,:,:,:]= dataPreprocess(depth_map, 0.4)
img=torch.from_numpy(img)
post_precess = anchor.post_process(shape=[cropHeight//16,cropWidth//16],stride=16,P_h=None, P_w=None)
output = torch.FloatTensor()Like these.
thank you !!
I am trying to use this model on my own pictures. Is the bndbox needed?
I see you asked that. Did you try it? What was the performance?
from a2j.
If you don't want to use the bndbox, you can train on the data yourself, or modify the training code.
from a2j.
If you don't want to use the bndbox, you can train on the data yourself, or modify the training code.
I see. Can you share the training code? Did you train on ITOP or K2PHD?
I have seen some people use just the depth map to train an AlphaPose or OpenPose model.
from a2j.
import cv2
import torch
import torch.utils.data
import torch.optim.lr_scheduler as lr_scheduler
import numpy as np
import scipy.io as scio
import os
from PIL import Image
from torch.autograd import Variable
import model as model
import anchor as anchor
from tqdm import tqdm
import random_erasing
import logging
import time
import datetime
import random
# Restrict PyTorch to the first GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# DataHyperParms
TrainImgFrames = 1000   # number of training frames (used as the train dataset length)
TestImgFrames = 100     # number of test frames (used as the test dataset length)
keypointsNumber = 15    # joints per skeleton
cropWidth = 288         # network input width
cropHeight = 288        # network input height
batch_size = 64
learning_rate = 0.00035
Weight_Decay = 1e-4
nepoch = 35
RegLossFactor = 3       # weight of the regression loss in the total loss
spatialFactor = 0.5
RandCropShift = 5
RandshiftDepth = 1
RandRotate = 180        # rotation augmentation is drawn from [-RandRotate, RandRotate)
RandScale = (1.0, 0.5)  # scale augmentation = rand() * RandScale[0] + RandScale[1]

# Seed every random source so runs are repeatable.
randomseed = 12345
random.seed(randomseed)
np.random.seed(randomseed)
torch.manual_seed(randomseed)

# Output directory for logs, checkpoints and results (fill in before running).
save_dir = ''

# Best effort: the directory may already exist, and the empty placeholder
# above cannot be created at all.
try:
    os.makedirs(save_dir)
except OSError:
    pass

# Dataset / model locations (fill in before running).
trainingImageDir = ''
testingImageDir = '' # mat images
keypointsfileTest = ''
keypointsfileTrain = ''
model_dir = ''
result_file = 'result_test.txt'
def pixel2world(x, cx=160.0, cy=120.0, scale=0.0035):
    """Convert (u, v, depth) coordinates to (X, Y, depth) **in place**.

    Channels 0 and 1 along the third axis are overwritten with
    ``(u - cx) * depth * scale`` and ``(cy - v) * depth * scale``; channel 2
    (depth) is left untouched.

    Args:
        x: array indexed as ``x[:, :, k]`` with k = 0, 1, 2. It is modified in
            place; pass a copy if the original values are still needed.
        cx, cy: image-centre offsets. The defaults 160/120 are presumably the
            centre of a 320x240 depth image -- confirm for other sensors.
        scale: per-depth-unit pixel size (default 0.0035).

    Returns:
        The same array object ``x`` that was passed in.
    """
    depth = x[:, :, 2]
    x[:, :, 0] = (x[:, :, 0] - cx) * depth * scale
    x[:, :, 1] = (cy - x[:, :, 1]) * depth * scale
    return x
def world2pixel(x, cx=160.0, cy=120.0, scale=0.0035):
    """Convert (X, Y, depth) coordinates to (u, v, depth) **in place**.

    Channels 0 and 1 along the third axis are overwritten with
    ``cx + X / (depth * scale)`` and ``cy - Y / (depth * scale)``; channel 2
    (depth) is left untouched. A zero depth produces a division by zero.

    Args:
        x: array indexed as ``x[:, :, k]`` with k = 0, 1, 2. It is modified in
            place; pass a copy if the original values are still needed.
        cx, cy: image-centre offsets. The defaults 160/120 are presumably the
            centre of a 320x240 depth image -- confirm for other sensors.
        scale: per-depth-unit pixel size (default 0.0035).

    Returns:
        The same array object ``x`` that was passed in.
    """
    depth_scaled = x[:, :, 2] * scale
    x[:, :, 0] = cx + x[:, :, 0] / depth_scaled
    x[:, :, 1] = cy - x[:, :, 1] / depth_scaled
    return x
# Joint index -> joint name for the 15-joint skeleton; the position in the
# tuple is the joint id.
joint_id_to_name = dict(enumerate((
    'Head',
    'Neck',
    'RShoulder',
    'LShoulder',
    'RElbow',
    'LElbow',
    'RHand',
    'LHand',
    'Torso',
    'RHip',
    'LHip',
    'RKnee',
    'LKnee',
    'RFoot',
    'LFoot',
)))
# loading GT keypoints and center points
#
# NOTE: world2pixel() overwrites its argument in place and returns that same
# array, so after these lines keypointsWorld* and keypointsPixel* are two names
# for ONE array that now holds pixel coordinates. The rest of the script passes
# keypointsWorld* around and relies on that, so no copy is made here.
keypointsWorldtest = scio.loadmat(keypointsfileTest)['keypoints3D'].astype(np.float32)
keypointsPixeltest = world2pixel(keypointsWorldtest)

keypointsWorldtrain = scio.loadmat(keypointsfileTrain)['keypoints3D'].astype(np.float32)
keypointsPixeltrain = world2pixel(keypointsWorldtrain)
def transform(img, label, matrix):
    '''
    Apply one 2x3 affine matrix to an image and to its keypoints.

    img: [H, W] label, [N,2]
    matrix: 2x3 affine matrix (e.g. from cv2.getRotationMatrix2D)

    Returns (warped image of size cropWidth x cropHeight, transformed [N,2] points).
    '''
    warped = cv2.warpAffine(img, matrix, (cropWidth, cropHeight))

    # Homogeneous coordinates: [x, y, 1] per keypoint.
    homogeneous = np.ones((keypointsNumber, 3))
    homogeneous[:, :2] = label[:, :2].copy()

    # (2x3) @ (3xN) -> (2xN), then back to one row per keypoint.
    moved = np.matmul(matrix, homogeneous.transpose()).transpose()
    return warped, moved
def dataPreprocess(index, img, keypointsUVD, depth_thres=0.4, augment=True):
    """Build one (image, label) training pair from a depth frame.

    The frame is resized to (cropWidth, cropHeight), cast to float32 and
    divided by 5. Keypoint x/y are rescaled from a 320x240 frame to the crop
    size. With ``augment`` a random rotation and scale is applied to both the
    image and the keypoints, and the keypoint depth is multiplied by the same
    scale.

    Args:
        index: row of ``keypointsUVD`` that belongs to this frame.
        img: 2-D depth image.
        keypointsUVD: array indexed as ``[frame, joint, (x, y, depth)]``.
        depth_thres: accepted for signature compatibility; not used here.
        augment: apply the random rotation/scale when true.

    Returns:
        ``(data, label)``: ``data`` is a float32 tensor [1, cropHeight,
        cropWidth]; ``label`` is a float32 tensor [keypointsNumber, 3] whose
        columns are (y, x, depth).
    """
    if augment:
        # Four crop offsets and a per-pixel depth-noise map used to be drawn
        # here but were never applied. The draws themselves are kept (results
        # discarded) so the seeded NumPy random stream, and therefore the
        # rotation/scale sequence, stays exactly as before.
        for _ in range(4):
            np.random.randint(-1*RandCropShift, RandCropShift)
        np.random.normal(0, RandshiftDepth, cropHeight*cropWidth)

        RandomRotate = np.random.randint(-1*RandRotate, RandRotate)
        RandomScale = np.random.rand()*RandScale[0] + RandScale[1]
        matrix = cv2.getRotationMatrix2D((cropWidth/2, cropHeight/2), RandomRotate, RandomScale)
    else:
        RandomScale = 1
        matrix = None  # no transform is applied on this path

    imgResize = cv2.resize(img[:, :].copy(), (cropWidth, cropHeight), interpolation=cv2.INTER_NEAREST)
    imgResize = np.asarray(imgResize, dtype='float32') / 5

    ## label: rescale x/y from the 320x240 frame to the crop size
    label_xy = np.ones((keypointsNumber, 2), dtype='float32')
    label_xy[:, 0] = keypointsUVD[index, :, 0].copy()*cropWidth/320   # x
    label_xy[:, 1] = keypointsUVD[index, :, 1].copy()*cropHeight/240  # y

    if augment:
        imgResize, label_xy = transform(imgResize, label_xy, matrix)  ## rotation, scale

    imageOutputs = np.ones((cropHeight, cropWidth, 1), dtype='float32')
    imageOutputs[:, :, 0] = imgResize

    # Label columns are stored as (y, x, depth) -- note the x/y swap.
    labelOutputs = np.ones((keypointsNumber, 3), dtype='float32')
    labelOutputs[:, 1] = label_xy[:, 0]
    labelOutputs[:, 0] = label_xy[:, 1]
    labelOutputs[:, 2] = (keypointsUVD[index, :, 2])*RandomScale  # Z

    imageNCHWOut = imageOutputs.transpose(2, 0, 1)  # [H, W, C] --->>> [C, H, W]
    data, label = torch.from_numpy(imageNCHWOut), torch.from_numpy(labelOutputs)
    return data, label
###################### Pytorch dataloader #################
class my_dataloader(torch.utils.data.Dataset):
    """Dataset that reads one depth frame per index from ``<ImgDir><index+1>.mat``.

    Args:
        ImgDir: path prefix of the .mat files (the frame number and '.mat' are
            appended directly, so include a trailing separator).
        keypointsUVD: ground-truth keypoints, indexed by frame.
        num: number of frames; returned by ``len()``.
        augment: when true, frames are randomly rotated/scaled by
            ``dataPreprocess`` and then passed through random erasing.
    """

    def __init__(self, ImgDir, keypointsUVD, num, augment=True):
        self.ImgDir = ImgDir
        self.keypointsUVD = keypointsUVD
        self.num = num
        self.augment = augment
        self.randomErase = random_erasing.RandomErasing(probability = 0.5, sl = 0.02, sh = 0.4, r1 = 0.3, mean=[0])

    def __getitem__(self, index):
        # Files are numbered from 1, dataset indices from 0.
        data4D = scio.loadmat(self.ImgDir + str(index+1) + '.mat')['DepthNormal']
        depth = data4D[:, :]

        # `augment` must be passed by keyword: the 4th positional parameter of
        # dataPreprocess is `depth_thres`, so a positional self.augment landed
        # there and augmentation was always on, even for the test set.
        data, label = dataPreprocess(index, depth, self.keypointsUVD, augment=self.augment)

        if self.augment:
            data = self.randomErase(data)

        return data, label

    def __len__(self):
        return self.num
def _build_loader(image_dir, keypoints, frame_count, augment, shuffle):
    """Create a my_dataloader dataset and the DataLoader that batches it."""
    dataset = my_dataloader(image_dir, keypoints, frame_count, augment=augment)
    loader = torch.utils.data.DataLoader(dataset, batch_size = batch_size,
                                         shuffle = shuffle, num_workers = 8)
    return dataset, loader


# Training data: augmented and shuffled.
train_image_datasets, train_dataloaders = _build_loader(
    trainingImageDir, keypointsWorldtrain, TrainImgFrames, True, True)

# Test data: no augmentation, fixed order.
test_image_datasets, test_dataloaders = _build_loader(
    testingImageDir, keypointsWorldtest, TestImgFrames, False, False)
def train():
    """Train the A2J network for ``nepoch`` epochs, evaluating after every epoch.

    Each epoch: one pass over ``train_dataloaders`` with Adam, a StepLR update,
    an evaluation pass over ``test_dataloaders`` (mean error from
    ``errorCompute``), a checkpoint written into ``save_dir`` and one summary
    line appended to ``<save_dir>/train.log``. Requires a CUDA device.
    """
    net = model.A2J_model(num_classes = keypointsNumber)
    net = net.cuda()

    post_precess = anchor.post_process(shape=[cropHeight//16,cropWidth//16],stride=16,P_h=None, P_w=None)
    criterion = anchor.A2J_loss(shape=[cropHeight//16,cropWidth//16],thres = [16.0,32.0],stride=16,
                                spatialFactor=spatialFactor,img_shape=[cropHeight, cropWidth],P_h=None, P_w=None)
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate, weight_decay=Weight_Decay)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.2)

    logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%Y/%m/%d %H:%M:%S',
                        filename=os.path.join(save_dir, 'train.log'), level=logging.INFO)
    logging.info('======================================================')

    for epoch in range(nepoch):
        net = net.train()
        train_loss_add = 0.0
        Cls_loss_add = 0.0
        Reg_loss_add = 0.0
        timer = time.time()

        # Training loop
        for i, (img, label) in enumerate(train_dataloaders):
            torch.cuda.synchronize()

            img, label = img.cuda(), label.cuda()

            heads = net(img)
            optimizer.zero_grad()

            Cls_loss, Reg_loss = criterion(heads, label)

            loss = 1*Cls_loss + Reg_loss*RegLossFactor
            loss.backward()
            optimizer.step()

            torch.cuda.synchronize()

            # Accumulate per-sample sums so the epoch means below are
            # weighted correctly when the last batch is smaller.
            train_loss_add = train_loss_add + (loss.item())*len(img)
            Cls_loss_add = Cls_loss_add + (Cls_loss.item())*len(img)
            Reg_loss_add = Reg_loss_add + (Reg_loss.item())*len(img)

            # printing loss info
            if i%10 == 0:
                print('epoch: ',epoch, ' step: ', i, 'Cls_loss ',Cls_loss.item(), 'Reg_loss ',Reg_loss.item(), ' total loss ',loss.item())

        # The explicit epoch argument is kept on purpose: dropping it would
        # move every learning-rate decay one epoch earlier.
        scheduler.step(epoch)

        # time taken
        torch.cuda.synchronize()
        timer = time.time() - timer
        timer = timer / TrainImgFrames
        print('==> time to learn 1 sample = %f (ms)' %(timer*1000))

        train_loss_add = train_loss_add / TrainImgFrames
        Cls_loss_add = Cls_loss_add / TrainImgFrames
        Reg_loss_add = Reg_loss_add / TrainImgFrames
        print('mean train_loss_add of 1 sample: %f, #train_indexes = %d' %(train_loss_add, TrainImgFrames))
        print('mean Cls_loss_add of 1 sample: %f, #train_indexes = %d' %(Cls_loss_add, TrainImgFrames))
        print('mean Reg_loss_add of 1 sample: %f, #train_indexes = %d' %(Reg_loss_add, TrainImgFrames))

        # Evaluation pass after every epoch.
        net = net.eval()
        output = torch.FloatTensor()

        for i, (img, label) in tqdm(enumerate(test_dataloaders)):
            with torch.no_grad():
                img = img.cuda()
                heads = net(img)
                pred_keypoints = post_precess(heads, voting=False)
                output = torch.cat([output,pred_keypoints.data.cpu()], 0)

        result = output.cpu().data.numpy()
        Error_test = errorCompute(result, keypointsWorldtest)
        print('epoch: ', epoch, 'Test error:', Error_test)

        # os.path.join keeps the checkpoint relative when save_dir is empty;
        # the former '%s/net_...' pattern resolved to the filesystem root.
        checkpoint_name = ('net_%d_wetD_' % epoch + str(Weight_Decay)
                           + '_depFact_' + str(spatialFactor)
                           + '_RegFact_' + str(RegLossFactor)
                           + '_rndShft_' + str(RandCropShift))
        saveNamePrefix = os.path.join(save_dir, checkpoint_name)
        torch.save(net.state_dict(), saveNamePrefix + '.pth')

        # log
        # Read the learning rate from the optimizer: scheduler.get_lr() is only
        # meaningful inside step() and reports a wrong value at decay epochs.
        current_lr = optimizer.param_groups[0]['lr']
        logging.info('Epoch#%d: total loss=%.4f, Cls_loss=%.4f, Reg_loss=%.4f, Err_test=%.4f, lr = %.6f'
                     %(epoch, train_loss_add, Cls_loss_add, Reg_loss_add, Error_test, current_lr))
def test():
    """Evaluate the checkpoint at ``model_dir`` on the test set.

    Runs the network over ``test_dataloaders``, writes the predictions with
    ``writeTxt`` and prints the mean error from ``errorCompute``. Requires a
    CUDA device.
    """
    model_net = model.A2J_model(num_classes = keypointsNumber)
    model_net.load_state_dict(torch.load(model_dir))
    model_net = model_net.cuda()
    model_net.eval()

    decode = anchor.post_process(shape=[cropHeight//16,cropWidth//16],stride=16,P_h=None, P_w=None)

    # Start from an empty tensor so the concatenation also works with no batches.
    batches = [torch.FloatTensor()]
    torch.cuda.synchronize()
    with torch.no_grad():
        for _, (img, label) in tqdm(enumerate(test_dataloaders)):
            img, label = img.cuda(), label.cuda()
            prediction = decode(model_net(img), voting=False)
            batches.append(prediction.data.cpu())
    torch.cuda.synchronize()

    result = torch.cat(batches, 0).cpu().data.numpy()
    writeTxt(result)
    error = errorCompute(result, keypointsWorldtest)
    print('Error:', error)
def errorCompute(source, target):
    """Return the mean per-joint distance between predictions and ground truth.

    Args:
        source: predictions, indexed ``[frame, joint, (y, x, depth)]`` in crop
            coordinates.
        target: ground truth of the same shape, passed through ``pixel2world``
            (on a copy) before comparison.

    Returns:
        Mean Euclidean distance over all frames and joints.
    """
    assert np.shape(source)==np.shape(target), "source has different shape with target"

    # Reorder to [x, y, z] and rescale from the crop back to 320x240.
    pred_uvd = source.copy()
    pred_uvd[:, :, 0] = source[:, :, 1]*320/cropWidth   # x
    pred_uvd[:, :, 1] = source[:, :, 0]*240/cropHeight  # y

    # pixel2world works in place, so it only ever sees private copies here.
    labels = pixel2world(target.copy())
    outputs = pixel2world(pred_uvd)

    distances = np.sqrt(np.sum((labels - outputs) ** 2, axis=2))
    return np.mean(distances)
def writeTxt(result):
    """Write predictions to ``<save_dir>/<result_file>``, one frame per line.

    Each line holds ``keypointsNumber*3`` space-separated values (x, y, depth
    per joint, x/y rescaled from the crop to 320x240), each followed by a
    space, and ends with a newline.

    Args:
        result: predictions, indexed ``[frame, joint, (y, x, depth)]``.
    """
    # Reorder to [x, y, z] and rescale from the crop back to 320x240.
    uvd = result.copy()
    uvd[:, :, 0] = result[:, :, 1]*320/cropWidth   # x
    uvd[:, :, 1] = result[:, :, 0]*240/cropHeight  # y

    rows = uvd.reshape(len(result), -1)
    values_per_row = keypointsNumber*3

    with open(os.path.join(save_dir, result_file), 'w') as f:
        for row in rows:
            f.write(''.join(str(row[j]) + ' ' for j in range(values_per_row)))
            f.write('\n')
# Script entry point: train first, then evaluate the checkpoint at model_dir.
# (The double underscores of __name__ / '__main__' were lost when this was
# pasted as formatted text.)
if __name__ == '__main__':
    train()
    test()
from a2j.
import cv2
import torch
import torch.utils.data
import torch.optim.lr_scheduler as lr_scheduler
import numpy as np
import scipy.io as scio
import os
from PIL import Image
from torch.autograd import Variable
import model as model
import anchor as anchor
from tqdm import tqdm
import random_erasing
import logging
import time
import datetime
import randomos.environ["CUDA_VISIBLE_DEVICES"] = "0"
DataHyperParms
TrainImgFrames = 1000
TestImgFrames = 100
keypointsNumber = 15
cropWidth = 288
cropHeight = 288
batch_size = 64
learning_rate = 0.00035
Weight_Decay = 1e-4
nepoch = 35
RegLossFactor = 3
spatialFactor = 0.5
RandCropShift = 5
RandshiftDepth = 1
RandRotate = 180
RandScale = (1.0, 0.5)randomseed = 12345
random.seed(randomseed)
np.random.seed(randomseed)
torch.manual_seed(randomseed)save_dir = ''
try:
os.makedirs(save_dir)
except OSError:
passtrainingImageDir = ''
testingImageDir = '' # mat images
keypointsfileTest = ''
keypointsfileTrain = ''
model_dir = ''
result_file = 'result_test.txt'def pixel2world(x):
x[:, :, 0] = (x[:, :, 0] - 160.0) * x[:, :, 2] * 0.0035
x[:, :, 1] = (120.0 - x[:, :, 1]) * x[:, :, 2] * 0.0035
return xdef world2pixel(x):
x[:, :, 0] = 160.0 + x[:, :, 0] / (x[:, :, 2] * 0.0035)
x[:, :, 1] = 120.0 - x[:, :, 1] / (x[:, :, 2] * 0.0035)
return xjoint_id_to_name = {
0: 'Head',
1: 'Neck',
2: 'RShoulder',
3: 'LShoulder',
4: 'RElbow',
5: 'LElbow',
6: 'RHand',
7: 'LHand',
8: 'Torso',
9: 'RHip',
10: 'LHip',
11: 'RKnee',
12: 'LKnee',
13: 'RFoot',
14: 'LFoot',
}loading GT keypoints and center points
keypointsWorldtest = scio.loadmat(keypointsfileTest)['keypoints3D'].astype(np.float32)
keypointsPixeltest = np.ones((len(keypointsWorldtest),15,2),dtype='float32')
keypointsPixeltest = world2pixel(keypointsWorldtest)keypointsWorldtrain = scio.loadmat(keypointsfileTrain)['keypoints3D'].astype(np.float32)
keypointsPixeltrain = np.ones((len(keypointsWorldtrain),15,2),dtype='float32')
keypointsPixeltrain = world2pixel(keypointsWorldtrain)def transform(img, label, matrix):
'''
img: [H, W] label, [N,2]
'''
img_out = cv2.warpAffine(img,matrix,(cropWidth,cropHeight))
label_out = np.ones((keypointsNumber, 3))
label_out[:,:2] = label[:,:2].copy()
label_out = np.matmul(matrix, label_out.transpose())
label_out = label_out.transpose()return img_out, label_out
def dataPreprocess(index, img, keypointsUVD, depth_thres=0.4, augment=True):
imageOutputs = np.ones((cropHeight, cropWidth, 1), dtype='float32') labelOutputs = np.ones((keypointsNumber, 3), dtype = 'float32') if augment: RandomOffset_1 = np.random.randint(-1*RandCropShift,RandCropShift) RandomOffset_2 = np.random.randint(-1*RandCropShift,RandCropShift) RandomOffset_3 = np.random.randint(-1*RandCropShift,RandCropShift) RandomOffset_4 = np.random.randint(-1*RandCropShift,RandCropShift) RandomOffsetDepth = np.random.normal(0, RandshiftDepth, cropHeight*cropWidth).reshape(cropHeight,cropWidth) RandomOffsetDepth[np.where(RandomOffsetDepth < RandshiftDepth)] = 0 RandomRotate = np.random.randint(-1*RandRotate,RandRotate) RandomScale = np.random.rand()*RandScale[0]+RandScale[1] matrix = cv2.getRotationMatrix2D((cropWidth/2,cropHeight/2),RandomRotate,RandomScale) else: RandomOffset_1, RandomOffset_2, RandomOffset_3, RandomOffset_4 = 0, 0, 0, 0 RandomRotate = 0 RandomScale = 1 RandomOffsetDepth = 0 matrix = cv2.getRotationMatrix2D((cropWidth/2,cropHeight/2),RandomRotate,RandomScale) imCrop = img[:, :].copy() imgResize = cv2.resize(imCrop, (cropWidth, cropHeight), interpolation=cv2.INTER_NEAREST) imgResize = np.asarray(imgResize,dtype = 'float32') # H*W*C imgResize = imgResize / 5 ## label label_xy = np.ones((keypointsNumber, 2), dtype = 'float32') label_xy[:,0] = keypointsUVD[index,:,0].copy()*cropWidth/320 # x label_xy[:,1] = keypointsUVD[index,:,1].copy()*cropHeight/240 # y if augment: imgResize, label_xy = transform(imgResize, label_xy, matrix) ## rotation, scale imageOutputs[:,:,0] = imgResize labelOutputs[:,1] = label_xy[:,0] labelOutputs[:,0] = label_xy[:,1] labelOutputs[:,2] = (keypointsUVD[index,:,2])*RandomScale # Z imageOutputs = np.asarray(imageOutputs) imageNCHWOut = imageOutputs.transpose(2, 0, 1) # [H, W, C] --->>> [C, H, W] imageNCHWOut = np.asarray(imageNCHWOut) labelOutputs = np.asarray(labelOutputs) data, label = torch.from_numpy(imageNCHWOut), torch.from_numpy(labelOutputs) return data, label
###################### Pytorch dataloader #################
class my_dataloader(torch.utils.data.Dataset):def __init__(self, ImgDir, keypointsUVD, num, augment=True): self.ImgDir = ImgDir self.keypointsUVD = keypointsUVD self.num = num self.augment = augment self.randomErase = random_erasing.RandomErasing(probability = 0.5, sl = 0.02, sh = 0.4, r1 = 0.3, mean=[0]) def __getitem__(self, index): data4D = scio.loadmat(self.ImgDir + str(index+1) + '.mat')['DepthNormal'] depth = data4D[:,:] data, label = dataPreprocess(index, depth, self.keypointsUVD, self.augment) if self.augment: data = self.randomErase(data) return data, label def __len__(self): return self.num
train_image_datasets = my_dataloader(trainingImageDir, keypointsWorldtrain, TrainImgFrames, augment=True)
train_dataloaders = torch.utils.data.DataLoader(train_image_datasets, batch_size = batch_size,
shuffle = True, num_workers = 8)test_image_datasets = my_dataloader(testingImageDir, keypointsWorldtest, TestImgFrames, augment=False)
test_dataloaders = torch.utils.data.DataLoader(test_image_datasets, batch_size = batch_size,
shuffle = False, num_workers = 8)def train():
net = model.A2J_model(num_classes = keypointsNumber) net = net.cuda() post_precess = anchor.post_process(shape=[cropHeight//16,cropWidth//16],stride=16,P_h=None, P_w=None) criterion = anchor.A2J_loss(shape=[cropHeight//16,cropWidth//16],thres = [16.0,32.0],stride=16,\ spatialFactor=spatialFactor,img_shape=[cropHeight, cropWidth],P_h=None, P_w=None) optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate, weight_decay=Weight_Decay) scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.2) logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%Y/%m/%d %H:%M:%S', \ filename=os.path.join(save_dir, 'train.log'), level=logging.INFO) logging.info('======================================================') for epoch in range(nepoch): net = net.train() train_loss_add = 0.0 Cls_loss_add = 0.0 Reg_loss_add = 0.0 timer = time.time() # Training loop for i, (img, label) in enumerate(train_dataloaders): torch.cuda.synchronize() img, label = img.cuda(), label.cuda() heads = net(img) #print(regression) optimizer.zero_grad() Cls_loss, Reg_loss = criterion(heads, label) loss = 1*Cls_loss + Reg_loss*RegLossFactor loss.backward() optimizer.step() torch.cuda.synchronize() train_loss_add = train_loss_add + (loss.item())*len(img) Cls_loss_add = Cls_loss_add + (Cls_loss.item())*len(img) Reg_loss_add = Reg_loss_add + (Reg_loss.item())*len(img) # printing loss info if i%10 == 0: print('epoch: ',epoch, ' step: ', i, 'Cls_loss ',Cls_loss.item(), 'Reg_loss ',Reg_loss.item(), ' total loss ',loss.item()) scheduler.step(epoch) # time taken torch.cuda.synchronize() timer = time.time() - timer timer = timer / TrainImgFrames print('==> time to learn 1 sample = %f (ms)' %(timer*1000)) train_loss_add = train_loss_add / TrainImgFrames Cls_loss_add = Cls_loss_add / TrainImgFrames Reg_loss_add = Reg_loss_add / TrainImgFrames print('mean train_loss_add of 1 sample: %f, #train_indexes = %d' %(train_loss_add, TrainImgFrames)) print('mean Cls_loss_add of 1 sample: %f, #train_indexes = 
%d' %(Cls_loss_add, TrainImgFrames)) print('mean Reg_loss_add of 1 sample: %f, #train_indexes = %d' %(Reg_loss_add, TrainImgFrames)) Error_test = 0 Error_train = 0 Error_test_wrist = 0 if (epoch % 1 == 0): net = net.eval() output = torch.FloatTensor() outputTrain = torch.FloatTensor() for i, (img, label) in tqdm(enumerate(test_dataloaders)): with torch.no_grad(): img, label = img.cuda(), label.cuda() heads = net(img) pred_keypoints = post_precess(heads, voting=False) output = torch.cat([output,pred_keypoints.data.cpu()], 0) result = output.cpu().data.numpy() Error_test = errorCompute(result,keypointsWorldtest,) print('epoch: ', epoch, 'Test error:', Error_test) saveNamePrefix = '%s/net_%d_wetD_' % (save_dir, epoch) + str(Weight_Decay) + '_depFact_' + str(spatialFactor) + '_RegFact_' + str(RegLossFactor) + '_rndShft_' + str(RandCropShift) torch.save(net.state_dict(), saveNamePrefix + '.pth') # log logging.info('Epoch#%d: total loss=%.4f, Cls_loss=%.4f, Reg_loss=%.4f, Err_test=%.4f, lr = %.6f' %(epoch, train_loss_add, Cls_loss_add, Reg_loss_add, Error_test, scheduler.get_lr()[0]))
def test():
net = model.A2J_model(num_classes = keypointsNumber)
net.load_state_dict(torch.load(model_dir))
net = net.cuda()
net.eval()post_precess = anchor.post_process(shape=[cropHeight//16,cropWidth//16],stride=16,P_h=None, P_w=None) output = torch.FloatTensor() torch.cuda.synchronize() for i, (img, label) in tqdm(enumerate(test_dataloaders)): with torch.no_grad(): img, label = img.cuda(), label.cuda() heads = net(img) pred_keypoints = post_precess(heads,voting=False) output = torch.cat([output,pred_keypoints.data.cpu()], 0) torch.cuda.synchronize() result = output.cpu().data.numpy() writeTxt(result) error = errorCompute(result, keypointsWorldtest) print('Error:', error)
def errorCompute(source, target):
assert np.shape(source)==np.shape(target), "source has different shape with target"Test1_ = source.copy() target_ = target.copy() Test1_[:, :, 0] = source[:,:,1] Test1_[:, :, 1] = source[:,:,0] Test1 = Test1_ # [x, y, z] for i in range(len(Test1_)): Test1[i,:,0] = Test1_[i,:,0]*320/cropWidth # x Test1[i,:,1] = Test1_[i,:,1]*240/cropHeight # y Test1[i,:,2] = source[i,:,2] labels = pixel2world(target_) outputs = pixel2world(Test1.copy()) errors = np.sqrt(np.sum((labels - outputs) ** 2, axis=2)) return np.mean(errors)
def writeTxt(result):
resultUVD_ = result.copy() resultUVD_[:, :, 0] = result[:,:,1] resultUVD_[:, :, 1] = result[:,:,0] resultUVD = resultUVD_ # [x, y, z] for i in range(len(result)): resultUVD[i,:,0] = resultUVD_[i,:,0]*320/cropWidth # x resultUVD[i,:,1] = resultUVD_[i,:,1]*240/cropHeight # y resultUVD[i,:,2] = result[i,:,2] resultReshape = resultUVD.reshape(len(result), -1) with open(os.path.join(save_dir, result_file), 'w') as f: for i in range(len(resultReshape)): for j in range(keypointsNumber*3): f.write(str(resultReshape[i, j])+' ') f.write('\n') f.close()
if name == 'main':
train()
test()
Thank you very much, bro! Do you mind if I add your email or Skype? My email is [email protected]
from a2j.
If you don't want to use the bndbox, you can train on the data yourself, or modify the training code.
Hi bro. Can you share the ITOP model or the K2PHD model?
best regards!
from a2j.
Hi @zeroXscorpion7 ,
- Were you able to utilize this model to predict the Joints for a custom dataset?
- I'm also trying to pass one depth frame along with the ITOP side dataset, changing the mean value so that the depth frame fed to the model matches the ITOP_side depth frames. Unfortunately, the results are very bad.
- Could you tell me whether you were able to make any further progress on this?
from a2j.
Related Issues (20)
- How to obtain the center point coordinates and depth values during NYU inference HOT 4
- an you provide the mat file of the detection bounding boxes of the itop side and top training set
- Can you provide the mat file of the detection bounding boxes of the itop side and top training set HOT 2
- Problem while retraining A2J on NYU HOT 3
- Please add a requirements.txt file HOT 1
- 好奇下载的网络有没finetuning过 HOT 2
- Swapping Width and Height dimensions HOT 1
- Selecting anchor points with P = 0.02 HOT 2
- what's the mean of "depthFactor"? HOT 4
- Input Files for the ITOP dataset HOT 1
- Hands2017的数据是基于绝对3D坐标的,你们训练是基于UVD的, 请问Hands2017是如何训练的呢? HOT 6
- 训练ITOP_side数据集 HOT 1
- Unable to reproduce the results for the ITOP side view human body dataset. HOT 1
- reason for num_channel expansion? HOT 3
- training with missing keypoints? HOT 3
- How can I convert pre-trained model to coreml to use it on ios application?
- Why are the Anchor-Points generated on a grid with spaces between them? HOT 1
- Thank you so much!
- Hi, mat files are generated using this script: https://github.com/zhangboshen/A2J/blob/master/data/icvl/data_preprosess.m HOT 1
- Enquiry about drawing human 3D pose on our one depth image HOT 1
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from a2j.