Hi all.
I used DagNN to train my NN for application of image auto white balancing and achieved the final objective of approximately 0.1 (Euclidean distance with pdist function).
Yesterday I found AutoNN an impressive wrapper for MatConvNet and rebuilt my architecture in AutoNN. Using the same layers constructures and initializing with the same parameters, but the objective is 6~8 after same number of epoches.
Any idea about what mistakes I make will be appreciated.
# DagNN code
opts.batchSize = [];
opts.imageSize = [384 384];
opts.averageImage = zeros(3,1) ;
opts.colorDeviation = zeros(3) ;
opts.cudnnWorkspaceLimit = 4*1024*1024*1204 ; % 4GB
opts = vl_argparse(opts, varargin) ;
net = dagnn.DagNN() ;
% -------------------------------------------------------------------------
% Add input section
% -------------------------------------------------------------------------
% Block #1
net.addLayer('conv1',...
dagnn.Conv('size', [1 1 3 8], 'hasBias', true, 'stride', [1 1], 'pad', [0 0 0 0]),...
{'inputimage'},...
{'conv1'},...
{'conv1f' 'conv1b'});
net.addLayer('relu1',...
dagnn.ReLU(),...
{'conv1'},...
{'relu1'},...
{});
net.addLayer('pool1',...
dagnn.Pooling('method', 'max', 'poolSize', [2 2], 'stride', [2 2], 'pad', [0 0 0 0]),...
{'relu1'},...
{'pool1'},...
{});
% Block #2
net.addLayer('conv2',...
dagnn.Conv('size', [5 5 8 32], 'hasBias', true, 'stride', [1 1], 'pad', [2 2 2 2]),...
{'pool1'},...
{'conv2'},...
{'conv2f' 'conv2b'});
net.addLayer('relu2',...
dagnn.ReLU(),...
{'conv2'},...
{'relu2'},...
{});
net.addLayer('pool2',...
dagnn.Pooling('method', 'max', 'poolSize', [2, 2], 'stride', [2 2], 'pad', [0 0 0 0]),...
{'relu2'},...
{'pool2'},...
{});
% Block #3
net.addLayer('conv3',...
dagnn.Conv('size', [3 3 32 128], 'hasBias', true, 'stride', [3 3], 'pad', [3 3 3 3]),...
{'pool2'},...
{'conv3'},...
{'conv3f' 'conv3b'});
net.addLayer('relu3',...
dagnn.ReLU(),...
{'conv3'},...
{'relu3'},...
{});
net.addLayer('pool3',...
dagnn.Pooling('method', 'max', 'poolSize', [2, 2], 'stride', [2 2], 'pad', [0 0 0 0]),...
{'relu3'},...
{'pool3'},...
{});
% Block #4
net.addLayer('conv4',...
dagnn.Conv('size', [1 1 128 256], 'hasBias', true, 'stride', [2 2], 'pad', [0 0 0 0]),...
{'pool3'},...
{'conv4'},...
{'conv4f' 'conv4b'});
net.addLayer('relu4',...
dagnn.ReLU(),...
{'conv4'},...
{'relu4'},...
{});
% Block #5
net.addLayer('conv5',...
dagnn.Conv('size', [9 9 256 64], 'hasBias', true, 'stride', [1 1], 'pad', [0 0 0 0]),...
{'relu4'},...
{'conv5'},...
{'conv5f' 'conv5b'});
net.addLayer('relu5',...
dagnn.ReLU(),...
{'conv5'},...
{'relu5'},...
{});
% Block #6
net.addLayer('cat1',...
dagnn.Concat('dim', 3),...
{'relu5', 'inputsensor', 'inputgyro'},...
{'cat1'});
% Block #7: Muli-Layer-Perceptron
net.addLayer('fc1',...
dagnn.Conv('size', [1 1 73 512], 'hasBias', true, 'stride', [1, 1], 'pad', [0 0 0 0]),...
{'cat1'},...
{'fc1'},...
{'conv6f' 'conv6b'});
net.addLayer('relu6',...
dagnn.ReLU(),...
{'fc1'},...
{'relu6'},...
{});
% Block #8
net.addLayer('prediction',...
dagnn.Conv('size', [1 1 512 2], 'hasBias', true, 'stride', [1, 1], 'pad', [0 0 0 0]),...
{'relu6'},...
{'prediction'},...
{'conv7f' 'conv7b'});
% Block #9: pdist
net.addLayer('objective',...
dagnn.PDist('p', 2, 'aggregate', true),...
{'prediction', 'label'},...
{'objective'},...
{});
% -------------------------------------------------------------------------
% Meta parameters
% -------------------------------------------------------------------------
net.meta.imageSize = opts.imageSize ;
net.meta.averageImage = opts.averageImage ;
lr = [0.001*ones(1,3), 0.0005*ones(1,3), 0.0001*ones(1,3), 0.00005*ones(1,3), 0.00001*ones(1,5)] ;
net.meta.trainOpts.learningRate = lr ;
net.meta.trainOpts.numEpochs = numel(lr) ;
net.meta.trainOpts.momentum = 0.9;
net.meta.trainOpts.batchSize = opts.batchSize ;
net.meta.trainOpts.numSubBatches = 1 ;
net.meta.trainOpts.weightDecay = 0.0001 ;
# params init
f = 1/100;
f_ind = net.layers(1).paramIndexes(1);
b_ind = net.layers(1).paramIndexes(2);
net.params(f_ind).value = 10*f*randn(size(net.params(f_ind).value), 'single');
net.params(f_ind).learningRate = 1;
net.params(f_ind).weightDecay = 1;
for l=2:length(net.layers)
if(strcmp(class(net.layers(l).block), 'dagnn.Conv'))
f_ind = net.layers(l).paramIndexes(1);
b_ind = net.layers(l).paramIndexes(2);
[h,w,in,out] = size(net.params(f_ind).value);
net.params(f_ind).value = f*randn(size(net.params(f_ind).value), 'single');
net.params(f_ind).learningRate = 1;
net.params(f_ind).weightDecay = 1;
net.params(b_ind).value = f*randn(size(net.params(b_ind).value), 'single');
net.params(b_ind).learningRate = 0.5;
net.params(b_ind).weightDecay = 1;
end
end
# AutoNN code
opts.batchSize = 50;
opts.imageSize = [384 384];
opts.averageImage = zeros(3,1) ;
opts.colorDeviation = zeros(3) ;
opts.cudnnWorkspaceLimit = 4*1024*1024*1204 ; % 4GB
opts.learningRate = [0.001*ones(1,3), 0.0005*ones(1,3), 0.0001*ones(1,3), 0.00005*ones(1,3), 0.00001*ones(1,5)] ;
opts = vl_argparse(opts, varargin) ;
f = 1/100; % initialization parameter
% -------------------------------------------------------------------------
% Add input section
% -------------------------------------------------------------------------
inputimage = Input();
inputsensor = Input();
inputgyro = Input();
label = Input();
% Block #1
% create parameters explicitly
filterSize1 = [1 1 3 8];
filters1 = Param('value', 10*f*randn(filterSize1(1),filterSize1(2),filterSize1(3),filterSize1(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases1 = Param('value', zeros(filterSize1(4), 1, 'single'), 'learningRate', 1, 'weightDecay', 1);
conv1 = vl_nnconv(inputimage, filters1, biases1, 'stride', [1 1], 'pad', [0 0 0 0]);
relu1 = vl_nnrelu(conv1);
pool1 = vl_nnpool(relu1, 2, 'stride', 2);
% Block #2
filterSize2 = [5 5 8 32];
filters2 = Param('value', f*randn(filterSize2(1),filterSize2(2),filterSize2(3),filterSize2(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases2 = Param('value', f*randn(1,filterSize2(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
conv2 = vl_nnconv(pool1, filters2, biases2, 'stride', [1 1], 'pad', [2 2 2 2]);
relu2 = vl_nnrelu(conv2);
pool2 = vl_nnpool(relu2, 2, 'stride', 2);
% Block #3
filterSize3 = [3 3 32 128];
filters3 = Param('value', f*randn(filterSize3(1),filterSize3(2),filterSize3(3),filterSize3(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases3 = Param('value', f*randn(1, filterSize3(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
conv3 = vl_nnconv(pool2, filters3, biases3, 'stride', [3 3], 'pad', [3 3 3 3]);
relu3 = vl_nnrelu(conv3);
pool3 = vl_nnpool(relu3, 2, 'stride', 2);
% Block #4
filterSize4 = [1 1 128 256];
filters4 = Param('value', f*randn(filterSize4(1),filterSize4(2),filterSize4(3),filterSize4(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases4 = Param('value', f*randn(1, filterSize4(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
conv4 = vl_nnconv(pool3, filters4, biases4, 'stride', [2 2], 'pad', [0 0 0 0]);
relu4 = vl_nnrelu(conv4);
% Block #5
filterSize5 = [9 9 256 64];
filters5 = Param('value', f*randn(filterSize5(1),filterSize5(2),filterSize5(3),filterSize5(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases5 = Param('value', f*randn(1, filterSize5(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
conv5 = vl_nnconv(relu4, filters5, biases5, 'stride', [1 1], 'pad', [0 0 0 0]);
relu5 = vl_nnrelu(conv5);
% Block #6: concat
cat6 = cat(3, relu5, inputsensor, inputgyro);
% Block #7: Muli-Layer-Perceptron
filterSize7 = [1 1 73 512];
filters7 = Param('value', f*randn(filterSize7(1),filterSize7(2),filterSize7(3),filterSize7(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases7 = Param('value', f*randn(1, filterSize7(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
fc7 = vl_nnconv(cat6, filters7, biases7, 'stride', [1 1], 'pad', [0 0 0 0]);
relu7 = vl_nnrelu(fc7);
% Block #8: prediction
filterSize8 = [1 1 512 2];
filters8 = Param('value', f*randn(filterSize8(1),filterSize8(2),filterSize8(3),filterSize8(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases8 = Param('value', f*randn(1, filterSize8(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
prediction8 = vl_nnconv(relu7, filters8, biases8, 'stride', [1 1], 'pad', [0 0 0 0]);
% Block #9: pdist
objective = vl_nnpdist(prediction8, label, 2, 'aggregate', true);
% layers name assignment
Layer.workspaceNames();
% compile the network
inputimage.gpu = true;
net = Net(objective);
net.meta.imageSize = opts.imageSize ;
net.meta.averageImage = opts.averageImage ;
net.meta.trainOpts.learningRate = opts.learningRate ;
net.meta.trainOpts.numEpochs = numel(opts.learningRate) ;
net.meta.trainOpts.momentum = 0.85 ;
net.meta.trainOpts.batchSize = opts.batchSize ;
net.meta.trainOpts.numSubBatches = 1 ;
net.meta.trainOpts.weightDecay = 0.0001 ;
try
layer = Layer.fromCompiledNet(net);
layer{1}.sequentialNames;
layer{1}.plotPDF();
catch
end