Is there a way to train on the CPU using train_net.py?

<a class="user-mention notranslate" data-hovercard-type="user" data-hovercard-url="/us

Train with CPU about fast-rcnn HOT 6 CLOSED

rbgirshick commented on June 15, 2024

Train with CPU

from fast-rcnn.

Comments (6)

zhouphd commented on June 15, 2024 1

@sunshineatnoon
I tried to implement the CPU mode, but it still cannot pass runtest. Hope it helps :)

`// ------------------------------------------------------------------
// Fast R-CNN
// Copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Ross Girshick
// ------------------------------------------------------------------

#include <cmath>

#include "caffe/fast_rcnn_layers.hpp"

namespace caffe {

template
void SmoothL1LossLayer::LayerSetUp(
const vector<Blob>& bottom, const vector<Blob>& top) {
SmoothL1LossParameter loss_param = this->layer_param_.smooth_l1_loss_param();
sigma2_ = loss_param.sigma() * loss_param.sigma();
has_weights_ = (bottom.size() >= 3);
if (has_weights_) {
CHECK_EQ(bottom.size(), 4) << "If weights are used, must specify both "
"inside and outside weights";
}
}

template
void SmoothL1LossLayer::Reshape(
const vector<Blob>& bottom, const vector<Blob>& top) {
LossLayer::Reshape(bottom, top);
CHECK_EQ(bottom[0]->channels(), bottom[1]->channels());
CHECK_EQ(bottom[0]->height(), bottom[1]->height());
CHECK_EQ(bottom[0]->width(), bottom[1]->width());
if (has_weights_) {
CHECK_EQ(bottom[0]->channels(), bottom[2]->channels());
CHECK_EQ(bottom[0]->height(), bottom[2]->height());
CHECK_EQ(bottom[0]->width(), bottom[2]->width());
CHECK_EQ(bottom[0]->channels(), bottom[3]->channels());
CHECK_EQ(bottom[0]->height(), bottom[3]->height());
CHECK_EQ(bottom[0]->width(), bottom[3]->width());
}
diff_.Reshape(bottom[0]->num(), bottom[0]->channels(),
bottom[0]->height(), bottom[0]->width());
errors_.Reshape(bottom[0]->num(), bottom[0]->channels(),
bottom[0]->height(), bottom[0]->width());
// vector of ones used to sum
ones_.Reshape(bottom[0]->num(), bottom[0]->channels(),
bottom[0]->height(), bottom[0]->width());
for (int i = 0; i < bottom[0]->count(); ++i) {
ones_.mutable_cpu_data()[i] = Dtype(1);
}
}

template
void SmoothL1LossLayer::Forward_cpu(const vector<Blob>& bottom,
const vector<Blob>& top) {
// NOT_IMPLEMENTED;
int count = bottom[0]->count();
//int num = bottom[0]->num();
const Dtype* in = diff_.cpu_data();
Dtype* out = errors_.mutable_cpu_data();
caffe_set(errors_.count(), Dtype(0), out);

caffe_sub(
count,
bottom[0]->cpu_data(),
bottom[1]->cpu_data(),
diff_.mutable_cpu_data()); // d := b0 - b1
if (has_weights_) {
// apply "inside" weights
caffe_mul(
count,
bottom[2]->cpu_data(),
diff_.cpu_data(),
diff_.mutable_cpu_data()); // d := w_in * (b0 - b1)
}

for (int index = 0;index < count; ++index){
Dtype val = in[index];
Dtype abs_val = abs(val);
if (abs_val < 1.0 / sigma2_) {
out[index] = 0.5 * val * val * sigma2_;
} else {
out[index] = abs_val - 0.5 / sigma2_;
}
}

if (has_weights_) {
// apply "outside" weights
caffe_mul(
count,
bottom[3]->cpu_data(),
errors_.cpu_data(),
errors_.mutable_cpu_data()); // d := w_out * SmoothL1(w_in * (b0 - b1))
}

Dtype loss = caffe_cpu_dot(count, ones_.cpu_data(), errors_.cpu_data());
top[0]->mutable_cpu_data()[0] = loss / bottom[0]->num();
}

template
void SmoothL1LossLayer::Backward_cpu(const vector<Blob>& top,
const vector& propagate_down, const vector<Blob>& bottom) {
// NOT_IMPLEMENTED;
int count = diff_.count();
//int num = diff_.num();
const Dtype* in = diff_.cpu_data();
Dtype* out = errors_.mutable_cpu_data();
caffe_set(errors_.count(), Dtype(0), out);

for (int index = 0;index < count; ++index){
Dtype val = in[index];
Dtype abs_val = abs(val);
if (abs_val < 1.0 / sigma2_) {
out[index] = sigma2_ * val;
} else {
out[index] = (Dtype(0) < val) - (val < Dtype(0));
}
}

for (int i = 0; i < 2; ++i) {
if (propagate_down[i]) {
const Dtype sign = (i == 0) ? 1 : -1;
const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
caffe_cpu_axpby(
count, // count
alpha, // alpha
diff_.cpu_data(), // x
Dtype(0), // beta
bottom[i]->mutable_cpu_diff()); // y
if (has_weights_) {
// Scale by "inside" weight
caffe_mul(
count,
bottom[2]->cpu_data(),
bottom[i]->cpu_diff(),
bottom[i]->mutable_cpu_diff());
// Scale by "outside" weight
caffe_mul(
count,
bottom[3]->cpu_data(),
bottom[i]->cpu_diff(),
bottom[i]->mutable_cpu_diff());
}
}
}
}

// NOTE(review): STUB_GPU is only expanded when CPU_ONLY is defined; presumably
// it supplies placeholder GPU methods for builds without CUDA - confirm
// against the macro definition.
#ifdef CPU_ONLY
STUB_GPU(SmoothL1LossLayer);
#endif

// NOTE(review): presumably these instantiate the layer template for the
// supported Dtype types and register it under the "SmoothL1Loss" layer type -
// confirm against the macro definitions.
INSTANTIATE_CLASS(SmoothL1LossLayer);
REGISTER_LAYER_CLASS(SmoothL1Loss);

} // namespace caffe
`

from fast-rcnn.

rbgirshick commented on June 15, 2024

train_net.py requires some minor code changes to support CPU training (add a --cpu option to argparser and then handle it appropriately; see demo.py for an example of this). I might add this to the code, but for my workflow CPU-based training is not very useful. Feel free to PR the change (if you do, you should also add a CPU option to test_net.py).

from fast-rcnn.

ssakhavi commented on June 15, 2024

Maybe the main reason I'm asking this is that for memory-heavy nets (like GoogLeNet), the CPU is sometimes the only option (due to limited GPU memory). I have tried the CPU mode in the demo and it isn't as fast as the GPU. You are right that the whole point of FRCN is to be faster. Thanks for the reply.

from fast-rcnn.

sunshineatnoon commented on June 15, 2024

@ssakhavi Have you tried to train rcnn with cpu?

from fast-rcnn.

sunshineatnoon commented on June 15, 2024

@rbgirshick hi~ It seems that the SmoothL1LossLayer has not been implemented on CPU, so even after changing code in train_net.py, I still cannot train with CPU.

from fast-rcnn.

ericromanenghi commented on June 15, 2024

There are some layers that do not support CPU because they are implemented only for GPU.

from fast-rcnn.

Train with CPU about fast-rcnn HOT 6 CLOSED

Comments (6)

Related Issues (20)

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent