目錄:第一步:在caffe.proto中添加LayerParameter相關參數;第二步:實現layer的cpp/hpp文件;第三步:測試。原文鏈接:http://blog.csdn.net/u011956147/article/details/60762463
在caffe中新建layer主要遵從以下幾步:
在caffe.proto中添加LayerParameter相關參數;編寫實現layer的函數,包括hpp、cpp和cu文件,有時還需要修改layer_factory;調試。由于項目需要,這里主要是把Faster RCNN 中的proposal layer換成C++的,只需要cpu版本,主要是參看的pvanet,按照它的代碼自己過了一遍,按自己的要求做了相應的修改,在這里記錄下學習原始proposal layer的編寫過程。
在文件caffe/src/proto/caffe.proto中
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 147 (last added: recurrent_param)
message LayerParameter {
  optional string name = 1;   // the layer name
  optional string type = 2;   // the layer type
  repeated string bottom = 3; // the name of each bottom blob
  repeated string top = 4;    // the name of each top blob
  . . .
  // NOTE: message type names are case-sensitive; this must be spelled
  // exactly "ROIPoolingParameter" (was mistyped "ROipoolingParameter").
  optional ROIPoolingParameter roi_pooling_param = 8266711;
  optional SmoothL1LossParameter smooth_l1_loss_param = 8266712;
  optional ProposalParameter proposal_param = 8266713;
}
在這個文件中會出現V0LayerParameter和V1LayerParameter,這兩個參數是在舊版caffe中的,不用管,上面出現的ROIPoolingParameter和SmoothL1LossParameter是基于py-faster-rcnn代碼來的,添加自己的ProposalParameter即可。 參考其他層的實現,然后就需要ProposalParameter,仿照python代碼的寫法,定義feat_stride等參數
// Message that stores parameters used by ProposalLayer
message ProposalParameter {
  optional uint32 feat_stride = 1 [default = 16];    // stride of the feature map w.r.t. the input image
  optional uint32 base_size = 2 [default = 16];      // side length of the base anchor box
  optional uint32 min_size = 3 [default = 16];       // minimum proposal height/width (scaled by im_info in the layer)
  repeated float ratio = 4;                          // anchor aspect ratios to enumerate
  repeated float scale = 5;                          // anchor scales to enumerate
  optional uint32 pre_nms_topn = 6 [default = 6000]; // keep top-N proposals before NMS
  optional uint32 post_nms_topn = 7 [default = 300]; // keep top-N proposals after NMS
  optional float nms_thresh = 8 [default = 0.7];     // NMS IoU threshold
}
像ConvolutionParameter其中還會有下面這些參數,在這裡先都不需要用到
enum Engine {
  DEFAULT = 0;
  CAFFE = 1;
  CUDNN = 2;
}
到這裡,caffe.proto就基本完成了。
寫完caffe.proto后,需要加入具體layer,與其他的layer類似,在caffe/src/caffe/layer中建立一個proposal_layer.cpp
#include "caffe/fast_rcnn_layers.hpp"
#include "caffe/util/nms.hpp"

// Round-to-nearest for non-negative x.  Dtype comes from the enclosing
// template context of each use site.
#define ROUND(x) ((int)((x) + (Dtype)0.5))

using std::max;
using std::min;

namespace caffe {

// Applies the predicted deltas (dx, dy, d_log_w, d_log_h) to `box`
// (x1, y1, x2, y2), clips the result to the image, and returns 1 if the
// clipped box is at least (min_box_W x min_box_H), else 0.
template <typename Dtype>
static
int transform_box(Dtype box[], const Dtype dx, const Dtype dy,
                  const Dtype d_log_w, const Dtype d_log_h,
                  const Dtype img_W, const Dtype img_H,
                  const Dtype min_box_W, const Dtype min_box_H)
{
  // width & height of box
  const Dtype w = box[2] - box[0] + (Dtype)1;
  const Dtype h = box[3] - box[1] + (Dtype)1;
  // center location of box
  const Dtype ctr_x = box[0] + (Dtype)0.5 * w;
  const Dtype ctr_y = box[1] + (Dtype)0.5 * h;
  // new center location according to gradient (dx, dy)
  const Dtype pred_ctr_x = dx * w + ctr_x;
  const Dtype pred_ctr_y = dy * h + ctr_y;
  // new width & height according to gradient d(log w), d(log h)
  const Dtype pred_w = exp(d_log_w) * w;
  const Dtype pred_h = exp(d_log_h) * h;
  // update upper-left corner location
  box[0] = pred_ctr_x - (Dtype)0.5 * pred_w;
  box[1] = pred_ctr_y - (Dtype)0.5 * pred_h;
  // update lower-right corner location
  box[2] = pred_ctr_x + (Dtype)0.5 * pred_w;
  box[3] = pred_ctr_y + (Dtype)0.5 * pred_h;
  // adjust new corner locations to be within the image region
  box[0] = std::max((Dtype)0, std::min(box[0], img_W - (Dtype)1));
  box[1] = std::max((Dtype)0, std::min(box[1], img_H - (Dtype)1));
  box[2] = std::max((Dtype)0, std::min(box[2], img_W - (Dtype)1));
  box[3] = std::max((Dtype)0, std::min(box[3], img_H - (Dtype)1));
  // recompute new width & height
  const Dtype box_w = box[2] - box[0] + (Dtype)1;
  const Dtype box_h = box[3] - box[1] + (Dtype)1;
  // check if new box's size >= threshold
  return (box_w >= min_box_W) * (box_h >= min_box_H);
}

// Partial quicksort of an array of 5-tuples (x1, y1, x2, y2, score) on
// descending score.  Only the top `num_top` entries are guaranteed to be
// fully sorted; the right partition is skipped once it falls past num_top.
template <typename Dtype>
static
void sort_box(Dtype list_cpu[], const int start, const int end, const int num_top)
{
  const Dtype pivot_score = list_cpu[start * 5 + 4];
  int left = start + 1, right = end;
  Dtype temp[5];
  while (left <= right) {
    while (left <= end && list_cpu[left * 5 + 4] >= pivot_score) ++left;
    while (right > start && list_cpu[right * 5 + 4] <= pivot_score) --right;
    if (left <= right) {
      // swap the 5-tuples at `left` and `right`
      for (int i = 0; i < 5; ++i) {
        temp[i] = list_cpu[left * 5 + i];
      }
      for (int i = 0; i < 5; ++i) {
        list_cpu[left * 5 + i] = list_cpu[right * 5 + i];
      }
      for (int i = 0; i < 5; ++i) {
        list_cpu[right * 5 + i] = temp[i];
      }
      ++left;
      --right;
    }
  }
  // move the pivot into its final position
  if (right > start) {
    for (int i = 0; i < 5; ++i) {
      temp[i] = list_cpu[start * 5 + i];
    }
    for (int i = 0; i < 5; ++i) {
      list_cpu[start * 5 + i] = list_cpu[right * 5 + i];
    }
    for (int i = 0; i < 5; ++i) {
      list_cpu[right * 5 + i] = temp[i];
    }
  }
  if (start < right - 1) {
    sort_box(list_cpu, start, right - 1, num_top);
  }
  // only recurse into the right partition while it can still affect the top-N
  if (right + 1 < num_top && right + 1 < end) {
    sort_box(list_cpu, right + 1, end, num_top);
  }
}

// Fills `anchors` with num_ratios * num_scales boxes (x1, y1, x2, y2)
// centered on the base box, enumerating every (ratio, scale) pair.
template <typename Dtype>
static
void generate_anchors(int base_size, const Dtype ratios[], const Dtype scales[],
                      const int num_ratios, const int num_scales,
                      Dtype anchors[])
{
  // base box's width & height & center location
  const Dtype base_area = (Dtype)(base_size * base_size);
  const Dtype center = (Dtype)0.5 * (base_size - (Dtype)1);
  // enumerate all transformed boxes
  Dtype* p_anchors = anchors;
  for (int i = 0; i < num_ratios; ++i) {
    // transformed width & height for given ratio factors
    const Dtype ratio_w = (Dtype)ROUND(sqrt(base_area / ratios[i]));
    const Dtype ratio_h = (Dtype)ROUND(ratio_w * ratios[i]);
    for (int j = 0; j < num_scales; ++j) {
      // transformed width & height for given scale factors
      const Dtype scale_w = (Dtype)0.5 * (ratio_w * scales[j] - (Dtype)1);
      const Dtype scale_h = (Dtype)0.5 * (ratio_h * scales[j] - (Dtype)1);
      // (x1, y1, x2, y2) for transformed box
      p_anchors[0] = center - scale_w;
      p_anchors[1] = center - scale_h;
      p_anchors[2] = center + scale_w;
      p_anchors[3] = center + scale_h;
      p_anchors += 4;
    } // endfor j
  }
}

// For every feature-map cell and every anchor, shifts the anchor to the
// cell, applies the predicted deltas and writes a 5-tuple
// (x1, y1, x2, y2, score) into `proposals`.
// bottom4d holds the foreground scores (num_anchors x H x W, see caller),
// d_anchor4d the box deltas ((num_anchors*4) x H x W).
template <typename Dtype>
static
void enumerate_proposals_cpu(const Dtype bottom4d[], const Dtype d_anchor4d[],
                             const Dtype anchors[], Dtype proposals[],
                             const int num_anchors, const int bottom_H,
                             const int bottom_W, const Dtype img_H,
                             const Dtype img_W, const Dtype min_box_H,
                             const Dtype min_box_W, const int feat_stride)
{
  Dtype* p_proposal = proposals;
  const int bottom_area = bottom_H * bottom_W;
  for (int h = 0; h < bottom_H; ++h) {
    for (int w = 0; w < bottom_W; ++w) {
      const Dtype x = w * feat_stride;
      const Dtype y = h * feat_stride;
      const Dtype* p_box = d_anchor4d + h * bottom_W + w;
      const Dtype* p_score = bottom4d + h * bottom_W + w;
      for (int k = 0; k < num_anchors; ++k) {
        const Dtype dx = p_box[(k * 4 + 0) * bottom_area];
        const Dtype dy = p_box[(k * 4 + 1) * bottom_area];
        const Dtype d_log_w = p_box[(k * 4 + 2) * bottom_area];
        const Dtype d_log_h = p_box[(k * 4 + 3) * bottom_area];
        p_proposal[0] = x + anchors[k * 4 + 0];
        p_proposal[1] = y + anchors[k * 4 + 1];
        p_proposal[2] = x + anchors[k * 4 + 2];
        p_proposal[3] = y + anchors[k * 4 + 3];
        p_proposal[4]
            = transform_box(p_proposal, dx, dy, d_log_w, d_log_h, img_W, img_H,
                            min_box_W, min_box_H)
              * p_score[k * bottom_area];
        // transform_box() clips the box to the image and returns 0 when it is
        // smaller than the minimum size, so such proposals get score 0 here
        p_proposal += 5;
      } // endfor k
    } // endfor w
  } // endfor h
}

// Copies the selected proposals into the output rois blob as 5-tuples
// (item_index, x1, y1, x2, y2); roi_scores may be NULL when top[1] is absent.
template <typename Dtype>
static
void retrieve_rois_cpu(const int num_rois, const int item_index,
                       const Dtype proposals[], const int roi_indices[],
                       Dtype rois[], Dtype roi_scores[])
{
  for (int i = 0; i < num_rois; ++i) {
    const Dtype* const proposals_index = proposals + roi_indices[i] * 5;
    rois[i * 5 + 0] = item_index;
    rois[i * 5 + 1] = proposals_index[0];
    rois[i * 5 + 2] = proposals_index[1];
    rois[i * 5 + 3] = proposals_index[2];
    rois[i * 5 + 4] = proposals_index[3];
    if (roi_scores) {
      roi_scores[i] = proposals_index[4];
    }
  }
}

// Reads the layer parameters, precomputes the anchors and shapes the tops.
template <typename Dtype>
void ProposalLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                                      const vector<Blob<Dtype>*>& top) {
  ProposalParameter param = this->layer_param_.proposal_param();
  // cf. proposal_layer.py: the cfg parameters used by forward
  base_size_ = param.base_size();
  feat_stride_ = param.feat_stride();
  pre_nms_topn_ = param.pre_nms_topn();   // max number of RPN proposals kept before NMS
  post_nms_topn_ = param.post_nms_topn(); // number of RPN proposals kept after NMS
  nms_thresh_ = param.nms_thresh();       // IoU threshold, 0.7
  min_size_ = param.min_size();           // proposal H and W must both be >= 16
  // implement generate_anchors
  vector<Dtype> ratios(param.ratio_size());
  for (int i = 0; i < param.ratio_size(); ++i) {
    ratios[i] = param.ratio(i);
  }
  vector<Dtype> scales(param.scale_size());
  for (int i = 0; i < param.scale_size(); ++i) {
    scales[i] = param.scale(i);
  }
  vector<int> anchors_shape(2);
  anchors_shape[0] = ratios.size() * scales.size();
  anchors_shape[1] = 4;
  anchors_.Reshape(anchors_shape);
  generate_anchors(base_size_, &ratios[0], &scales[0], ratios.size(),
                   scales.size(), anchors_.mutable_cpu_data());
  vector<int> roi_indices_shape(1);
  roi_indices_shape[0] = post_nms_topn_;
  roi_indices_.Reshape(roi_indices_shape);
  // top[0]: R regions of interest, each a 5-tuple (n, x1, y1, x2, y2),
  // where n is the batch index and (x1, y1), (x2, y2) the rectangle corners.
  // top[1]: the score of each proposal, i.e. how likely it is an object.
  // rois blob : holds R regions of interest, each is a 5 - tuple
  // (n, x1, y1, x2, y2) specifying an image batch index n and a
  // rectangle(x1, y1, x2, y2)
  vector<int> top_shape(2);
  top_shape[0] = bottom[0]->shape(0) * post_nms_topn_;
  top_shape[1] = 5;
  top[0]->Reshape(top_shape);
  // scores blob : holds scores for R regions of interest
  if (top.size() > 1) {
    top_shape.pop_back();
    top[1]->Reshape(top_shape);
  }
}

// bottom[0]: fg/bg scores ((2*num_anchors) x H x W; only the fg half is used),
// bottom[1]: box deltas, bottom[2]: im_info (img_H, img_W, scale_H, scale_W).
// Pipeline: enumerate all shifted/transformed anchors, partially sort by
// score, NMS, then emit the surviving RoIs (and optionally their scores).
template <typename Dtype>
void ProposalLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                                       const vector<Blob<Dtype>*>& top) {
  CHECK_EQ(bottom[0]->shape(0), 1) << "Only single item batches are supported";
  const Dtype* p_bottom_item = bottom[0]->cpu_data();
  const Dtype* p_d_anchor_item = bottom[1]->cpu_data();
  const Dtype* p_img_info_cpu = bottom[2]->cpu_data();
  Dtype* p_roi_item = top[0]->mutable_cpu_data();
  Dtype* p_score_item = (top.size() > 1) ? top[1]->mutable_cpu_data() : NULL;
  vector<int> proposals_shape(2);
  vector<int> top_shape(2);
  proposals_shape[0] = 0;
  proposals_shape[1] = 5;
  top_shape[0] = 0;
  top_shape[1] = 5;
  // NOTE: the loop body never advances the per-item pointers; this is safe
  // only because the batch size is CHECKed to be 1 above.
  for (int n = 0; n < bottom[0]->shape(0); ++n) {
    // bottom shape: (2 x num_anchors) x H x W
    const int bottom_H = bottom[0]->height();
    const int bottom_W = bottom[0]->width();
    // input image height & width
    const Dtype img_H = p_img_info_cpu[0];
    const Dtype img_W = p_img_info_cpu[1];
    // scale factor for height & width
    const Dtype scale_H = p_img_info_cpu[2];
    const Dtype scale_W = p_img_info_cpu[3];
    // minimum box width & height
    const Dtype min_box_H = min_size_ * scale_H;
    const Dtype min_box_W = min_size_ * scale_W;
    // number of all proposals = num_anchors * H * W
    const int num_proposals = anchors_.shape(0) * bottom_H * bottom_W;
    // number of top-n proposals before NMS
    const int pre_nms_topn = std::min(num_proposals, pre_nms_topn_);
    // number of final RoIs
    int num_rois = 0;
    // enumerate all proposals
    //   num_proposals = num_anchors * H * W
    //   (x1, y1, x2, y2, score) for each proposal
    // NOTE: for bottom, only foreground scores are passed
    // (the "+ num_proposals" offset skips the background-score half)
    proposals_shape[0] = num_proposals;
    proposals_.Reshape(proposals_shape);
    enumerate_proposals_cpu(
        p_bottom_item + num_proposals, p_d_anchor_item, anchors_.cpu_data(),
        proposals_.mutable_cpu_data(), anchors_.shape(0), bottom_H, bottom_W,
        img_H, img_W, min_box_H, min_box_W, feat_stride_);
    // all proposals obtained — same as in the python version
    sort_box(proposals_.mutable_cpu_data(), 0, num_proposals - 1,
             pre_nms_topn_);
    // use cpu_data for reading and mutable_cpu_data for writing
    nms_cpu(pre_nms_topn, proposals_.cpu_data(),
            roi_indices_.mutable_cpu_data(), &num_rois, 0, nms_thresh_,
            post_nms_topn_);
    retrieve_rois_cpu(
        num_rois, n, proposals_.cpu_data(), roi_indices_.cpu_data(),
        p_roi_item, p_score_item);
    top_shape[0] += num_rois;
  }
  // shrink the tops to the number of RoIs actually kept
  top[0]->Reshape(top_shape);
  if (top.size() > 1) {
    top_shape.pop_back();
    top[1]->Reshape(top_shape);
  }
}

#ifdef CPU_ONLY
STUB_GPU(ProposalLayer);
#endif

INSTANTIATE_CLASS(ProposalLayer);
REGISTER_LAYER_CLASS(Proposal);

}  // namespace caffe
補充完成fast_rcnn_layers.hpp
namespace caffe {/* ROIPoolingLayer - Region of Interest Pooling Layer*/template <typename Dtype>class ROIPoolingLayer : public Layer<Dtype> {//...};template <typename Dtype>class SmoothL1LossLayer : public LossLayer<Dtype> {//...};template <typename Dtype>class ProposalLayer : public Layer<Dtype> { public: explicit ProposalLayer(const LayerParameter& param) : Layer<Dtype>(param) {} virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { //LOG(FATAL) << "Reshaping happens during the call to forward."; } virtual inline const char* type() const { return "ProposalLayer"; } protected: virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {} virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {} int base_size_; int feat_stride_; int pre_nms_topn_; int post_nms_topn_; Dtype nms_thresh_; int min_size_; Blob<Dtype> anchors_; Blob<Dtype> proposals_; Blob<int> roi_indices_; Blob<int> nms_mask_;};} // namespace caffe#endif // CAFFE_FAST_RCNN_LAYERS_HPP_同時在實現proposal layer的過程中,有一部很關鍵,就是非極大值抑制(nms),在原來的python版本中見lib/nms文件夾,這里,我們還需要添加nms相關hpp和cpp文件。 在caffe/src/caffe/util中添加nms.cpp文件:
#include "caffe/util/nms.hpp"

using std::max;
using std::min;

namespace caffe {

// Intersection-over-Union of two boxes A and B, each given as
// (x1, y1, x2, y2) with inclusive integer-grid coordinates (hence the +1
// in the width/height computations).
template <typename Dtype>
static
Dtype iou(const Dtype A[], const Dtype B[])
{
  // quick reject: no overlap at all
  if (A[0] > B[2] || A[1] > B[3] || A[2] < B[0] || A[3] < B[1]) {
    return 0;
  }
  // overlapped region (= box)
  const Dtype x1 = std::max(A[0], B[0]);
  const Dtype y1 = std::max(A[1], B[1]);
  const Dtype x2 = std::min(A[2], B[2]);
  const Dtype y2 = std::min(A[3], B[3]);
  // intersection area
  const Dtype width = std::max((Dtype)0, x2 - x1 + (Dtype)1);
  const Dtype height = std::max((Dtype)0, y2 - y1 + (Dtype)1);
  const Dtype area = width * height;
  // area of A, B
  const Dtype A_area = (A[2] - A[0] + (Dtype)1) * (A[3] - A[1] + (Dtype)1);
  const Dtype B_area = (B[2] - B[0] + (Dtype)1) * (B[3] - B[1] + (Dtype)1);
  // IoU
  return area / (A_area + B_area - area);
}

template static float iou(const float A[], const float B[]);
template static double iou(const double A[], const double B[]);

// Greedy non-maximum suppression over `boxes` (5 values per box:
// x1, y1, x2, y2, score; assumed sorted by descending score).  Writes the
// kept indices (offset by base_index) into index_out, at most max_num_out
// of them, and the kept count into *num_out.
template <typename Dtype>
void nms_cpu(const int num_boxes, const Dtype boxes[], int index_out[],
             int* const num_out, const int base_index, const Dtype nms_thresh,
             const int max_num_out)
{
  int count = 0;
  std::vector<char> is_dead(num_boxes);
  for (int i = 0; i < num_boxes; ++i) {
    is_dead[i] = 0;
  }
  for (int i = 0; i < num_boxes; ++i) {
    if (is_dead[i]) {
      continue;
    }
    // keep the highest-scoring surviving box...
    index_out[count++] = base_index + i;
    if (count == max_num_out) {
      break;
    }
    // ...and suppress every later box that overlaps it too much
    for (int j = i + 1; j < num_boxes; ++j) {
      if (!is_dead[j] && iou(&boxes[i * 5], &boxes[j * 5]) > nms_thresh) {
        is_dead[j] = 1;
      }
    }
  }
  *num_out = count;
  is_dead.clear();
}

template
void nms_cpu(const int num_boxes, const float boxes[], int index_out[],
             int* const num_out, const int base_index, const float nms_thresh,
             const int max_num_out);
template
void nms_cpu(const int num_boxes, const double boxes[], int index_out[],
             int* const num_out, const int base_index, const double nms_thresh,
             const int max_num_out);

}  // namespace caffe
在caffe/include/caffe/util中添加nms.hpp文件
#ifndef _CAFFE_UTIL_NMS_HPP_
#define _CAFFE_UTIL_NMS_HPP_

#include <vector>

#include "caffe/blob.hpp"

namespace caffe {

// CPU greedy non-maximum suppression; see nms.cpp for the implementation.
template <typename Dtype>
void nms_cpu(const int num_boxes, const Dtype boxes[], int index_out[],
             int* const num_out, const int base_index, const Dtype nms_thresh,
             const int max_num_out);

// GPU variant; p_mask is presumably device scratch space for the pairwise
// suppression mask (cf. the nms_mask_ member of ProposalLayer) — confirm
// against the .cu implementation.
template <typename Dtype>
void nms_gpu(const int num_boxes, const Dtype boxes_gpu[],
             Blob<int>* const p_mask, int index_out_cpu[],
             int* const num_out, const int base_index,
             const Dtype nms_thresh, const int max_num_out);

}  // namespace caffe

#endif  // CAFFE_UTIL_NMS_HPP_
上面這些代碼的注解不是很詳細,具體解讀等有時間了再來補充。
在caffe中其實是需要寫test部分代碼的,這裡沒涉及到,上面的每一步,做完後就可以make下,根據提示的消息再來調代碼。這裡給出一個用matlab測試例子,是參考的其他博客,本人沒有試過,僅供參考,出處:原文鏈接 利用MATLAB調試 Caffe的MATLAB接口可是個好東西,用MATLAB調試可以很方便地看到各種數據的形式以及結果。 我們這邊需要調試自己實現的激活函數,shlu_layer。 第一步便是編寫一個測試網絡,具體如下:
name: "SHLUTEST"
input: "data"
input_dim: 1
input_dim: 1
input_dim: 100
input_dim: 100
# must be set when testing the backward pass, otherwise all
# back-propagated gradients are 0
force_backward: true
layer {
  name: "shlu1"
  type: "Shlu" # must match the type name you registered earlier; case-sensitive
  bottom: "data"
  top: "shlu1"
}
編寫的這個網絡實現的便是輸入數據維數1*1*100*100,通過shlu_layer。 接下來,打開Matlab
cd caffe,啟動matlab,編寫代碼如下:
% Drive the SHLUTEST net once forward and once backward so the custom
% shlu_layer can be checked against expected values.
% NOTE: MATLAB comments start with '%'; the original snippet used '#',
% which is a syntax error in MATLAB.
addpath ./matlab
model = './shlu_test.prototxt';
caffe.set_mode_cpu();
% use GPU mode when testing the GPU code:
% caffe.set_mode_gpu();
% caffe.set_device(gpu_id);
net = caffe.Net(model, 'test');
% fill the 'data' blob with 1*1*100*100 normally distributed random numbers
net.blobs('data').set_data(randn(net.blobs('data').shape));
% forward pass
net.forward_prefilled();
% check whether the generated "res" is the expected result
res = net.blobs('shlu1').get_data();
% backward pass
% diff is the gradient you set yourself; its dimensions must match the blob
net.blobs('shlu1').set_diff(diff);
net.backward_prefilled();
% check whether the generated "data_diff" is the expected result
data_diff = net.blobs('data').get_diff();
分別在cpu模式與gpu模式下都調試一遍,保證沒有錯誤,再進行自己所需要的網絡的整體配置。
新聞熱點
疑難解答