[TOC]

# 第四课
1. 编写自定义层
2. 训练过程可视化

# 视频
## 下载
链接: https://pan.baidu.com/s/1NyvieeskIhq8F3kQqgWR7w 密码: qhky

## 在线观看(强烈推荐)
1080 高清无码带字幕

https://youtu.be/fnTlICNbPag

# 准备文件
这是手写训练数据集

![](https://box.kancloud.cn/3caac25ca0a4c16a59ab9e9fb3dc809c_196x165.png)

# 自定义层
编写自定义层就不用lmdb啦,研究学习更方便。

# 解析数据集mnist
首先我们先要解析他.

http://yann.lecun.com/exdb/mnist/

![](https://box.kancloud.cn/597dd1c07138469fd78e49393aa30ad9_543x476.png)

这个网站说明了mnist的数据的结构是怎么样的,要如何去读取出来.

![](https://box.kancloud.cn/8b57d3b1ee83c935987ac5c8285ece64_515x498.png)

# 训练过程可视化
有了自定义层,那么卷积的结果我们就能够看到了。虽然越往后面的过程我们人类基本是看不懂了。

![](https://box.kancloud.cn/0f98430d62efa522600b1b585512724d_1145x689.png)

# 源代码
```
#include <cv.h>
#include <highgui.h>
#include <cc_utils.h>
#include <Windows.h>

#include <algorithm>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <memory>
#include <numeric>
#include <random>
#include <vector>

using namespace cv;
using namespace std;
using namespace cc;

#pragma comment(lib, "libcaffe.lib")

namespace {

// Magic numbers of the IDX files, as listed on http://yann.lecun.com/exdb/mnist/
const std::uint32_t kLabelFileMagic = 0x00000801; // 2049: label file
const std::uint32_t kImageFileMagic = 0x00000803; // 2051: image file

// Scale applied to 8-bit pixels when they are converted to float (1/256).
const double kPixelScale = 0.00390625;

// Closes a FILE* when the owning unique_ptr goes out of scope.
struct FileCloser{
    void operator()(FILE* f) const{
        if (f)
            fclose(f);
    }
};
typedef std::unique_ptr<FILE, FileCloser> FilePtr;

// Reads `count` big-endian 32-bit integers from `f` into `out`.
// The value is assembled byte by byte, so the result does not depend on the
// host byte order and no signed shifts are involved.
// Returns false when the file ends before all integers were read.
bool readBigEndian32(FILE* f, std::uint32_t* out, size_t count){
    for (size_t i = 0; i < count; ++i){
        unsigned char b[4];
        if (fread(b, 1, sizeof(b), f) != sizeof(b))
            return false;

        out[i] = (static_cast<std::uint32_t>(b[0]) << 24) |
                 (static_cast<std::uint32_t>(b[1]) << 16) |
                 (static_cast<std::uint32_t>(b[2]) << 8) |
                  static_cast<std::uint32_t>(b[3]);
    }
    return true;
}

} // namespace

// Parses an MNIST label file (IDX1: magic, item count, then one byte per label).
//
// file: path of the label file.
// Returns the labels in file order. The result is empty when the file cannot
// be opened, the header is short or the magic number is wrong; when the label
// data is truncated only the labels actually read are returned.
vector<unsigned char> parseTrainingSetLabel(const char* file){
    vector<unsigned char> out;
    FilePtr f(fopen(file, "rb"));
    if (!f)
        return out;

    std::uint32_t header[2]; // magic, number of items
    if (!readBigEndian32(f.get(), header, 2) || header[0] != kLabelFileMagic)
        return out;

    out.resize(header[1]);
    if (!out.empty()){
        // Drop the tail that a truncated file could not fill.
        const size_t got = fread(out.data(), 1, out.size(), f.get());
        out.resize(got);
    }
    return out;
}

// Parses an MNIST image file (IDX3: magic, image count, rows, cols, then
// rows*cols bytes per image).
//
// file: path of the image file.
// Returns one CV_8U Mat per image, sized from the header (28x28 for the
// standard MNIST files). The result is empty when the file cannot be opened or
// the header is invalid; reading stops at the first incomplete image.
vector<Mat> parseTrainingSetImage(const char* file){
    vector<Mat> out;
    FilePtr f(fopen(file, "rb"));
    if (!f)
        return out;

    std::uint32_t header[4]; // magic, number of images, rows, cols
    if (!readBigEndian32(f.get(), header, 4) || header[0] != kImageFileMagic)
        return out;

    // Reject dimensions that are zero or do not fit the int that cv::Mat takes.
    const std::uint32_t maxSide = static_cast<std::uint32_t>(INT_MAX);
    if (header[2] == 0 || header[3] == 0 || header[2] > maxSide || header[3] > maxSide)
        return out;

    const std::uint32_t num = header[1];
    const int rows = static_cast<int>(header[2]);
    const int cols = static_cast<int>(header[3]);
    const size_t bytesPerImage = static_cast<size_t>(rows) * static_cast<size_t>(cols);

    for (std::uint32_t i = 0; i < num; ++i){
        // Read straight into a fresh Mat; no scratch buffer and no clone needed.
        Mat img(rows, cols, CV_8U);
        if (fread(img.data, 1, bytesPerImage, f.get()) != bytesPerImage)
            break;
        out.push_back(img);
    }
    return out;
}

// Data layer that feeds MNIST images and labels into the network directly from
// the IDX files, so no lmdb database is required.
// top[0] receives the images, top[1] the labels.
class MnistDataLayer : public DataLayer{
public:
    SETUP_LAYERFUNC(MnistDataLayer);

    virtual ~MnistDataLayer(){
        stopBatchLoader();
    }

    // Fills one batch: writes batch_size_ labels into top[1] and the matching
    // scaled float images into top[0]. Items are visited in the shuffled order
    // of item_inds_; once every item has been used the order is reshuffled.
    virtual void loadBatch(Blob** top, int numTop){
        Blob* image = top[0];
        Blob* label = top[1];

        float* label_ptr = label->mutable_cpu_data();
        for (int i = 0; i < batch_size_; ++i){
            const int ind = item_inds_[item_cursor_];

            // Copy the label into the caffe blob.
            *label_ptr++ = static_cast<float>(labels_[ind]);

            // Convert to float and apply the scale factor; convertTo writes into
            // a new Mat, so the stored image is left untouched.
            Mat img;
            images_[ind].convertTo(img, CV_32F, kPixelScale);
            image->setDataRGB(i, img);

            if (++item_cursor_ == item_inds_.size()){
                // Wrapped around: start over with a new random order.
                item_cursor_ = 0;
                std::shuffle(item_inds_.begin(), item_inds_.end(), rng_);
            }
        }
    }

    // Loads the data set for the given phase (training files for PhaseTrain,
    // the t10k files otherwise), builds the shuffled index table, reads
    // batch_size / width / height from param_str and shapes the two top blobs.
    // Fails with CV_Assert when the data set is empty, when the label and image
    // counts differ, or when batch_size is not positive.
    virtual void setup(const char* name, const char* type, const char* param_str, int phase, Blob** bottom, int numBottom, Blob** top, int numTop){
        this->phase_ = phase;

        // Load the data for this phase.
        if (this->phase_ == PhaseTrain){
            labels_ = parseTrainingSetLabel("train-labels-idx1-ubyte");
            images_ = parseTrainingSetImage("train-images-idx3-ubyte");
        }
        else{
            labels_ = parseTrainingSetLabel("t10k-labels-idx1-ubyte");
            images_ = parseTrainingSetImage("t10k-images-idx3-ubyte");
        }

        // Fail here with a clear message instead of indexing out of bounds later
        // in loadBatch when the files are missing or damaged.
        CV_Assert(!images_.empty());
        CV_Assert(labels_.size() == images_.size());

        // Example with 10 items: the table starts as 0, 1, ..., 9 and is then
        // permuted, e.g. 3, 2, 0, 5, 1, 6, 7, 9, 4, 8.
        item_cursor_ = 0;
        item_inds_.resize(images_.size());
        std::iota(item_inds_.begin(), item_inds_.end(), 0);

        // Shuffle the visiting order.
        std::shuffle(item_inds_.begin(), item_inds_.end(), rng_);

        // Parse the layer parameters and set the output shapes.
        auto params = parseParamStr(param_str);
        this->batch_size_ = getParamInt(params, "batch_size");
        int width = getParamInt(params, "width");
        int height = getParamInt(params, "height");
        CV_Assert(this->batch_size_ > 0);

        top[0]->Reshape(batch_size_, 1, height, width);
        top[1]->Reshape(batch_size_, 1, 1, 1);
        DataLayer::setup(name, type, param_str, phase, bottom, numBottom, top, numTop);
    }

    // The output shapes are fixed in setup, so there is nothing to do here.
    virtual void reshape(Blob** bottom, int numBottom, Blob** top, int numTop){

    }

private:
    size_t item_cursor_;            // position of the next item inside item_inds_
    vector<int> item_inds_;         // shuffled item indices
    int batch_size_;
    vector<Mat> images_;
    vector<unsigned char> labels_;
    int phase_;
    // Default-seeded on purpose: the visiting order is the same on every run,
    // as it was with the never-seeded rand() behind std::random_shuffle.
    std::mt19937 rng_;
};

// Debug layer that displays its bottom blob in an OpenCV window.
class VisualLayer : public AbstractCustomLayer{
public:
    SETUP_LAYERFUNC(VisualLayer);

    // Nothing to prepare.
    virtual void setup(const char* name, const char* type, const char* param_str, int phase, Blob** bottom, int numBottom, Blob** top, int numTop){

    }

    // Lays the first channel() planes of bottom[0] side by side in one row,
    // enlarges the strip 10x and shows it in the window "ip2".
    // NOTE(review): this assumes the blob stores its planes contiguously with
    // the first item first, so only the first item of a batch is shown - confirm.
    virtual void forward(Blob** bottom, int numBottom, Blob** top, int numTop){
        if (numBottom < 1)
            return;

        Blob* input = bottom[0];
        const int h = input->height();
        const int w = input->width();
        const int channels = input->channel();
        if (h <= 0 || w <= 0 || channels <= 0)
            return; // nothing to draw

        Mat strip(h, w * channels, CV_32F);
        float* plane = input->mutable_cpu_data();
        for (int c = 0; c < channels; ++c){
            // Wrap the plane without copying, then copy it into its slot.
            Mat slot = strip(Rect(w * c, 0, w, h));
            Mat(h, w, CV_32F, plane).copyTo(slot);
            plane += w * h;
        }

        Mat enlarged;
        cv::resize(strip, enlarged, strip.size() * 10);
        imshow("ip2", enlarged);
        waitKey(1);
    }

    // No top blobs to shape.
    virtual void reshape(Blob** bottom, int numBottom, Blob** top, int numTop){

    }
};

// Registers the two custom layers and runs the LeNet solver from the data
// directory. Returns 0 on completion, 1 when the data directory cannot be
// entered.
int main(){

#if 0
    // Stand-alone check of the data set parsers.
    auto train_label = parseTrainingSetLabel("train-labels-idx1-ubyte");
    auto train_images = parseTrainingSetImage("train-images-idx3-ubyte");
    auto test_label = parseTrainingSetLabel("t10k-labels-idx1-ubyte");
    auto test_images = parseTrainingSetImage("t10k-images-idx3-ubyte");
#endif

    // The data files and the solver prototxt are addressed by relative paths,
    // so there is no point in continuing from the wrong directory.
    if (!SetCurrentDirectoryA("F:/research/1.CCDL/第四课-手写自定义层-mnist-lenet/data")){
        fprintf(stderr, "cannot change to the data directory\n");
        return 1;
    }

    installRegister();
    INSTALL_LAYER(MnistDataLayer);
    INSTALL_LAYER(VisualLayer);

    WPtr<Solver> solver = loadSolverFromPrototxt("lenet_solver.prototxt");
    solver->Solve();
    return 0;
}
```