From c522427c8dfac7173ac244cdb665476e3bf63c36 Mon Sep 17 00:00:00 2001 From: Leo Vandriel Date: Thu, 12 Oct 2017 16:29:52 -0700 Subject: [PATCH] fix use of utils --- README.md | 22 +- include/caffe2/util/cmd.h | 7 +- include/caffe2/util/misc.h | 430 ------------------------------ include/caffe2/util/model.h | 14 +- include/caffe2/util/net.h | 7 + include/caffe2/util/preprocess.h | 240 +++++++++++++++++ include/caffe2/util/train.h | 95 +++++++ include/caffe2/zoo/keeper.h | 56 ++-- src/caffe2/binaries/diff.cc | 10 +- src/caffe2/binaries/dream.cc | 82 +++--- src/caffe2/binaries/imagenet.cc | 39 ++- src/caffe2/binaries/inspect.cc | 35 ++- src/caffe2/binaries/mnist.cc | 63 +++-- src/caffe2/binaries/pretrained.cc | 10 +- src/caffe2/binaries/retrain.cc | 264 ------------------ src/caffe2/binaries/rnn.cc | 129 ++++----- src/caffe2/binaries/train.cc | 269 ++++++++----------- src/caffe2/util/model.cc | 215 ++++++++++++++- src/caffe2/util/net.cc | 45 ++-- 19 files changed, 936 insertions(+), 1096 deletions(-) delete mode 100644 include/caffe2/util/misc.h create mode 100644 include/caffe2/util/preprocess.h create mode 100644 include/caffe2/util/train.h delete mode 100644 src/caffe2/binaries/retrain.cc diff --git a/README.md b/README.md index 69872046..997b7a76 100644 --- a/README.md +++ b/README.md @@ -130,17 +130,17 @@ This tutorial is transcribed in [rnn.cc](src/caffe2/binaries/rnn.cc). It takes t ./bin/rnn -In contrast to the tutorial, this script terminates after 10K iterations. To get more, use `--train_runs`: +In contrast to the tutorial, this script terminates after 10K iterations. To get more, use `--train-runs`: - ./bin/run --train_runs 100000 + ./bin/run --train-runs 100000 To get better results (loss < 1), expand the hidden layer: - ./bin/rnn --train_runs 100000 --batch_size 32 --hidden_size 512 --seq_length 32 + ./bin/rnn --train-runs 100000 --batch-size 32 --hidden-size 512 --seq-length 32 The file `res/dickens.txt` contains a larger volume of text. 
Because the writing is a bit more recent, it's more challenging to generate convincing results. Also, single newlines are stripped to allow for more creativity. - ./bin/rnn --train_runs 100000 --batch_size 32 --hidden_size 768 --seq_length 32 --train_data res/dickens.txt + ./bin/rnn --train-runs 100000 --batch-size 32 --hidden-size 768 --seq-length 32 --train-data res/dickens.txt After 200K runs, the loss has not dropped below 36, in contrast to the shakespeare text. Perhaps this requires an additional hidden layer in the LSTM model. @@ -150,7 +150,7 @@ Much of the progress in image recognition is published after the yearly [ImageNe To classify the content of an image, run: - ./bin/imagenet --model --image_file + ./bin/imagenet --model --image-file Where the model name is one of the following: @@ -180,21 +180,21 @@ The article [DeCAF: A Deep Convolutional Activation Feature for Generic Visual R First divide all images in subfolders with the label a folder name. Then to retrain the final layer of GoogleNet: - ./bin/retrain --model googlenet --folder --layer pool5/7x7_s1 + ./bin/train --model googlenet --folder --layer pool5/7x7_s1 The script starts out by collecting all images and running them through the pre-trained part of the model. This allows for very fast training on the pre-processed image data. If you have more (GPU) power at your disposal retrain VGG16's final 2 layers: - ./bin/retrain --model vgg16 --folder --layer fc6 + ./bin/train --model vgg16 --folder --layer fc6 Some models, like SqueezeNet require reshaping of their output to N x D tensor: - ./bin/retrain --model squeezenet --folder --layer fire9/concat --reshape_output + ./bin/train --model squeezenet --folder --layer fire9/concat --reshape-output You can also provide your own pre-trained model. 
Specify the location of the init and predict `.pb` file including a `%` character: - ./bin/retrain --model res/googlenet_%_net.pb --folder --layer pool5/7x7_s1 + ./bin/train --model res/googlenet_%_net.pb --folder --layer pool5/7x7_s1 See also: @@ -203,7 +203,7 @@ See also: ## Training from scratch -To fully train an existing image classification model from scratch, run: +To fully train an existing image classification model from scratch, run without the `--layer` option: ./bin/train --model --folder @@ -213,7 +213,7 @@ Add `--display` for training visualization. Some models, like SqueezeNet require reshaping of their output to N x D tensor: - ./bin/train --model squeezenet --folder --reshape_output + ./bin/train --model squeezenet --folder --reshape-output ## Deep Dream diff --git a/include/caffe2/util/cmd.h b/include/caffe2/util/cmd.h index af0c5ebe..2e1807f4 100644 --- a/include/caffe2/util/cmd.h +++ b/include/caffe2/util/cmd.h @@ -22,7 +22,6 @@ bool cmd_setup_cuda() { option.set_device_type(CUDA); #ifdef WITH_CUDA new CUDAContext(option); - std::cout << std::endl << "using CUDA" << std::endl; return true; #else return false; @@ -50,11 +49,13 @@ bool cmd_init(const std::string title) { return false; } - if (FLAGS_device != "cpu") cmd_setup_cuda(); + auto cuda = (FLAGS_device != "cpu" && cmd_setup_cuda()); std::cout << "optimizer: " << FLAGS_optimizer << std::endl; std::cout << "device: " << FLAGS_device << std::endl; - std::cout << "dump_model: " << (FLAGS_dump_model ? "true" : "false") + std::cout << "using cuda: " << (cuda ? "true" : "false") << std::endl; + ; + std::cout << "dump-model: " << (FLAGS_dump_model ? 
"true" : "false") << std::endl; return true; diff --git a/include/caffe2/util/misc.h b/include/caffe2/util/misc.h deleted file mode 100644 index ad79f17e..00000000 --- a/include/caffe2/util/misc.h +++ /dev/null @@ -1,430 +0,0 @@ -#ifndef MISC_H -#define MISC_H - -#include -#include -#include - -#include "caffe2/util/blob.h" -#include "caffe2/util/model.h" -#include "caffe2/util/tensor.h" - -#include -#include - -namespace caffe2 { - -enum { kRunTrain = 0, kRunValidate = 1, kRunTest = 2, kRunNum = 3 }; - -static std::map name_for_run({ - {kRunTrain, "train"}, {kRunValidate, "validate"}, {kRunTest, "test"}, -}); - -static std::map percentage_for_run({ - {kRunTest, 10}, {kRunValidate, 20}, {kRunTrain, 70}, -}); - -std::string filename_to_key(const std::string &filename) { - // return filename; - return std::to_string(std::hash{}(filename)) + "_" + filename; -} - -void load_labels(const std::string &folder, const std::string &path_prefix, - std::vector &class_labels, - std::vector> &image_files) { - std::cout << "load class labels.." << std::endl; - auto classes_text_path = path_prefix + "classes.txt"; - ; - std::ifstream infile(classes_text_path); - std::string line; - while (std::getline(infile, line)) { - if (line.size()) { - class_labels.push_back(line); - // std::cout << '.' << line << '.' << std::endl; - } - } - - std::cout << "load image folder.." << std::endl; - auto directory = opendir(folder.c_str()); - CAFFE_ENFORCE(directory, "no image folder " + folder); - if (directory) { - struct stat s; - struct dirent *entry; - while ((entry = readdir(directory))) { - auto class_name = entry->d_name; - auto class_path = folder + '/' + class_name; - if (class_name[0] != '.' 
&& class_name[0] != '_' && - !stat(class_path.c_str(), &s) && (s.st_mode & S_IFDIR)) { - auto subdir = opendir(class_path.c_str()); - if (subdir) { - auto class_index = - find(class_labels.begin(), class_labels.end(), class_name) - - class_labels.begin(); - if (class_index == class_labels.size()) { - class_labels.push_back(class_name); - } - while ((entry = readdir(subdir))) { - auto image_file = entry->d_name; - auto image_path = class_path + '/' + image_file; - if (image_file[0] != '.' && !stat(image_path.c_str(), &s) && - (s.st_mode & S_IFREG)) { - // std::cout << class_name << ' ' << image_path << std::endl; - image_files.push_back({image_path, class_index}); - } - } - closedir(subdir); - } - } - } - closedir(directory); - } - CAFFE_ENFORCE(image_files.size(), "no images found in " + folder); - std::random_shuffle(image_files.begin(), image_files.end()); - std::cout << class_labels.size() << " labels found" << std::endl; - std::cout << image_files.size() << " images found" << std::endl; - - std::cout << "write class labels.." 
<< std::endl; - std::ofstream class_file(classes_text_path); - if (class_file.is_open()) { - for (auto &label : class_labels) { - class_file << label << std::endl; - } - class_file.close(); - } - auto classes_header_path = path_prefix + "classes.h"; - std::ofstream labels_file(classes_header_path.c_str()); - if (labels_file.is_open()) { - labels_file << "const char * retrain_classes[] {"; - bool first = true; - for (auto &label : class_labels) { - if (first) { - first = false; - } else { - labels_file << ','; - } - labels_file << std::endl << '"' << label << '"'; - } - labels_file << std::endl << "};" << std::endl; - labels_file.close(); - } -} - -void write_batch(Workspace &workspace, NetBase *predict_net, - std::string &input_name, std::string &output_name, - std::vector> &batch_files, - std::unique_ptr *database, int size_to_fit) { - std::unique_ptr transaction[kRunNum]; - for (int i = 0; i < kRunNum; i++) { - transaction[i] = database[i]->NewTransaction(); - } - - std::vector filenames; - for (auto &pair : batch_files) { - filenames.push_back(pair.first); - } - std::vector indices; - TensorCPU input; - TensorUtil(input).ReadImages(filenames, size_to_fit, indices); - TensorCPU output; - if (predict_net) { - BlobUtil(*workspace.GetBlob(input_name)).Set(input); - predict_net->Run(); - auto tensor = BlobUtil(*workspace.GetBlob(output_name)).Get(); - output.ResizeLike(tensor); - output.ShareData(tensor); - } else { - output.ResizeLike(input); - output.ShareData(input); - } - - TensorProtos protos; - TensorProto *data = protos.add_protos(); - TensorProto *label = protos.add_protos(); - data->set_data_type(TensorProto::FLOAT); - label->set_data_type(TensorProto::INT32); - label->add_int32_data(0); - TensorSerializer serializer; - std::string value; - std::vector dims(output.dims().begin() + 1, output.dims().end()); - auto size = output.size() / output.dim(0); - auto output_data = output.data(); - for (auto i : indices) { - auto single = TensorCPU( - dims, 
std::vector(output_data, output_data + size), NULL); - output_data += size; - data->Clear(); - serializer.Serialize(single, "", data, 0, kDefaultChunkSize); - label->set_int32_data(0, batch_files[i].second); - protos.SerializeToString(&value); - int percentage = 0, p = (int)(rand() * 100.0 / RAND_MAX); - auto key = filename_to_key(batch_files[i].first); - for (auto pair : percentage_for_run) { - percentage += pair.second; - if (p < percentage) { - transaction[pair.first]->Put(key, value); - break; - } - } - } - - for (int i = 0; i < kRunNum; i++) { - transaction[i]->Commit(); - } -} - -void pre_process(const std::vector> &image_files, - const std::string *db_paths, NetDef &init_model, - NetDef &predict_model, const std::string &db_type, - int batch_size, int size_to_fit) { - std::cout << "store partial prediction.." << std::endl; - std::unique_ptr database[kRunNum]; - for (int i = 0; i < kRunNum; i++) { - database[i] = db::CreateDB(db_type, db_paths[i], db::WRITE); - } - auto image_count = 0; - Workspace workspace; - auto init_net = CreateNet(init_model, &workspace); - init_net->Run(); - auto predict_net = predict_model.external_input_size() - ? CreateNet(predict_model, &workspace) - : NULL; - auto input_name = predict_model.external_input_size() - ? predict_model.external_input(0) - : ""; - auto output_name = predict_model.external_output_size() - ? predict_model.external_output(0) - : ""; - std::vector> batch_files; - for (auto &pair : image_files) { - auto &filename = pair.first; - auto class_index = pair.second; - image_count++; - auto in_db = false; - auto key = filename_to_key(filename); - for (int i = 0; i < kRunNum && !in_db; i++) { - auto cursor = database[i]->NewCursor(); - cursor->Seek(key); - in_db |= (cursor->Valid() && cursor->key() == key); - } - if (!in_db) { - batch_files.push_back({filename, class_index}); - } - if (image_count % 10 == 0) { - std::cerr << '\r' << std::string(40, ' ') << '\r' << "pre-processing.. 
" - << image_count << '/' << image_files.size() << " " - << std::setprecision(3) - << ((float)100 * image_count / image_files.size()) << "%" - << std::flush; - } - if (batch_files.size() == batch_size) { - write_batch(workspace, predict_net ? predict_net.get() : NULL, input_name, - output_name, batch_files, database, size_to_fit); - batch_files.clear(); - } - } - if (batch_files.size() > 0) { - write_batch(workspace, predict_net ? predict_net.get() : NULL, input_name, - output_name, batch_files, database, size_to_fit); - } - for (int i = 0; i < kRunNum; i++) { - CAFFE_ENFORCE(database[i]->NewCursor()->Valid(), - "database " + name_for_run[i] + " is empty"); - } - std::cerr << '\r' << std::string(80, ' ') << '\r' << image_files.size() - << " images processed" << std::endl; -} - -void dump_database(const std::string db_path, const std::string &db_type) { - std::cout << "dumping database.." << std::endl; - std::unique_ptr database = db::CreateDB(db_type, db_path, db::READ); - - for (auto cursor = database->NewCursor(); cursor->Valid(); cursor->Next()) { - auto key = cursor->key().substr(0, 48); - auto value = cursor->value(); - TensorProtos protos; - protos.ParseFromString(value); - auto tensor_proto = protos.protos(0); - auto label_proto = protos.protos(1); - TensorDeserializer deserializer; - TensorCPU tensor; - int label = label_proto.int32_data(0); - deserializer.Deserialize(tensor_proto, &tensor); - auto dims = tensor.dims(); - dims.insert(dims.begin(), 1); - tensor.Resize(dims); - std::cout << key << " " - << (value.size() > 1000 ? value.size() / 1000 : value.size()) - << (value.size() > 1000 ? 
"K" : "B") << " (" << tensor.dims() - << ") " << label << std::endl; - TensorUtil(tensor).ShowImage("inspect", 0, 1.0, 128); - } -} - -void pre_process(const std::vector> &image_files, - const std::string *db_paths, const std::string &db_type, - int size_to_fit) { - NetDef none; - pre_process(image_files, db_paths, none, none, db_type, 64, size_to_fit); -} - -void split_model(NetDef &base_init_model, NetDef &base_predict_model, - const std::string &layer, NetDef &first_init_model, - NetDef &first_predict_model, NetDef &second_init_model, - NetDef &second_predict_model, bool force_cpu, - bool inclusive = true) { - std::cout << "split model.." << std::endl; - std::set static_inputs = - NetUtil(base_predict_model).CollectLayers(layer); - - // copy operators - for (const auto &op : base_init_model.op()) { - auto is_first = (static_inputs.find(op.output(0)) != static_inputs.end()); - auto new_op = (is_first ? first_init_model : second_init_model).add_op(); - new_op->CopyFrom(op); - } - for (const auto &op : base_predict_model.op()) { - auto is_first = (static_inputs.find(op.output(0)) != static_inputs.end() && - (inclusive || op.input(0) != op.output(0))); - auto new_op = - (is_first ? 
first_predict_model : second_predict_model).add_op(); - new_op->CopyFrom(op); - if (!force_cpu) { - new_op->set_engine("CUDNN"); // TODO: not here - } - } - - // copy externals - if (first_predict_model.op().size()) { - // first_predict_model.add_external_input(base_predict_model.external_input(0)); - } - if (second_predict_model.op().size()) { - // second_predict_model.add_external_input(layer); - } - for (const auto &output : base_init_model.external_output()) { - auto is_first = (static_inputs.find(output) != static_inputs.end()); - if (is_first) { - first_init_model.add_external_output(output); - } else { - second_init_model.add_external_output(output); - } - } - for (const auto &input : base_predict_model.external_input()) { - auto is_first = (static_inputs.find(input) != static_inputs.end()); - if (is_first) { - first_predict_model.add_external_input(input); - } else { - second_predict_model.add_external_input(input); - } - } - if (first_predict_model.op().size()) { - first_predict_model.add_external_output(layer); - } - if (second_predict_model.op().size()) { - second_predict_model.add_external_output( - base_predict_model.external_output(0)); - } - - if (base_init_model.has_name()) { - if (!first_init_model.has_name()) { - first_init_model.set_name(base_init_model.name() + "_first"); - } - if (!second_init_model.has_name()) { - second_init_model.set_name(base_init_model.name() + "_second"); - } - } - if (base_predict_model.has_name()) { - if (!first_predict_model.has_name()) { - first_predict_model.set_name(base_predict_model.name() + "_first"); - } - if (!second_predict_model.has_name()) { - second_predict_model.set_name(base_predict_model.name() + "_second"); - } - } -} - -void set_trainable(OperatorDef &op, bool train) { - if (op.type() == "Dropout") { - for (auto &arg : *op.mutable_arg()) { - if (arg.name() == "is_test") { - arg.set_i(!train); - } - } - } -} - -void copy_train_model(NetDef &base_init_model, NetDef &base_predict_model, - const 
std::string &layer, int out_size, - NetDef &train_init_model, NetDef &train_predict_model) { - std::string last_w, last_b; - for (const auto &op : base_predict_model.op()) { - auto new_op = train_predict_model.add_op(); - new_op->CopyFrom(op); - set_trainable(*new_op, true); - if (op.type() == "FC") { - last_w = op.input(1); - last_b = op.input(2); - } - } - NetUtil(train_predict_model).SetRenameInplace(); - for (const auto &op : base_init_model.op()) { - auto &output = op.output(0); - auto init_op = train_init_model.add_op(); - bool uniform = (output.find("_b") != std::string::npos); - init_op->set_type(uniform ? "ConstantFill" : "XavierFill"); - for (const auto &arg : op.arg()) { - if (arg.name() == "shape") { - auto init_arg = init_op->add_arg(); - init_arg->set_name("shape"); - if (output == last_w) { - init_arg->add_ints(out_size); - init_arg->add_ints(arg.ints(1)); - } else if (output == last_b) { - init_arg->add_ints(out_size); - } else { - init_arg->CopyFrom(arg); - } - } - } - init_op->add_output(output); - } - std::set existing_inputs; - existing_inputs.insert(train_predict_model.external_input().begin(), - train_predict_model.external_input().end()); - for (const auto &op : train_predict_model.op()) { - for (auto &output : op.output()) { - existing_inputs.insert(output); - } - } - for (const auto &input : base_predict_model.external_input()) { - if (existing_inputs.find(input) == existing_inputs.end()) { - train_predict_model.add_external_input(input); - } - } - for (const auto &output : base_predict_model.external_output()) { - train_predict_model.add_external_output(output); - } - // auto op = train_init_model.add_op(); - // op->set_type("ConstantFill"); - // auto arg = op->add_arg(); - // arg->set_name("shape"); - // arg->add_ints(1); - // op->add_output(layer); -} - -void copy_test_model(NetDef &base_predict_model, NetDef &test_predict_model) { - for (const auto &op : base_predict_model.op()) { - auto new_op = test_predict_model.add_op(); - 
new_op->CopyFrom(op); - set_trainable(*new_op, false); - } - for (const auto &input : base_predict_model.external_input()) { - test_predict_model.add_external_input(input); - } - for (const auto &output : base_predict_model.external_output()) { - test_predict_model.add_external_output(output); - } -} - -} // namespace caffe2 - -#endif // MISC_H diff --git a/include/caffe2/util/model.h b/include/caffe2/util/model.h index 59304cf4..8fd2c3bf 100644 --- a/include/caffe2/util/model.h +++ b/include/caffe2/util/model.h @@ -16,7 +16,6 @@ class ModelUtil { } ModelUtil(NetUtil &init, NetUtil &predict) : init(init), predict(predict) {} - void SetName(const std::string &name); void AddDatabaseOps(const std::string &name, const std::string &data, const std::string &db, const std::string &db_type, int batch_size); @@ -43,6 +42,19 @@ class ModelUtil { std::vector Params() { return predict.CollectParams(); } + void Split(const std::string &layer, ModelUtil &firstModel, + ModelUtil &secondModel, bool force_cpu, bool inclusive = true); + void CopyTrain(const std::string &layer, int out_size, + ModelUtil &train) const; + void CopyTest(ModelUtil &test) const; + void CopyDeploy(ModelUtil &deploy, Workspace &workspace) const; + + size_t Write(const std::string &path_prefix) const; + size_t Read(const std::string &path_prefix); + void SetName(const std::string &name); + void SetDeviceCUDA(); + std::string Short(); + public: NetUtil init; NetUtil predict; diff --git a/include/caffe2/util/net.h b/include/caffe2/util/net.h index 27fd9b48..e1b88cf9 100644 --- a/include/caffe2/util/net.h +++ b/include/caffe2/util/net.h @@ -166,6 +166,9 @@ class NetUtil { void AddInput(const std::string input); void AddOutput(const std::string output); + const std::string& Input(int i) { return net.external_input(i); } + const std::string& Output(int i) { return net.external_output(i); } + void SetName(const std::string name); void SetType(const std::string type); @@ -193,6 +196,10 @@ class NetUtil { 
std::string Proto(); std::string Short(); void Print(); + size_t Write(const std::string& path) const; + size_t WriteText(const std::string& path) const; + size_t Read(const std::string& path); + void SetDeviceCUDA(); public: diff --git a/include/caffe2/util/preprocess.h b/include/caffe2/util/preprocess.h new file mode 100644 index 00000000..af6f5782 --- /dev/null +++ b/include/caffe2/util/preprocess.h @@ -0,0 +1,240 @@ +#ifndef PREPROCESS_H +#define PREPROCESS_H + +#include +#include +#include + +#include "caffe2/util/blob.h" +#include "caffe2/util/model.h" +#include "caffe2/util/net.h" +#include "caffe2/util/tensor.h" +#include "caffe2/util/train.h" + +#include +#include + +namespace caffe2 { + +static std::map percentage_for_run({ + {kRunTest, 10}, {kRunValidate, 20}, {kRunTrain, 70}, +}); + +std::string filename_to_key(const std::string &filename) { + // return filename; + return std::to_string(std::hash{}(filename)) + "_" + filename; +} + +void load_labels(const std::string &folder, const std::string &path_prefix, + std::vector &class_labels, + std::vector> &image_files) { + auto classes_text_path = path_prefix + "classes.txt"; + ; + std::ifstream infile(classes_text_path); + std::string line; + while (std::getline(infile, line)) { + if (line.size()) { + class_labels.push_back(line); + } + } + + auto directory = opendir(folder.c_str()); + CAFFE_ENFORCE(directory, "no image folder " + folder); + if (directory) { + struct stat s; + struct dirent *entry; + while ((entry = readdir(directory))) { + auto class_name = entry->d_name; + auto class_path = folder + '/' + class_name; + if (class_name[0] != '.' 
&& class_name[0] != '_' && + !stat(class_path.c_str(), &s) && (s.st_mode & S_IFDIR)) { + auto subdir = opendir(class_path.c_str()); + if (subdir) { + auto class_index = + find(class_labels.begin(), class_labels.end(), class_name) - + class_labels.begin(); + if (class_index == class_labels.size()) { + class_labels.push_back(class_name); + } + while ((entry = readdir(subdir))) { + auto image_file = entry->d_name; + auto image_path = class_path + '/' + image_file; + if (image_file[0] != '.' && !stat(image_path.c_str(), &s) && + (s.st_mode & S_IFREG)) { + image_files.push_back({image_path, class_index}); + } + } + closedir(subdir); + } + } + } + closedir(directory); + } + CAFFE_ENFORCE(image_files.size(), "no images found in " + folder); + std::random_shuffle(image_files.begin(), image_files.end()); + + std::ofstream class_file(classes_text_path); + if (class_file.is_open()) { + for (auto &label : class_labels) { + class_file << label << std::endl; + } + class_file.close(); + } + auto classes_header_path = path_prefix + "classes.h"; + std::ofstream labels_file(classes_header_path.c_str()); + if (labels_file.is_open()) { + labels_file << "const char * retrain_classes[] {"; + bool first = true; + for (auto &label : class_labels) { + if (first) { + first = false; + } else { + labels_file << ','; + } + labels_file << std::endl << '"' << label << '"'; + } + labels_file << std::endl << "};" << std::endl; + labels_file.close(); + } +} + +int write_batch(Workspace &workspace, NetBase *predict_net, + std::string &input_name, std::string &output_name, + std::vector> &batch_files, + std::unique_ptr *database, int size_to_fit) { + std::unique_ptr transaction[kRunNum]; + for (int i = 0; i < kRunNum; i++) { + transaction[i] = database[i]->NewTransaction(); + } + + std::vector filenames; + for (auto &pair : batch_files) { + filenames.push_back(pair.first); + } + std::vector indices; + TensorCPU input; + TensorUtil(input).ReadImages(filenames, size_to_fit, indices); + TensorCPU 
output; + if (predict_net && input.size() > 0) { + BlobUtil(*workspace.GetBlob(input_name)).Set(input); + predict_net->Run(); + auto tensor = BlobUtil(*workspace.GetBlob(output_name)).Get(); + output.ResizeLike(tensor); + output.ShareData(tensor); + } else { + output.ResizeLike(input); + output.ShareData(input); + } + + TensorProtos protos; + TensorProto *data = protos.add_protos(); + TensorProto *label = protos.add_protos(); + data->set_data_type(TensorProto::FLOAT); + label->set_data_type(TensorProto::INT32); + label->add_int32_data(0); + TensorSerializer serializer; + std::string value; + std::vector dims(output.dims().begin() + 1, output.dims().end()); + auto size = output.dim(0) ? output.size() / output.dim(0) : 0; + auto output_data = output.data(); + for (auto i : indices) { + auto single = TensorCPU( + dims, std::vector(output_data, output_data + size), NULL); + output_data += size; + data->Clear(); + serializer.Serialize(single, "", data, 0, kDefaultChunkSize); + label->set_int32_data(0, batch_files[i].second); + protos.SerializeToString(&value); + int percentage = 0, p = (int)(rand() * 100.0 / RAND_MAX); + auto key = filename_to_key(batch_files[i].first); + for (auto pair : percentage_for_run) { + percentage += pair.second; + if (p < percentage) { + transaction[pair.first]->Put(key, value); + break; + } + } + } + + for (int i = 0; i < kRunNum; i++) { + transaction[i]->Commit(); + } + + return indices.size(); +} + +int preprocess(const std::vector> &image_files, + const std::string *db_paths, ModelUtil &model, + const std::string &db_type, int batch_size, int size_to_fit) { + std::unique_ptr database[kRunNum]; + for (int i = 0; i < kRunNum; i++) { + database[i] = db::CreateDB(db_type, db_paths[i], db::WRITE); + } + auto image_count = 0; + auto sample_count = 0; + Workspace workspace; + auto init_net = CreateNet(model.init.net, &workspace); + init_net->Run(); + auto predict_net = model.predict.net.external_input_size() + ? 
CreateNet(model.predict.net, &workspace) + : NULL; + auto input_name = model.predict.net.external_input_size() + ? model.predict.net.external_input(0) + : ""; + auto output_name = model.predict.net.external_output_size() + ? model.predict.net.external_output(0) + : ""; + std::vector> batch_files; + for (auto &pair : image_files) { + auto &filename = pair.first; + auto class_index = pair.second; + image_count++; + auto in_db = false; + auto key = filename_to_key(filename); + for (int i = 0; i < kRunNum && !in_db; i++) { + auto cursor = database[i]->NewCursor(); + cursor->Seek(key); + in_db |= (cursor->Valid() && cursor->key() == key); + } + if (!in_db) { + batch_files.push_back({filename, class_index}); + } + if (image_count % 10 == 0) { + std::cerr << '\r' << std::string(40, ' ') << '\r' << "pre-processing.. " + << image_count << '/' << image_files.size() << " " + << std::setprecision(3) + << ((float)100 * image_count / image_files.size()) << "%" + << std::flush; + } + if (batch_files.size() == batch_size) { + sample_count += write_batch( + workspace, predict_net ? predict_net.get() : NULL, input_name, + output_name, batch_files, database, size_to_fit); + batch_files.clear(); + } + } + if (batch_files.size() > 0) { + sample_count += write_batch( + workspace, predict_net ? 
predict_net.get() : NULL, input_name, + output_name, batch_files, database, size_to_fit); + } + for (int i = 0; i < kRunNum; i++) { + CAFFE_ENFORCE(database[i]->NewCursor()->Valid(), + "database " + name_for_run[i] + " is empty"); + } + std::cerr << '\r' << std::string(80, ' ') << '\r'; + + return sample_count; +} + +void preprocess(const std::vector> &image_files, + const std::string *db_paths, const std::string &db_type, + int size_to_fit) { + NetDef n; + ModelUtil none(n, n); + preprocess(image_files, db_paths, none, db_type, 64, size_to_fit); +} + +} // namespace caffe2 + +#endif // PREPROCESS_H diff --git a/include/caffe2/util/train.h b/include/caffe2/util/train.h new file mode 100644 index 00000000..a475a836 --- /dev/null +++ b/include/caffe2/util/train.h @@ -0,0 +1,95 @@ +#ifndef TRAIN_H +#define TRAIN_H + +#include +#include +#include + +#include "caffe2/util/blob.h" +#include "caffe2/util/model.h" +#include "caffe2/util/net.h" +#include "caffe2/util/tensor.h" + +namespace caffe2 { + +enum { kRunTrain = 0, kRunValidate = 1, kRunTest = 2, kRunNum = 3 }; + +static std::map name_for_run({ + {kRunTrain, "train"}, {kRunValidate, "validate"}, {kRunTest, "test"}, +}); + +void run_trainer(int epochs, ModelUtil &train, ModelUtil &validate, + Workspace &workspace, clock_t &train_time, + clock_t &validate_time) { + CreateNet(train.init.net, &workspace)->Run(); + CreateNet(validate.init.net, &workspace)->Run(); + + auto train_net = CreateNet(train.predict.net, &workspace); + auto validate_net = CreateNet(validate.predict.net, &workspace); + + auto last_time = clock(); + auto last_i = 0; + auto sum_accuracy = 0.f, sum_loss = 0.f; + + for (auto i = 1; i <= epochs; i++) { + train_time -= clock(); + train_net->Run(); + train_time += clock(); + + sum_accuracy += + BlobUtil(*workspace.GetBlob("accuracy")).Get().data()[0]; + sum_loss += BlobUtil(*workspace.GetBlob("loss")).Get().data()[0]; + + auto steps_time = (float)(clock() - last_time) / CLOCKS_PER_SEC; + if (steps_time > 
5 || i >= epochs) { + auto iter = BlobUtil(*workspace.GetBlob("iter")).Get().data()[0]; + auto lr = BlobUtil(*workspace.GetBlob("lr")).Get().data()[0]; + auto train_loss = sum_loss / (i - last_i), + train_accuracy = sum_accuracy / (i - last_i); + sum_loss = 0; + sum_accuracy = 0; + validate_time -= clock(); + validate_net->Run(); + validate_time += clock(); + auto validate_accuracy = + BlobUtil(*workspace.GetBlob("accuracy")).Get().data()[0]; + std::cout << "step: " << iter << " rate: " << lr + << " loss: " << train_loss << " accuracy: " << train_accuracy + << " | " << validate_accuracy + << " step_time: " << std::setprecision(3) + << steps_time / (i - last_i) << "s" << std::endl; + last_i = i; + last_time = clock(); + } + } +} + +void run_tester(int epochs, ModelUtil &test, Workspace &workspace, + clock_t &test_time) { + CreateNet(test.init.net, &workspace)->Run(); + auto test_net = CreateNet(test.predict.net, &workspace); + + auto sum_accuracy = 0.f, sum_loss = 0.f; + auto test_step = 10; + for (auto i = 1; i <= epochs; i++) { + test_time -= clock(); + test_net->Run(); + test_time += clock(); + + sum_accuracy += + BlobUtil(*workspace.GetBlob("accuracy")).Get().data()[0]; + sum_loss += BlobUtil(*workspace.GetBlob("loss")).Get().data()[0]; + + if (i % test_step == 0) { + auto loss = sum_loss / test_step, accuracy = sum_accuracy / test_step; + sum_loss = 0; + sum_accuracy = 0; + std::cout << "step: " << i << " loss: " << loss + << " accuracy: " << accuracy << std::endl; + } + } +} + +} // namespace caffe2 + +#endif // TRAIN_H diff --git a/include/caffe2/zoo/keeper.h b/include/caffe2/zoo/keeper.h index 4bbaf890..b221f8a1 100644 --- a/include/caffe2/zoo/keeper.h +++ b/include/caffe2/zoo/keeper.h @@ -119,7 +119,7 @@ class Keeper { << '\r'; return result == CURLE_OK; #else - std::cout << "model download not supported, install cURL" << std::endl; + CAFFE_THROW("model download not supported, install cURL"); return false; #endif } @@ -145,50 +145,50 @@ class Keeper { 
return true; } - void addTrainedModel(NetDef &init_model, NetDef &predict_model) { - auto at = name_.find("%"); - if (at == std::string::npos) { - CAFFE_ENFORCE(ensureModel(), "model ", name_, " not found"); - std::string init_filename = "res/" + name_ + "_init_net.pb"; - std::string predict_filename = "res/" + name_ + "_predict_net.pb"; - CAFFE_ENFORCE(ReadProtoFromFile(init_filename.c_str(), &init_model)); - CAFFE_ENFORCE(ReadProtoFromFile(predict_filename.c_str(), &predict_model)); - } else { - std::string init_filename = name_.substr(0, at) + "init" + name_.substr(at + 1); - std::string predict_filename = name_.substr(0, at) + "predict" + name_.substr(at + 1); - CAFFE_ENFORCE(ReadProtoFromFile(init_filename.c_str(), &init_model)); - CAFFE_ENFORCE(ReadProtoFromFile(predict_filename.c_str(), &predict_model)); - } + size_t addTrainedModel(ModelUtil &model) { + CAFFE_ENFORCE(ensureModel(), "model ", name_, " not found"); + return model.Read("res/" + name_); } - void addUntrainedModel(NetDef &init_model, NetDef &predict_model) { + size_t addUntrainedModel(ModelUtil &model) { if (name_ == "alexnet") { - AlexNetModel(init_model, predict_model).Add(); + AlexNetModel(model.init.net, model.predict.net).Add(); } else if (name_ == "googlenet") { - GoogleNetModel(init_model, predict_model).Add(); + GoogleNetModel(model.init.net, model.predict.net).Add(); } else if (name_ == "squeezenet") { - SqueezeNetModel(init_model, predict_model).Add(); + SqueezeNetModel(model.init.net, model.predict.net).Add(); } else if (name_ == "vgg16") { - VGGModel(init_model, predict_model).Add(16); + VGGModel(model.init.net, model.predict.net).Add(16); } else if (name_ == "vgg19") { - VGGModel(init_model, predict_model).Add(19); + VGGModel(model.init.net, model.predict.net).Add(19); } else if (name_ == "resnet50") { - ResNetModel(init_model, predict_model).Add(50); + ResNetModel(model.init.net, model.predict.net).Add(50); } else if (name_ == "resnet101") { - ResNetModel(init_model, 
predict_model).Add(101); + ResNetModel(model.init.net, model.predict.net).Add(101); } else if (name_ == "resnet152") { - ResNetModel(init_model, predict_model).Add(152); + ResNetModel(model.init.net, model.predict.net).Add(152); } else { CAFFE_THROW("model " + name_ + " not implemented"); } + return 0; } - void AddModel(NetDef &init_model, NetDef &predict_model, bool trained) { - if (trained) { - addTrainedModel(init_model, predict_model); + size_t AddModel(ModelUtil &model, bool trained) { + auto at = name_.find("%"); + size_t size = 0; + if (at == std::string::npos) { + if (trained) { + size = addTrainedModel(model); + } else { + size = addUntrainedModel(model); + } } else { - addUntrainedModel(init_model, predict_model); + size += + model.init.Read(name_.substr(0, at) + "init" + name_.substr(at + 1)); + size += model.predict.Read(name_.substr(0, at) + "predict" + + name_.substr(at + 1)); } + return size; } protected: diff --git a/src/caffe2/binaries/diff.cc b/src/caffe2/binaries/diff.cc index 3ca0c884..9fb86d90 100644 --- a/src/caffe2/binaries/diff.cc +++ b/src/caffe2/binaries/diff.cc @@ -15,19 +15,19 @@ namespace caffe2 { void run() { NetDef init_model, predict_model; + ModelUtil model(init_model, predict_model); if (FLAGS_code && !FLAGS_file) { - Keeper(FLAGS_model).AddModel(init_model, predict_model, false); + Keeper(FLAGS_model).AddModel(model, false); } else if (!FLAGS_code && FLAGS_file) { - Keeper(FLAGS_model).AddModel(init_model, predict_model, true); - NetUtil(init_model).SetFillToTrain(); + Keeper(FLAGS_model).AddModel(model, true); + model.init.SetFillToTrain(); } else { std::cerr << "set either --code or --file" << std::endl; } if (FLAGS_short) { - std::cout << NetUtil(predict_model).Short(); - std::cout << NetUtil(init_model).Short(); + std::cout << model.Short(); } else { google::protobuf::io::OstreamOutputStream stream(&std::cout); google::protobuf::TextFormat::Print(predict_model, &stream); diff --git a/src/caffe2/binaries/dream.cc 
b/src/caffe2/binaries/dream.cc index 32fbf1ef..d1516ce7 100644 --- a/src/caffe2/binaries/dream.cc +++ b/src/caffe2/binaries/dream.cc @@ -1,16 +1,15 @@ #include #include +#include #include "caffe2/util/blob.h" #include "caffe2/util/plot.h" #include "caffe2/util/tensor.h" #include "caffe2/util/window.h" -#include "caffe2/utils/proto_utils.h" #include "caffe2/zoo/keeper.h" #include #include -#include "caffe2/util/misc.h" #include "res/imagenet_classes.h" CAFFE2_DEFINE_string(model, "", "Name of one of the pre-trained models."); @@ -32,44 +31,41 @@ CAFFE2_DEFINE_bool(display, false, "Show image while dreaming."); namespace caffe2 { -void AddNaive(NetDef &init_model, NetDef &dream_model, NetDef &display_model, - int size) { - auto &input = dream_model.external_input(0); - auto &output = dream_model.external_output(0); - - NetUtil init(init_model), dream(dream_model), display(display_model); +void AddNaive(ModelUtil &dream, NetUtil &display, int size) { + auto &input = dream.predict.Input(0); + auto &output = dream.predict.Output(0); // initialize input data - init.AddUniformFillOp({FLAGS_batch, 3, size, size}, FLAGS_initial, - FLAGS_initial + 1, input); + dream.init.AddUniformFillOp({FLAGS_batch, 3, size, size}, FLAGS_initial, + FLAGS_initial + 1, input); // add squared l2 distance to zero as loss if (FLAGS_channel >= 0) { - dream.AddSquaredL2ChannelOp(output, "loss", FLAGS_channel); + dream.predict.AddSquaredL2ChannelOp(output, "loss", FLAGS_channel); } else { - dream.AddSquaredL2Op(output, "loss"); + dream.predict.AddSquaredL2Op(output, "loss"); } - dream.AddConstantFillWithOp(1.f, "loss", "loss_grad"); + dream.predict.AddConstantFillWithOp(1.f, "loss", "loss_grad"); if (FLAGS_display) { - NetUtil(dream).AddTimePlotOp("loss"); + dream.predict.AddTimePlotOp("loss"); } // add back prop - dream.AddAllGradientOp(); + dream.predict.AddAllGradientOp(); // scale gradient - dream.AddMeanStdevOp(input + "_grad", "_", input + "_grad_stdev"); - dream.AddConstantFillWithOp(0.f, 
input + "_grad_stdev", "zero"); - dream.AddScaleOp(input + "_grad_stdev", input + "_grad_stdev", - 1 / FLAGS_learning_rate); - dream.AddAffineScaleOp(input + "_grad", "zero", input + "_grad_stdev", - input + "_grad", true); + dream.predict.AddMeanStdevOp(input + "_grad", "_", input + "_grad_stdev"); + dream.predict.AddConstantFillWithOp(0.f, input + "_grad_stdev", "zero"); + dream.predict.AddScaleOp(input + "_grad_stdev", input + "_grad_stdev", + 1 / FLAGS_learning_rate); + dream.predict.AddAffineScaleOp(input + "_grad", "zero", input + "_grad_stdev", + input + "_grad", true); // apply gradient to input data - init.AddConstantFillOp({1}, 1.f, "one"); - dream.AddInput("one"); - dream.AddWeightedSumOp({input, "one", input + "_grad", "one"}, input); + dream.init.AddConstantFillOp({1}, 1.f, "one"); + dream.predict.AddInput("one"); + dream.predict.AddWeightedSumOp({input, "one", input + "_grad", "one"}, input); // scale data to image if (FLAGS_image_file.size()) { @@ -107,11 +103,11 @@ void run() { std::cout << "batch: " << FLAGS_batch << std::endl; std::cout << "size: " << FLAGS_size << std::endl; - std::cout << "train_runs: " << FLAGS_train_runs << std::endl; - std::cout << "scale_runs: " << FLAGS_scale_runs << std::endl; - std::cout << "percent_incr: " << FLAGS_percent_incr << std::endl; + std::cout << "train-runs: " << FLAGS_train_runs << std::endl; + std::cout << "scale-runs: " << FLAGS_scale_runs << std::endl; + std::cout << "percent-incr: " << FLAGS_percent_incr << std::endl; std::cout << "initial: " << FLAGS_initial << std::endl; - std::cout << "learning_rate: " << FLAGS_learning_rate << std::endl; + std::cout << "learning-rate: " << FLAGS_learning_rate << std::endl; std::cout << "display: " << (FLAGS_display ? "true" : "false") << std::endl; std::cout << std::endl; @@ -143,21 +139,23 @@ void run() { std::cout << "loading model.." 
<< std::endl; clock_t load_time = 0; NetDef base_init_model, base_predict_model; - + ModelUtil base(base_init_model, base_predict_model); // read model files load_time -= clock(); - Keeper(FLAGS_model).AddModel(base_init_model, base_predict_model, true); + Keeper(FLAGS_model).AddModel(base, true); load_time += clock(); // extract dream model - NetUtil(base_predict_model).CheckLayerAvailable(FLAGS_layer); + base.predict.CheckLayerAvailable(FLAGS_layer); NetDef init_model, dream_model, display_model, unused_model; - NetUtil init(init_model), dream(dream_model), display(display_model); - split_model(base_init_model, base_predict_model, FLAGS_layer, init_model, - dream_model, unused_model, unused_model, FLAGS_device != "cudnn", - false); + NetUtil display(display_model); + ModelUtil dream(init_model, dream_model); + ModelUtil unused(unused_model, unused_model); + + base.Split(FLAGS_layer, dream, unused, FLAGS_device != "cudnn", false); - // add_cout_op(dream_model, { "_conv2/norm2_scale" })->set_engine("CUDNN"); + // add_cout_op(dream.predict.net, { "_conv2/norm2_scale" + // })->set_engine("CUDNN"); // add dream operators auto image_size = FLAGS_size; @@ -167,18 +165,16 @@ void run() { if (image_size < 20) { image_size = 20; } - AddNaive(init_model, dream_model, display_model, image_size); + AddNaive(dream, display, image_size); // set model to use CUDA if (FLAGS_device != "cpu") { - init.SetDeviceCUDA(); dream.SetDeviceCUDA(); display.SetDeviceCUDA(); // dream.SetEngineCudnnOps(); } if (FLAGS_dump_model) { - std::cout << init.Short(); std::cout << dream.Short(); std::cout << display.Short(); } @@ -188,14 +184,14 @@ void run() { Workspace workspace; // setup workspace - auto init_net = CreateNet(init_model, &workspace); - auto predict_net = CreateNet(dream_model, &workspace); - auto display_net = CreateNet(display_model, &workspace); + auto init_net = CreateNet(dream.init.net, &workspace); + auto predict_net = CreateNet(dream.predict.net, &workspace); + auto 
display_net = CreateNet(display.net, &workspace); init_net->Run(); // read image as tensor if (FLAGS_image_file.size()) { - auto &input_name = dream_model.external_input(0); + auto &input_name = dream.predict.Input(0); TensorCPU input; std::vector x; TensorUtil(input).ReadImages({FLAGS_image_file}, image_size, x, 128); diff --git a/src/caffe2/binaries/imagenet.cc b/src/caffe2/binaries/imagenet.cc index 7df15a43..aa901b7c 100644 --- a/src/caffe2/binaries/imagenet.cc +++ b/src/caffe2/binaries/imagenet.cc @@ -1,8 +1,8 @@ #include #include +#include #include "caffe2/util/blob.h" #include "caffe2/util/tensor.h" -#include "caffe2/utils/proto_utils.h" #include "caffe2/zoo/keeper.h" #include "caffe2/util/cmd.h" @@ -54,12 +54,14 @@ void run() { return; } + auto cuda = (FLAGS_device != "cpu" && cmd_setup_cuda()); + std::cout << "model: " << FLAGS_model << std::endl; - std::cout << "image_file: " << FLAGS_image_file << std::endl; - std::cout << "size_to_fit: " << FLAGS_size_to_fit << std::endl; + std::cout << "image-file: " << FLAGS_image_file << std::endl; + std::cout << "size-to-fit: " << FLAGS_size_to_fit << std::endl; std::cout << "device: " << FLAGS_device << std::endl; - - if (FLAGS_device != "cpu") cmd_setup_cuda(); + std::cout << "using cuda: " << (cuda ? "true" : "false") << std::endl; + ; std::cout << std::endl; @@ -70,31 +72,20 @@ void run() { std::cout << "loading model.." 
<< std::endl; clock_t load_time = 0; NetDef init_model, predict_model; - NetUtil init(init_model), predict(predict_model); + ModelUtil model(init_model, predict_model); // read model files load_time -= clock(); - Keeper(FLAGS_model).AddModel(init_model, predict_model, true); + size_t model_size = Keeper(FLAGS_model).AddModel(model, true); load_time += clock(); - // get model size - auto init_size = std::ifstream("res/" + FLAGS_model + "_init_net.pb", - std::ifstream::ate | std::ifstream::binary) - .tellg(); - auto predict_size = std::ifstream("res/" + FLAGS_model + "_predict_net.pb", - std::ifstream::ate | std::ifstream::binary) - .tellg(); - auto model_size = init_size + predict_size; - // set model to use CUDA if (FLAGS_device != "cpu") { - init.SetDeviceCUDA(); - predict.SetDeviceCUDA(); + model.SetDeviceCUDA(); } if (FLAGS_dump_model) { - std::cout << init.Short(); - std::cout << predict.Short(); + std::cout << model.Short(); } std::cout << "running model.." << std::endl; @@ -102,10 +93,10 @@ void run() { Workspace workspace; // setup workspace - auto &input_name = predict_model.external_input(0); - auto &output_name = predict_model.external_output(0); - auto init_net = CreateNet(init_model, &workspace); - auto predict_net = CreateNet(predict_model, &workspace); + auto &input_name = model.predict.Input(0); + auto &output_name = model.predict.Output(0); + auto init_net = CreateNet(model.init.net, &workspace); + auto predict_net = CreateNet(model.predict.net, &workspace); init_net->Run(); // run predictor diff --git a/src/caffe2/binaries/inspect.cc b/src/caffe2/binaries/inspect.cc index 22ad1d02..41a7fa4c 100644 --- a/src/caffe2/binaries/inspect.cc +++ b/src/caffe2/binaries/inspect.cc @@ -1,4 +1,9 @@ -#include "caffe2/util/misc.h" +#include +#include + +#include "caffe2/util/model.h" +#include "caffe2/util/net.h" +#include "caffe2/util/tensor.h" CAFFE2_DEFINE_string(path, "res/mnist-test-nchw-leveldb", "path of the database"); @@ -6,13 +11,39 @@ 
CAFFE2_DEFINE_string(db_type, "leveldb", "The database type."); namespace caffe2 { +void dump_database(const std::string db_path, const std::string& db_type) { + std::cout << "dumping database.." << std::endl; + std::unique_ptr database = db::CreateDB(db_type, db_path, db::READ); + + for (auto cursor = database->NewCursor(); cursor->Valid(); cursor->Next()) { + auto key = cursor->key().substr(0, 48); + auto value = cursor->value(); + TensorProtos protos; + protos.ParseFromString(value); + auto tensor_proto = protos.protos(0); + auto label_proto = protos.protos(1); + TensorDeserializer deserializer; + TensorCPU tensor; + int label = label_proto.int32_data(0); + deserializer.Deserialize(tensor_proto, &tensor); + auto dims = tensor.dims(); + dims.insert(dims.begin(), 1); + tensor.Resize(dims); + std::cout << key << " " + << (value.size() > 1000 ? value.size() / 1000 : value.size()) + << (value.size() > 1000 ? "K" : "B") << " (" << tensor.dims() + << ") " << label << std::endl; + TensorUtil(tensor).ShowImage("inspect", 0, 1.0, 128); + } +} + void run() { std::cout << std::endl; std::cout << "## Database inspector ##" << std::endl; std::cout << std::endl; std::cout << "path: " << FLAGS_path << std::endl; - std::cout << "db_type: " << FLAGS_db_type << std::endl; + std::cout << "db-type: " << FLAGS_db_type << std::endl; dump_database(FLAGS_path, FLAGS_db_type); } diff --git a/src/caffe2/binaries/mnist.cc b/src/caffe2/binaries/mnist.cc index 7879f146..57497d99 100644 --- a/src/caffe2/binaries/mnist.cc +++ b/src/caffe2/binaries/mnist.cc @@ -162,11 +162,11 @@ void run() { return; } - std::cout << "train_db: " << FLAGS_train_db << std::endl; - std::cout << "test_db: " << FLAGS_test_db << std::endl; - std::cout << "train_runs: " << FLAGS_train_runs << std::endl; - std::cout << "test_runs: " << FLAGS_test_runs << std::endl; - std::cout << "force_cpu: " << (FLAGS_force_cpu ? 
"true" : "false") + std::cout << "train-db: " << FLAGS_train_db << std::endl; + std::cout << "test-db: " << FLAGS_test_db << std::endl; + std::cout << "train-runs: " << FLAGS_train_runs << std::endl; + std::cout << "test-runs: " << FLAGS_test_runs << std::endl; + std::cout << "force-cpu: " << (FLAGS_force_cpu ? "true" : "false") << std::endl; std::cout << "display: " << (FLAGS_display ? "true" : "false") << std::endl; @@ -200,64 +200,62 @@ void run() { // >>> train_model = model_helper.ModelHelper(name="mnist_train", // arg_scope={"order": "NCHW"}) - NetDef initTrainModel, predictTrainModel; - ModelUtil trainModel(initTrainModel, predictTrainModel, "mnist_train"); + NetDef train_init_model, train_predict_model; + ModelUtil train(train_init_model, train_predict_model, "mnist_train"); // >>> data, label = AddInput(train_model, batch_size=64, // db=os.path.join(data_folder, 'mnist-train-nchw-leveldb'), // db_type='leveldb') - AddInput(trainModel, 64, FLAGS_train_db, "leveldb"); + AddInput(train, 64, FLAGS_train_db, "leveldb"); // >>> softmax = AddLeNetModel(train_model, data) - AddLeNetModel(trainModel, false); + AddLeNetModel(train, false); // >>> AddTrainingOperators(train_model, softmax, label) - AddTrainingOperators(trainModel); + AddTrainingOperators(train); // >>> AddBookkeepingOperators(train_model) - AddBookkeepingOperators(trainModel); + AddBookkeepingOperators(train); // >>> test_model = model_helper.ModelHelper(name="mnist_test", // arg_scope=arg_scope, init_params=False) - NetDef initTestModel, predictTestModel; - ModelUtil testModel(initTestModel, predictTestModel, "mnist_test"); + NetDef test_init_model, test_predict_model; + ModelUtil test(test_init_model, test_predict_model, "mnist_test"); // >>> data, label = AddInput(test_model, batch_size=100, // db=os.path.join(data_folder, 'mnist-test-nchw-leveldb'), db_type='leveldb') - AddInput(testModel, 100, FLAGS_test_db, "leveldb"); + AddInput(test, 100, FLAGS_test_db, "leveldb"); // >>> softmax = 
AddLeNetModel(test_model, data) - AddLeNetModel(testModel, true); + AddLeNetModel(test, true); // >>> AddAccuracy(test_model, softmax, label) - AddAccuracy(testModel); + AddAccuracy(test); // >>> deploy_model = model_helper.ModelHelper(name="mnist_deploy", // arg_scope=arg_scope, init_params=False) - NetDef initDeployModel, predictDeployModel; - ModelUtil deployModel(initDeployModel, predictDeployModel, "mnist_model"); - predictDeployModel.add_external_input("data"); + NetDef deploy_init_model, deploy_predict_model; + ModelUtil deploy(deploy_init_model, deploy_predict_model, "mnist_model"); + deploy.predict.AddInput("data"); // >>> AddLeNetModel(deploy_model, "data") - AddLeNetModel(deployModel, true); + AddLeNetModel(deploy, true); #ifdef WITH_CUDA if (!FLAGS_force_cpu) { - initTrainModel.mutable_device_option()->set_device_type(CUDA); - predictTrainModel.mutable_device_option()->set_device_type(CUDA); - initTestModel.mutable_device_option()->set_device_type(CUDA); - predictTestModel.mutable_device_option()->set_device_type(CUDA); + train.SetDeviceCUDA(); + test.SetDeviceCUDA(); } #endif std::cout << std::endl; // >>> workspace.RunNetOnce(train_model.param_init_net) - auto initTrainNet = CreateNet(initTrainModel, &workspace); + auto initTrainNet = CreateNet(train.init.net, &workspace); initTrainNet->Run(); // >>> workspace.CreateNet(train_model.net) - auto predictTrainNet = CreateNet(predictTrainModel, &workspace); + auto predictTrainNet = CreateNet(train.predict.net, &workspace); std::cout << "training.." << std::endl; @@ -280,11 +278,11 @@ void run() { std::cout << std::endl; // >>> workspace.RunNetOnce(test_model.param_init_net) - auto initTestNet = CreateNet(initTestModel, &workspace); + auto initTestNet = CreateNet(test.init.net, &workspace); initTestNet->Run(); // >>> workspace.CreateNet(test_model.net) - auto predictTestNet = CreateNet(predictTestModel, &workspace); + auto predictTestNet = CreateNet(test.predict.net, &workspace); std::cout << "testing.." 
<< std::endl; @@ -303,9 +301,9 @@ void run() { // with open(os.path.join(root_folder, "deploy_net.pbtxt"), 'w') as fid: // fid.write(str(deploy_model.net.Proto())) - for (auto &param : predictDeployModel.external_input()) { + for (auto &param : deploy.predict.net.external_input()) { auto tensor = BlobUtil(*workspace.GetBlob(param)).Get(); - auto op = initDeployModel.add_op(); + auto op = deploy.init.net.add_op(); op->set_type("GivenTensorFill"); auto arg1 = op->add_arg(); arg1->set_name("shape"); @@ -320,9 +318,8 @@ void run() { } op->add_output(param); } - WriteProtoToTextFile(predictDeployModel, "tmp/mnist_predict_net.pbtxt"); - WriteProtoToBinaryFile(initDeployModel, "tmp/mnist_init_net.pb"); - WriteProtoToBinaryFile(predictDeployModel, "tmp/mnist_predict_net.pb"); + deploy.predict.WriteText("tmp/mnist_predict_net.pbtxt"); + deploy.Write("tmp/mnist"); } void predict_example() { diff --git a/src/caffe2/binaries/pretrained.cc b/src/caffe2/binaries/pretrained.cc index 1e047b24..c7c0ac92 100644 --- a/src/caffe2/binaries/pretrained.cc +++ b/src/caffe2/binaries/pretrained.cc @@ -1,6 +1,6 @@ #include #include -#include "caffe2/utils/proto_utils.h" +#include #include #include @@ -42,10 +42,10 @@ void run() { return; } - std::cout << "init_net: " << FLAGS_init_net << std::endl; - std::cout << "predict_net: " << FLAGS_predict_net << std::endl; - std::cout << "image_file: " << FLAGS_image_file << std::endl; - std::cout << "size_to_fit: " << FLAGS_size_to_fit << std::endl; + std::cout << "init-net: " << FLAGS_init_net << std::endl; + std::cout << "predict-net: " << FLAGS_predict_net << std::endl; + std::cout << "image-file: " << FLAGS_image_file << std::endl; + std::cout << "size-to-fit: " << FLAGS_size_to_fit << std::endl; std::cout << std::endl; diff --git a/src/caffe2/binaries/retrain.cc b/src/caffe2/binaries/retrain.cc deleted file mode 100644 index 7aed57d6..00000000 --- a/src/caffe2/binaries/retrain.cc +++ /dev/null @@ -1,264 +0,0 @@ -#include "caffe2/util/misc.h" - -#include
-#include -#include -#include "caffe2/utils/proto_utils.h" -#include "caffe2/zoo/keeper.h" - -#include "res/imagenet_classes.h" - -CAFFE2_DEFINE_string(model, "", "Name of one of the pre-trained models."); -CAFFE2_DEFINE_string(layer, "", - "Name of the layer on which to split the model."); -CAFFE2_DEFINE_string(folder, "", "Folder with subfolders with images"); - -CAFFE2_DEFINE_string(db_type, "leveldb", "The database type."); -CAFFE2_DEFINE_int(size_to_fit, 224, "The image file."); -CAFFE2_DEFINE_int(train_runs, 100, "The of training runs."); -CAFFE2_DEFINE_int(test_runs, 50, "The of training runs."); -CAFFE2_DEFINE_int(batch_size, 64, "Training batch size."); -CAFFE2_DEFINE_double(learning_rate, 1e-4, "Learning rate."); -CAFFE2_DEFINE_bool(reshape_output, false, - "Reshape output (necessary for squeeznet)"); - -#include "caffe2/util/cmd.h" - -namespace caffe2 { - -void run() { - if (!cmd_init("Partial Retrain Example")) { - return; - } - - if (!FLAGS_model.size()) { - std::cerr << "specify a model name using --model " << std::endl; - for (auto const &pair : keeper_model_lookup) { - std::cerr << " " << pair.first << std::endl; - } - return; - } - - if (!FLAGS_folder.size()) { - std::cerr << "specify a image folder using --folder " << std::endl; - return; - } - - if (!FLAGS_layer.size()) { - std::cerr << "specify a layer layer using --layer " << std::endl; - return; - } - - std::cout << "model: " << FLAGS_model << std::endl; - std::cout << "layer: " << FLAGS_layer << std::endl; - std::cout << "image_dir: " << FLAGS_folder << std::endl; - std::cout << "db_type: " << FLAGS_db_type << std::endl; - std::cout << "size_to_fit: " << FLAGS_size_to_fit << std::endl; - std::cout << "train_runs: " << FLAGS_train_runs << std::endl; - std::cout << "test_runs: " << FLAGS_test_runs << std::endl; - std::cout << "batch_size: " << FLAGS_batch_size << std::endl; - std::cout << "learning_rate: " << FLAGS_learning_rate << std::endl; - std::cout << "reshape_output: " << 
FLAGS_reshape_output << std::endl; - - std::string layer_safe = FLAGS_layer; - std::replace(layer_safe.begin(), layer_safe.end(), '/', '_'); - std::replace(layer_safe.begin(), layer_safe.end(), '.', '_'); - - std::string model_safe = FLAGS_model; - std::replace(model_safe.begin(), model_safe.end(), '/', '_'); - auto path_prefix = - FLAGS_folder + '/' + '_' + model_safe + '_' + layer_safe + '_'; - std::string db_paths[kRunNum]; - for (int i = 0; i < kRunNum; i++) { - db_paths[i] = path_prefix + name_for_run[i] + ".db"; - } - - std::cout << std::endl; - - auto load_time = -clock(); - std::vector class_labels; - std::vector> image_files; - load_labels(FLAGS_folder, path_prefix, class_labels, image_files); - - std::cout << "load model.." << std::endl; - NetDef full_init_model, full_predict_model; - NetDef init_model[kRunNum], predict_model[kRunNum]; - for (int i = 0; i < kRunNum; i++) { - init_model[i].set_name(name_for_run[i] + "_init_model"); - predict_model[i].set_name(name_for_run[i] + "_predict_model"); - } - Keeper(FLAGS_model).AddModel(full_init_model, full_predict_model, true); - - NetUtil(full_predict_model).CheckLayerAvailable(FLAGS_layer); - - NetDef first_init_model, first_predict_model, second_init_model, - second_predict_model; - split_model(full_init_model, full_predict_model, FLAGS_layer, - first_init_model, first_predict_model, second_init_model, - second_predict_model, FLAGS_device != "cudnn"); - - if (FLAGS_device != "cpu") { - NetUtil(first_init_model).SetDeviceCUDA(); - NetUtil(first_predict_model).SetDeviceCUDA(); - } - - pre_process(image_files, db_paths, first_init_model, first_predict_model, - FLAGS_db_type, FLAGS_batch_size, FLAGS_size_to_fit); - load_time += clock(); - - for (int i = 0; i < kRunNum; i++) { - ModelUtil(init_model[i], predict_model[i]) - .AddDatabaseOps(name_for_run[i], FLAGS_layer, db_paths[i], - FLAGS_db_type, FLAGS_batch_size); - } - copy_train_model(second_init_model, second_predict_model, FLAGS_layer, - 
class_labels.size(), init_model[kRunTrain], - predict_model[kRunTrain]); - copy_test_model(second_predict_model, predict_model[kRunValidate]); - copy_test_model(second_predict_model, predict_model[kRunTest]); - - auto output = predict_model[kRunTrain].external_output(0); - if (FLAGS_reshape_output) { - auto output_reshaped = output + "_reshaped"; - for (int i = 0; i < kRunNum; i++) { - NetUtil(predict_model[i]).AddReshapeOp(output, output_reshaped, {0, -1}); - } - output = output_reshaped; - } - - ModelUtil(init_model[kRunTrain], predict_model[kRunTrain]) - .AddTrainOps(output, FLAGS_learning_rate, FLAGS_optimizer); - ModelUtil(second_predict_model, predict_model[kRunValidate]) - .AddTestOps(output); - ModelUtil(second_predict_model, predict_model[kRunTest]).AddTestOps(output); - - if (FLAGS_device != "cpu") { - for (int i = 0; i < kRunNum; i++) { - NetUtil(init_model[i]).SetDeviceCUDA(); - NetUtil(predict_model[i]).SetDeviceCUDA(); - } - } - - if (FLAGS_dump_model) { - std::cout << NetUtil(init_model[kRunTrain]).Short(); - std::cout << NetUtil(predict_model[kRunTrain]).Short(); - } - - std::cout << std::endl; - - Workspace workspace("tmp"); - unique_ptr predict_net[kRunNum]; - for (int i = 0; i < kRunNum; i++) { - auto init_net = CreateNet(init_model[i], &workspace); - init_net->Run(); - predict_net[i] = CreateNet(predict_model[i], &workspace); - } - - clock_t train_time = 0; - clock_t validate_time = 0; - clock_t test_time = 0; - - auto last_time = clock(); - auto last_i = 0; - - std::cout << "training.." 
<< std::endl; - for (auto i = 1; i <= FLAGS_train_runs; i++) { - train_time -= clock(); - predict_net[kRunTrain]->Run(); - train_time += clock(); - - auto steps_time = (float)(clock() - last_time) / CLOCKS_PER_SEC; - if (steps_time > 5 || i == FLAGS_train_runs) { - auto iter = BlobUtil(*workspace.GetBlob("iter")).Get().data()[0]; - auto lr = BlobUtil(*workspace.GetBlob("lr")).Get().data()[0]; - auto train_accuracy = - BlobUtil(*workspace.GetBlob("accuracy")).Get().data()[0]; - auto train_loss = - BlobUtil(*workspace.GetBlob("loss")).Get().data()[0]; - validate_time -= clock(); - predict_net[kRunValidate]->Run(); - validate_time += clock(); - auto validate_accuracy = - BlobUtil(*workspace.GetBlob("accuracy")).Get().data()[0]; - std::cout << "step: " << iter << " rate: " << lr - << " loss: " << train_loss << " accuracy: " << train_accuracy - << " | " << validate_accuracy - << " step_time: " << std::setprecision(3) - << steps_time / (i - last_i) << "s" << std::endl; - last_i = i; - last_time = clock(); - } - } - - std::cout << std::endl; - - std::cout << "testing.." 
<< std::endl; - for (auto i = 1; i <= FLAGS_test_runs; i++) { - test_time -= clock(); - predict_net[kRunTest]->Run(); - test_time += clock(); - - if (i % 10 == 0) { - auto accuracy = - BlobUtil(*workspace.GetBlob("accuracy")).Get().data()[0]; - auto loss = BlobUtil(*workspace.GetBlob("loss")).Get().data()[0]; - std::cout << "step: " << i << " loss: " << loss - << " accuracy: " << accuracy << std::endl; - } - } - - NetDef deploy_init_model; // the final initialization model - deploy_init_model.set_name("retrain_" + full_init_model.name()); - for (const auto &op : full_init_model.op()) { - auto &output = op.output(0); - auto blob = workspace.GetBlob(output); - if (blob) { - auto tensor = BlobUtil(*blob).Get(); - auto init_op = deploy_init_model.add_op(); - init_op->set_type("GivenTensorFill"); - auto arg1 = init_op->add_arg(); - arg1->set_name("shape"); - for (auto dim : tensor.dims()) { - arg1->add_ints(dim); - } - auto arg2 = init_op->add_arg(); - arg2->set_name("values"); - const auto &data = tensor.data(); - for (auto i = 0; i < tensor.size(); ++i) { - arg2->add_floats(data[i]); - } - init_op->add_output(output); - } else { - deploy_init_model.add_op()->CopyFrom(op); - } - } - - WriteProtoToBinaryFile(deploy_init_model, path_prefix + "init_net.pb"); - WriteProtoToBinaryFile(full_predict_model, path_prefix + "predict_net.pb"); - auto init_size = std::ifstream(path_prefix + "init_net.pb", - std::ifstream::ate | std::ifstream::binary) - .tellg(); - auto predict_size = std::ifstream(path_prefix + "predict_net.pb", - std::ifstream::ate | std::ifstream::binary) - .tellg(); - auto model_size = init_size + predict_size; - - std::cout << std::endl; - - std::cout << std::setprecision(3) - << "load: " << ((float)load_time / CLOCKS_PER_SEC) - << "s train: " << ((float)train_time / CLOCKS_PER_SEC) - << "s validate: " << ((float)validate_time / CLOCKS_PER_SEC) - << "s test: " << ((float)test_time / CLOCKS_PER_SEC) - << "s model: " << ((float)model_size / 1000000) << "MB" - << 
std::endl; -} - -} // namespace caffe2 - -int main(int argc, char **argv) { - caffe2::GlobalInit(&argc, &argv); - caffe2::run(); - google::protobuf::ShutdownProtobufLibrary(); - return 0; -} diff --git a/src/caffe2/binaries/rnn.cc b/src/caffe2/binaries/rnn.cc index 6589da17..b48cdd81 100644 --- a/src/caffe2/binaries/rnn.cc +++ b/src/caffe2/binaries/rnn.cc @@ -1,5 +1,6 @@ #include #include "caffe2/util/blob.h" +#include "caffe2/util/model.h" #include "caffe2/util/net.h" #include "caffe2/util/cmd.h" @@ -18,56 +19,58 @@ CAFFE2_DEFINE_int(gen_length, 500, "One forward example sequence length"); namespace caffe2 { -void AddFC(NetUtil &init, NetUtil &predict, const std::string &input, +void AddFC(ModelUtil &model, const std::string &input, const std::string &output, int in_size, int out_size) { - init.AddXavierFillOp({out_size, in_size}, output + "_w"); - predict.AddInput(output + "_w"); - init.AddConstantFillOp({out_size}, output + "_b"); - predict.AddInput(output + "_b"); - predict.AddFcOp(input, output + "_w", output + "_b", output, 2) + model.init.AddXavierFillOp({out_size, in_size}, output + "_w"); + model.predict.AddInput(output + "_w"); + model.init.AddConstantFillOp({out_size}, output + "_b"); + model.predict.AddInput(output + "_b"); + model.predict.AddFcOp(input, output + "_w", output + "_b", output, 2) ->set_engine("CUDNN"); } -void AddLSTM(NetUtil &init, NetUtil &predict, const std::string &input_blob, +void AddLSTM(ModelUtil &model, const std::string &input_blob, const std::string &seq_lengths, const std::string &hidden_init, const std::string &cell_init, int vocab_size, int hidden_size, const std::string &scope, std::string *hidden_output, std::string *cell_state) { *hidden_output = scope + "/hidden_t_last"; *cell_state = scope + "/cell_t_last"; - AddFC(init, predict, input_blob, scope + "/i2h", vocab_size, 4 * hidden_size); + AddFC(model, input_blob, scope + "/i2h", vocab_size, 4 * hidden_size); // sight hack - init.AddXavierFillOp({4 * hidden_size, 
hidden_size}, scope + "/gates_t_w"); - predict.AddInput(scope + "/gates_t_w"); - init.AddConstantFillOp({4 * hidden_size}, scope + "/gates_t_b"); - predict.AddInput(scope + "/gates_t_b"); - predict.AddRecurrentNetworkOp(seq_lengths, hidden_init, cell_init, scope, - *hidden_output, *cell_state, - FLAGS_device == "cpu"); + model.init.AddXavierFillOp({4 * hidden_size, hidden_size}, + scope + "/gates_t_w"); + model.predict.AddInput(scope + "/gates_t_w"); + model.init.AddConstantFillOp({4 * hidden_size}, scope + "/gates_t_b"); + model.predict.AddInput(scope + "/gates_t_b"); + model.predict.AddRecurrentNetworkOp(seq_lengths, hidden_init, cell_init, + scope, *hidden_output, *cell_state, + FLAGS_device == "cpu"); } -void AddSGD(NetUtil &init, NetUtil &predict, float base_learning_rate, +void AddSGD(ModelUtil &model, float base_learning_rate, const std::string &policy, int stepsize, float gamma) { - predict.AddAtomicIterOp("iteration_mutex", "optimizer_iteration") + model.predict.AddAtomicIterOp("iteration_mutex", "optimizer_iteration") ->mutable_device_option() ->set_device_type(CPU); - init.AddConstantFillOp({1}, (int64_t)0, "optimizer_iteration") + model.init.AddConstantFillOp({1}, (int64_t)0, "optimizer_iteration") ->mutable_device_option() ->set_device_type(CPU); - init.AddCreateMutexOp("iteration_mutex") + model.init.AddCreateMutexOp("iteration_mutex") ->mutable_device_option() ->set_device_type(CPU); - predict.AddInput("iteration_mutex"); - predict.AddInput("optimizer_iteration"); - init.AddConstantFillOp({1}, 1.f, "ONE"); - predict.AddInput("ONE"); - predict.AddLearningRateOp("optimizer_iteration", "lr", base_learning_rate, - gamma); + model.predict.AddInput("iteration_mutex"); + model.predict.AddInput("optimizer_iteration"); + model.init.AddConstantFillOp({1}, 1.f, "ONE"); + model.predict.AddInput("ONE"); + model.predict.AddLearningRateOp("optimizer_iteration", "lr", + base_learning_rate, gamma); std::vector params({"LSTM/gates_t_w", "LSTM/i2h_b",
"char_rnn_blob_0_w", "char_rnn_blob_0_b", "LSTM/gates_t_b", "LSTM/i2h_w"}); for (auto &param : params) { - predict.AddWeightedSumOp({param, "ONE", param + "_grad", "lr"}, param); + model.predict.AddWeightedSumOp({param, "ONE", param + "_grad", "lr"}, + param); } } @@ -85,21 +88,23 @@ void run() { return; } + auto cuda = (FLAGS_device != "cpu" && cmd_setup_cuda()); + std::cout << "model: " << FLAGS_model << std::endl; - std::cout << "train_data: " << FLAGS_train_data << std::endl; - std::cout << "train_runs: " << FLAGS_train_runs << std::endl; - std::cout << "seq_length: " << FLAGS_seq_length << std::endl; - std::cout << "batch_size: " << FLAGS_batch_size << std::endl; - std::cout << "iters_to_report: " << FLAGS_iters_to_report << std::endl; - std::cout << "hidden_size: " << FLAGS_hidden_size << std::endl; - std::cout << "gen_length: " << FLAGS_gen_length << std::endl; + std::cout << "train-data: " << FLAGS_train_data << std::endl; + std::cout << "train-runs: " << FLAGS_train_runs << std::endl; + std::cout << "seq-length: " << FLAGS_seq_length << std::endl; + std::cout << "batch-size: " << FLAGS_batch_size << std::endl; + std::cout << "iters-to-report: " << FLAGS_iters_to_report << std::endl; + std::cout << "hidden-size: " << FLAGS_hidden_size << std::endl; + std::cout << "gen-length: " << FLAGS_gen_length << std::endl; std::cout << "device: " << FLAGS_device << std::endl; - std::cout << "dump_model: " << (FLAGS_dump_model ? "true" : "false") + std::cout << "using cuda: " << (cuda ? "true" : "false") << std::endl; + ; + std::cout << "dump-model: " << (FLAGS_dump_model ?
"true" : "false") << std::endl; - if (FLAGS_device != "cpu") cmd_setup_cuda(); - std::cout << std::endl; // >>> with open(args.train_data) as f: self.text = f.read() @@ -140,44 +145,40 @@ void run() { std::cout << "Start training" << std::endl; // >>> model = model_helper.ModelHelper(name="char_rnn") - NetDef initModel, forwardModel; - NetUtil init(initModel), forward(forwardModel); - init.SetName("char_rnn_init"); - forward.SetName("char_rnn"); + NetDef init_model, predict_model; + ModelUtil model(init_model, predict_model, "char_rnn"); // >>> input_blob, seq_lengths, hidden_init, cell_init, target = // model.net.AddExternalInputs('input_blob', 'seq_lengths', 'hidden_init', // 'cell_init', 'target') - forward.AddInput("input_blob"); - forward.AddInput("seq_lengths"); - forward.AddInput("hidden_init"); - forward.AddInput("cell_init"); - forward.AddInput("target"); + model.predict.AddInput("input_blob"); + model.predict.AddInput("seq_lengths"); + model.predict.AddInput("hidden_init"); + model.predict.AddInput("cell_init"); + model.predict.AddInput("target"); // >>> hidden_output_all, self.hidden_output, _, self.cell_state = LSTM(model, // input_blob, seq_lengths, (hidden_init, cell_init), self.D, // self.hidden_size, scope="LSTM") std::string hidden_output; std::string cell_state; - AddLSTM(init, forward, "input_blob", "seq_lengths", "hidden_init", - "cell_init", D, FLAGS_hidden_size, "LSTM", &hidden_output, - &cell_state); + AddLSTM(model, "input_blob", "seq_lengths", "hidden_init", "cell_init", D, + FLAGS_hidden_size, "LSTM", &hidden_output, &cell_state); // >>> output = brew.fc(model, hidden_output_all, None, // dim_in=self.hidden_size, dim_out=self.D, axis=2) - AddFC(init, forward, "LSTM/hidden_t_all", "char_rnn_blob_0", - FLAGS_hidden_size, D); + AddFC(model, "LSTM/hidden_t_all", "char_rnn_blob_0", FLAGS_hidden_size, D); // >>> softmax = model.net.Softmax(output, 'softmax', axis=2) - forward.AddSoftmaxOp("char_rnn_blob_0", "softmax", 2); + 
model.predict.AddSoftmaxOp("char_rnn_blob_0", "softmax", 2); // >>> softmax_reshaped, _ = model.net.Reshape(softmax, ['softmax_reshaped', // '_'], shape=[-1, self.D]) - forward.AddReshapeOp("softmax", "softmax_reshaped", {-1, D}); + model.predict.AddReshapeOp("softmax", "softmax_reshaped", {-1, D}); // >>> self.forward_net = core.Net(model.net.Proto()) - NetDef trainModel(forwardModel); - NetUtil train(trainModel); + NetDef train_model(model.predict.net); + NetUtil train(train_model); // >>> xent = model.net.LabelCrossEntropy([softmax_reshaped, target], 'xent') train.AddLabelCrossEntropyOp("softmax_reshaped", "target", "xent"); @@ -191,7 +192,8 @@ void run() { // >>> build_sgd(model, base_learning_rate=0.1 * self.seq_length, // policy="step", stepsize=1, gamma=0.9999) - AddSGD(init, train, 0.1 * FLAGS_seq_length, "step", 1, 0.9999); + ModelUtil t(model.init, train); + AddSGD(t, 0.1 * FLAGS_seq_length, "step", 1, 0.9999); // >>> self.model = model // >>> self.predictions = softmax @@ -202,22 +204,21 @@ void run() { // >>> self.prepare_state = core.Net("prepare_state") // >>> self.prepare_state.Copy(self.hidden_output, hidden_init) // >>> self.prepare_state.Copy(self.cell_state, cell_init) - NetDef prepareModel; - NetUtil prepare(prepareModel); + NetDef prepare_model; + NetUtil prepare(prepare_model); prepare.AddCopyOp(hidden_output, "hidden_init"); prepare.AddCopyOp(cell_state, "cell_init"); prepare.AddInput(hidden_output); prepare.AddInput(cell_state); if (FLAGS_device != "cpu") { - init.SetDeviceCUDA(); - forward.SetDeviceCUDA(); + model.SetDeviceCUDA(); train.SetDeviceCUDA(); prepare.SetDeviceCUDA(); } if (FLAGS_dump_model) { - std::cout << init.Short(); + std::cout << model.init.Short(); std::cout << train.Short(); std::cout << prepare.Short(); } @@ -229,7 +230,7 @@ void run() { std::cout << "Train model" << std::endl; // >>> workspace.RunNetOnce(self.model.param_init_net) - auto initNet = CreateNet(initModel, &workspace); + auto initNet = 
CreateNet(model.init.net, &workspace); initNet->Run(); // >>> smooth_loss = -np.log(1.0 / self.D) * self.seq_length @@ -278,7 +279,7 @@ void run() { BlobUtil(*workspace.CreateBlob(cell_state)).Set(value, true); } // >>> workspace.CreateNet(self.prepare_state) - auto prepareNet = CreateNet(prepareModel, &workspace); + auto prepareNet = CreateNet(prepare.net, &workspace); // >>> last_time = datetime.now() auto last_time = clock(); @@ -289,10 +290,10 @@ void run() { workspace.CreateBlob("input_blob"); workspace.CreateBlob("seq_lengths"); workspace.CreateBlob("target"); - auto trainNet = CreateNet(trainModel, &workspace); + auto trainNet = CreateNet(train.net, &workspace); // >>> CreateNetOnce(self.forward_net) - auto forwardNet = CreateNet(forwardModel, &workspace); + auto forwardNet = CreateNet(model.predict.net, &workspace); // >>> while True: while (num_iter < FLAGS_train_runs) { diff --git a/src/caffe2/binaries/train.cc b/src/caffe2/binaries/train.cc index 83daddf4..b9c23b35 100644 --- a/src/caffe2/binaries/train.cc +++ b/src/caffe2/binaries/train.cc @@ -1,16 +1,18 @@ -#include "caffe2/util/misc.h" - +#include "caffe2/util/train.h" #include #include #include +#include #include "caffe2/util/plot.h" +#include "caffe2/util/preprocess.h" #include "caffe2/util/window.h" -#include "caffe2/utils/proto_utils.h" #include "caffe2/zoo/keeper.h" #include "res/imagenet_classes.h" CAFFE2_DEFINE_string(model, "", "Name of one of the pre-trained models."); +CAFFE2_DEFINE_string(layer, "", + "Name of the layer on which to split the model."); CAFFE2_DEFINE_string(folder, "", "Folder with subfolders with images"); CAFFE2_DEFINE_string(db_type, "leveldb", "The database type."); @@ -31,7 +33,7 @@ CAFFE2_DEFINE_bool(reshape_output, false, namespace caffe2 { void run() { - if (!cmd_init("Full Train Example")) { + if (!cmd_init("CNN Training Example")) { return; } @@ -49,22 +51,30 @@ void run() { } std::cout << "model: " << FLAGS_model << std::endl; - std::cout << "image_dir: " << 
FLAGS_folder << std::endl; - std::cout << "db_type: " << FLAGS_db_type << std::endl; - std::cout << "size_to_fit: " << FLAGS_size_to_fit << std::endl; - std::cout << "train_runs: " << FLAGS_train_runs << std::endl; - std::cout << "test_runs: " << FLAGS_test_runs << std::endl; - std::cout << "batch_size: " << FLAGS_batch_size << std::endl; - std::cout << "learning_rate: " << FLAGS_learning_rate << std::endl; - std::cout << "zero_one: " << (FLAGS_zero_one ? "true" : "false") << std::endl; + std::cout << "layer: " << FLAGS_layer << std::endl; + std::cout << "image-dir: " << FLAGS_folder << std::endl; + std::cout << "db-type: " << FLAGS_db_type << std::endl; + std::cout << "size-to-fit: " << FLAGS_size_to_fit << std::endl; + std::cout << "train-runs: " << FLAGS_train_runs << std::endl; + std::cout << "test-runs: " << FLAGS_test_runs << std::endl; + std::cout << "batch-size: " << FLAGS_batch_size << std::endl; + std::cout << "learning-rate: " << FLAGS_learning_rate << std::endl; + std::cout << "zero-one: " << (FLAGS_zero_one ? "true" : "false") << std::endl; std::cout << "display: " << (FLAGS_display ? "true" : "false") << std::endl; - std::cout << "reshape_output: " << FLAGS_reshape_output << std::endl; + std::cout << "reshape-output: " << (FLAGS_reshape_output ? 
"true" : "false") + << std::endl; - auto path_prefix = FLAGS_folder + '/' + '_'; - std::string db_paths[kRunNum]; - for (int i = 0; i < kRunNum; i++) { - db_paths[i] = path_prefix + name_for_run[i] + ".db"; + auto has_split = FLAGS_layer.size() > 0; + std::string layer_prefix; + std::string model_safe = FLAGS_model; + std::replace(model_safe.begin(), model_safe.end(), '/', '_'); + if (has_split) { + std::string layer_safe = FLAGS_layer; + std::replace(layer_safe.begin(), layer_safe.end(), '/', '_'); + std::replace(layer_safe.begin(), layer_safe.end(), '.', '_'); + layer_prefix = layer_safe + '_'; } + auto path_prefix = FLAGS_folder + '/' + '_' + model_safe + '_' + layer_prefix; if (FLAGS_display) { superWindow("Full Train Example"); @@ -80,200 +90,133 @@ void run() { resizeWindow("loss", 500, 300); } - std::cout << std::endl; + std::string db_paths[kRunNum]; + for (int i = 0; i < kRunNum; i++) { + db_paths[i] = path_prefix + name_for_run[i] + ".db"; + } - auto load_time = -clock(); - std::vector class_labels; - std::vector> image_files; - load_labels(FLAGS_folder, path_prefix, class_labels, image_files); + std::cout << std::endl; std::cout << "load model.." 
<< std::endl; NetDef full_init_model, full_predict_model; - Keeper(FLAGS_model).AddModel(full_init_model, full_predict_model, false); + ModelUtil full(full_init_model, full_predict_model); + Keeper(FLAGS_model).AddModel(full, has_split); if (FLAGS_device == "cudnn") { - NetUtil(full_init_model).SetEngineOps("CUDNN"); - NetUtil(full_predict_model).SetEngineOps("CUDNN"); - } - - if (FLAGS_dump_model) { - std::cout << NetUtil(full_init_model).Short(); - std::cout << NetUtil(full_predict_model).Short(); + full.init.SetEngineOps("CUDNN"); + full.predict.SetEngineOps("CUDNN"); + } + + NetDef init_model[kRunNum], predict_model[kRunNum]; + ModelUtil models[kRunNum] = { + {init_model[kRunTrain], predict_model[kRunTrain], + name_for_run[kRunTrain]}, + {init_model[kRunTest], predict_model[kRunTest], name_for_run[kRunTest]}, + {init_model[kRunValidate], predict_model[kRunValidate], + name_for_run[kRunValidate]}, + }; + + NetDef first_init_model, first_predict_model; + ModelUtil first(first_init_model, first_predict_model); + NetDef second_init_model, second_predict_model; + ModelUtil second(second_init_model, second_predict_model); + + if (has_split) { + full.predict.CheckLayerAvailable(FLAGS_layer); + std::cout << "split model.. (at " << FLAGS_layer << ")" << std::endl; + full.Split(FLAGS_layer, first, second, FLAGS_device != "cudnn"); + if (FLAGS_device != "cpu") { + first.SetDeviceCUDA(); + } + } else { + second.init.net = full.init.net; + second.predict.net = full.predict.net; } - NetDef init_model[kRunNum]; - NetDef predict_model[kRunNum]; - for (int i = 0; i < kRunNum; i++) { - init_model[i].set_name(name_for_run[i] + "_init_model"); - predict_model[i].set_name(name_for_run[i] + "_predict_model"); - } + std::cout << "collect images.." 
<< std::endl; + auto load_time = -clock(); + std::vector class_labels; + std::vector> image_files; + load_labels(FLAGS_folder, path_prefix, class_labels, image_files); + std::cout << class_labels.size() << " labels found" << std::endl; + std::cout << image_files.size() << " images found" << std::endl; - pre_process(image_files, db_paths, FLAGS_db_type, FLAGS_size_to_fit); + std::cout << "cache images.." << std::endl; + auto count = preprocess(image_files, db_paths, first, FLAGS_db_type, + FLAGS_batch_size, FLAGS_size_to_fit); + std::cout << count << " images processed" << std::endl; load_time += clock(); + auto model_in = has_split ? FLAGS_layer : full.predict.Input(0); for (int i = 0; i < kRunNum; i++) { - ModelUtil(init_model[i], predict_model[i]) - .AddDatabaseOps(name_for_run[i], full_predict_model.external_input(0), - db_paths[i], FLAGS_db_type, FLAGS_batch_size); + models[i].AddDatabaseOps(name_for_run[i], model_in, db_paths[i], + FLAGS_db_type, FLAGS_batch_size); } - copy_train_model(full_init_model, full_predict_model, - full_predict_model.external_input(0), class_labels.size(), - init_model[kRunTrain], predict_model[kRunTrain]); - copy_test_model(full_predict_model, predict_model[kRunValidate]); - copy_test_model(full_predict_model, predict_model[kRunTest]); + second.CopyTrain(model_in, class_labels.size(), models[kRunTrain]); + second.CopyTest(models[kRunValidate]); + second.CopyTest(models[kRunTest]); - auto output = predict_model[kRunTrain].external_output(0); + auto output = models[kRunTrain].predict.Output(0); if (FLAGS_reshape_output) { auto output_reshaped = output + "_reshaped"; for (int i = 0; i < kRunNum; i++) { - NetUtil(predict_model[i]).AddReshapeOp(output, output_reshaped, {0, -1}); + models[i].predict.AddReshapeOp(output, output_reshaped, {0, -1}); } output = output_reshaped; } - ModelUtil(init_model[kRunTrain], predict_model[kRunTrain]) - .AddTrainOps(output, FLAGS_learning_rate, FLAGS_optimizer); - ModelUtil(full_predict_model, 
predict_model[kRunValidate]).AddTestOps(output); - ModelUtil(full_predict_model, predict_model[kRunTest]).AddTestOps(output); + models[kRunTrain].AddTrainOps(output, FLAGS_learning_rate, FLAGS_optimizer); + ModelUtil(second.predict, models[kRunValidate].predict).AddTestOps(output); + ModelUtil(second.predict, models[kRunTest].predict).AddTestOps(output); if (FLAGS_zero_one) { - NetUtil(predict_model[kRunValidate]) - .AddZeroOneOp(output, "label"); - } - if (FLAGS_display) { - NetUtil(predict_model[kRunValidate]) - .AddShowWorstOp(output, "label", - full_predict_model.external_input(0)); + models[kRunValidate].predict.AddZeroOneOp(output, "label"); } if (FLAGS_display) { - NetUtil(predict_model[kRunTrain]) - .AddTimePlotOp("accuracy", "iter", "accuracy", "train", 10); - NetUtil(predict_model[kRunValidate]) - .AddTimePlotOp("accuracy", "iter", "accuracy", "test"); - NetUtil(predict_model[kRunTrain]) - .AddTimePlotOp("loss", "iter", "loss", "train", 10); - NetUtil(predict_model[kRunValidate]) - .AddTimePlotOp("loss", "iter", "loss", "test"); + models[kRunValidate].predict.AddShowWorstOp(output, "label", + second.predict.Input(0)); + models[kRunTrain].predict.AddTimePlotOp("accuracy", "iter", "accuracy", + "train", 10); + models[kRunValidate].predict.AddTimePlotOp("accuracy", "iter", "accuracy", + "test"); + models[kRunTrain].predict.AddTimePlotOp("loss", "iter", "loss", "train", + 10); + models[kRunValidate].predict.AddTimePlotOp("loss", "iter", "loss", "test"); } if (FLAGS_device != "cpu") { for (int i = 0; i < kRunNum; i++) { - NetUtil(init_model[i]).SetDeviceCUDA(); - NetUtil(predict_model[i]).SetDeviceCUDA(); + models[i].SetDeviceCUDA(); } } + if (FLAGS_dump_model) { + std::cout << models[kRunTrain].Short(); + } + std::cout << std::endl; Workspace workspace("tmp"); - unique_ptr predict_net[kRunNum]; - for (int i = 0; i < kRunNum; i++) { - auto init_net = CreateNet(init_model[i], &workspace); - init_net->Run(); - predict_net[i] = CreateNet(predict_model[i], 
&workspace); - } clock_t train_time = 0; clock_t validate_time = 0; clock_t test_time = 0; - auto last_time = clock(); - auto last_i = 0; - auto sum_accuracy = 0.f, sum_loss = 0.f; - std::cout << "training.." << std::endl; - for (auto i = 1; i <= FLAGS_train_runs; i++) { - train_time -= clock(); - predict_net[kRunTrain]->Run(); - train_time += clock(); - - sum_accuracy += - BlobUtil(*workspace.GetBlob("accuracy")).Get().data()[0]; - sum_loss += BlobUtil(*workspace.GetBlob("loss")).Get().data()[0]; - - auto steps_time = (float)(clock() - last_time) / CLOCKS_PER_SEC; - if (steps_time > 5 || i == FLAGS_train_runs) { - auto iter = BlobUtil(*workspace.GetBlob("iter")).Get().data()[0]; - auto lr = BlobUtil(*workspace.GetBlob("lr")).Get().data()[0]; - auto train_loss = sum_loss / (i - last_i), - train_accuracy = sum_accuracy / (i - last_i); - sum_loss = 0; - sum_accuracy = 0; - validate_time -= clock(); - predict_net[kRunValidate]->Run(); - validate_time += clock(); - auto validate_accuracy = - BlobUtil(*workspace.GetBlob("accuracy")).Get().data()[0]; - std::cout << "step: " << iter << " rate: " << lr - << " loss: " << train_loss << " accuracy: " << train_accuracy - << " | " << validate_accuracy - << " step_time: " << std::setprecision(3) - << steps_time / (i - last_i) << "s" << std::endl; - last_i = i; - last_time = clock(); - } - } + run_trainer(FLAGS_train_runs, models[kRunTrain], models[kRunValidate], + workspace, train_time, validate_time); std::cout << std::endl; - std::cout << "testing.." 
<< std::endl; - auto test_step = 10; - for (auto i = 1; i <= FLAGS_test_runs; i++) { - test_time -= clock(); - predict_net[kRunTest]->Run(); - test_time += clock(); - - sum_accuracy += - BlobUtil(*workspace.GetBlob("accuracy")).Get().data()[0]; - sum_loss += BlobUtil(*workspace.GetBlob("loss")).Get().data()[0]; - - if (i % test_step == 0) { - auto loss = sum_loss / test_step, accuracy = sum_accuracy / test_step; - sum_loss = 0; - sum_accuracy = 0; - std::cout << "step: " << i << " loss: " << loss - << " accuracy: " << accuracy << std::endl; - } - } + run_tester(FLAGS_test_runs, models[kRunTest], workspace, test_time); NetDef deploy_init_model; // the final initialization model - deploy_init_model.set_name("train_" + full_init_model.name()); - for (const auto &op : full_init_model.op()) { - auto &output = op.output(0); - auto blob = workspace.GetBlob(output); - if (blob) { - auto tensor = BlobUtil(*blob).Get(); - auto init_op = deploy_init_model.add_op(); - init_op->set_type("GivenTensorFill"); - auto arg1 = init_op->add_arg(); - arg1->set_name("shape"); - for (auto dim : tensor.dims()) { - arg1->add_ints(dim); - } - auto arg2 = init_op->add_arg(); - arg2->set_name("values"); - const auto &data = tensor.data(); - for (auto i = 0; i < tensor.size(); ++i) { - arg2->add_floats(data[i]); - } - init_op->add_output(output); - } else { - deploy_init_model.add_op()->CopyFrom(op); - } - } + ModelUtil deploy(deploy_init_model, full.predict.net, + "train_" + full.init.net.name()); + full.CopyDeploy(deploy, workspace); - auto init_path = path_prefix + FLAGS_model + "_init_net.pb"; - auto predict_path = path_prefix + FLAGS_model + "_predict_net.pb"; - WriteProtoToBinaryFile(deploy_init_model, init_path); - WriteProtoToBinaryFile(full_predict_model, predict_path); - auto init_size = - std::ifstream(init_path, std::ifstream::ate | std::ifstream::binary) - .tellg(); - auto predict_size = - std::ifstream(predict_path, std::ifstream::ate | std::ifstream::binary) - .tellg(); - auto 
model_size = init_size + predict_size; + size_t model_size = deploy.Write(path_prefix); std::cout << std::endl; diff --git a/src/caffe2/util/model.cc b/src/caffe2/util/model.cc index 096500d1..ac3b42a7 100644 --- a/src/caffe2/util/model.cc +++ b/src/caffe2/util/model.cc @@ -1,4 +1,5 @@ #include "caffe2/util/model.h" +#include "caffe2/util/blob.h" namespace caffe2 { @@ -6,6 +7,11 @@ const std::string gradient_suffix("_grad"); const std::string moment_suffix("_moment"); const std::string meansq_suffix("_meansq"); const std::string reader_suffix("_reader"); +const std::string init_net_suffix("_init_net.pb"); +const std::string predict_net_suffix("_predict_net.pb"); +const std::string init_name_suffix("_init"); +const std::string predict_name_suffix("_predict"); + const std::string iter_name("iter"); const std::string lr_name("lr"); const std::string one_name("one"); @@ -14,11 +20,6 @@ const std::string label_name("label"); const std::string xent_name("xent"); const std::string accuracy_name("accuracy"); -void ModelUtil::SetName(const std::string &name) { - init.SetName(name + "_init"); - predict.SetName(name + "_predict"); -} - void ModelUtil::AddDatabaseOps(const std::string &name, const std::string &data, const std::string &db, const std::string &db_type, int batch_size) { @@ -166,4 +167,208 @@ void ModelUtil::AddConvOps(const std::string &input, const std::string &output, padding, kernel); } +void ModelUtil::Split(const std::string &layer, ModelUtil &firstModel, + ModelUtil &secondModel, bool force_cpu, bool inclusive) { + std::set static_inputs = predict.CollectLayers(layer); + + // copy operators + for (const auto &op : init.net.op()) { + auto is_first = (static_inputs.find(op.output(0)) != static_inputs.end()); + auto new_op = + (is_first ? 
firstModel.init.net : secondModel.init.net).add_op(); + new_op->CopyFrom(op); + } + for (const auto &op : predict.net.op()) { + auto is_first = (static_inputs.find(op.output(0)) != static_inputs.end() && + (inclusive || op.input(0) != op.output(0))); + auto new_op = + (is_first ? firstModel.predict.net : secondModel.predict.net).add_op(); + new_op->CopyFrom(op); + if (!force_cpu) { + new_op->set_engine("CUDNN"); // TODO: not here + } + } + + // copy externals + if (firstModel.predict.net.op().size()) { + // firstModel.predict.net.add_external_input(predict.Input(0)); + } + if (secondModel.predict.net.op().size()) { + // secondModel.predict.net.add_external_input(layer); + } + for (const auto &output : init.net.external_output()) { + auto is_first = (static_inputs.find(output) != static_inputs.end()); + if (is_first) { + firstModel.init.net.add_external_output(output); + } else { + secondModel.init.net.add_external_output(output); + } + } + for (const auto &input : predict.net.external_input()) { + auto is_first = (static_inputs.find(input) != static_inputs.end()); + if (is_first) { + firstModel.predict.net.add_external_input(input); + } else { + secondModel.predict.net.add_external_input(input); + } + } + if (firstModel.predict.net.op().size()) { + firstModel.predict.net.add_external_output(layer); + } + if (secondModel.predict.net.op().size()) { + secondModel.predict.net.add_external_output(predict.Output(0)); + } + + if (init.net.has_name()) { + if (!firstModel.init.net.has_name()) { + firstModel.init.SetName(init.net.name() + "_first"); + } + if (!secondModel.init.net.has_name()) { + secondModel.init.SetName(init.net.name() + "_second"); + } + } + if (predict.net.has_name()) { + if (!firstModel.predict.net.has_name()) { + firstModel.predict.SetName(predict.net.name() + "_first"); + } + if (!secondModel.predict.net.has_name()) { + secondModel.predict.SetName(predict.net.name() + "_second"); + } + } +} + +void set_trainable(OperatorDef &op, bool train) { + if 
(op.type() == "Dropout") { + for (auto &arg : *op.mutable_arg()) { + if (arg.name() == "is_test") { + arg.set_i(!train); + } + } + } +} + +void ModelUtil::CopyTrain(const std::string &layer, int out_size, + ModelUtil &train) const { + std::string last_w, last_b; + for (const auto &op : predict.net.op()) { + auto new_op = train.predict.net.add_op(); + new_op->CopyFrom(op); + set_trainable(*new_op, true); + if (op.type() == "FC") { + last_w = op.input(1); + last_b = op.input(2); + } + } + train.predict.SetRenameInplace(); + for (const auto &op : init.net.op()) { + auto &output = op.output(0); + auto init_op = train.init.net.add_op(); + bool uniform = (output.find("_b") != std::string::npos); + init_op->set_type(uniform ? "ConstantFill" : "XavierFill"); + for (const auto &arg : op.arg()) { + if (arg.name() == "shape") { + auto init_arg = init_op->add_arg(); + init_arg->set_name("shape"); + if (output == last_w) { + init_arg->add_ints(out_size); + init_arg->add_ints(arg.ints(1)); + } else if (output == last_b) { + init_arg->add_ints(out_size); + } else { + init_arg->CopyFrom(arg); + } + } + } + init_op->add_output(output); + } + std::set existing_inputs; + existing_inputs.insert(train.predict.net.external_input().begin(), + train.predict.net.external_input().end()); + for (const auto &op : train.predict.net.op()) { + for (auto &output : op.output()) { + existing_inputs.insert(output); + } + } + for (const auto &input : predict.net.external_input()) { + if (existing_inputs.find(input) == existing_inputs.end()) { + train.predict.net.add_external_input(input); + } + } + for (const auto &output : predict.net.external_output()) { + train.predict.net.add_external_output(output); + } + // auto op = train_init_model.add_op(); + // op->set_type("ConstantFill"); + // auto arg = op->add_arg(); + // arg->set_name("shape"); + // arg->add_ints(1); + // op->add_output(layer); +} + +void ModelUtil::CopyTest(ModelUtil &test) const { + for (const auto &op : predict.net.op()) { + auto 
new_op = test.predict.net.add_op(); + new_op->CopyFrom(op); + set_trainable(*new_op, false); + } + for (const auto &input : predict.net.external_input()) { + test.predict.net.add_external_input(input); + } + for (const auto &output : predict.net.external_output()) { + test.predict.net.add_external_output(output); + } +} + +void ModelUtil::CopyDeploy(ModelUtil &deploy, Workspace &workspace) const { + for (const auto &op : init.net.op()) { + auto &output = op.output(0); + auto blob = workspace.GetBlob(output); + if (blob) { + auto tensor = BlobUtil(*blob).Get(); + auto init_op = deploy.init.net.add_op(); + init_op->set_type("GivenTensorFill"); + auto arg1 = init_op->add_arg(); + arg1->set_name("shape"); + for (auto dim : tensor.dims()) { + arg1->add_ints(dim); + } + auto arg2 = init_op->add_arg(); + arg2->set_name("values"); + const auto &data = tensor.data(); + for (auto i = 0; i < tensor.size(); ++i) { + arg2->add_floats(data[i]); + } + init_op->add_output(output); + } else { + deploy.init.net.add_op()->CopyFrom(op); + } + } +} + +size_t ModelUtil::Write(const std::string &path_prefix) const { + size_t size = 0; + size += init.Write(path_prefix + init_net_suffix); + size += predict.Write(path_prefix + predict_net_suffix); + return size; +} + +size_t ModelUtil::Read(const std::string &path_prefix) { + size_t size = 0; + size += init.Read(path_prefix + init_net_suffix); + size += predict.Read(path_prefix + predict_net_suffix); + return size; +} + +void ModelUtil::SetName(const std::string &name) { + init.SetName(name + init_name_suffix); + predict.SetName(name + predict_name_suffix); +} + +void ModelUtil::SetDeviceCUDA() { + init.SetDeviceCUDA(); + predict.SetDeviceCUDA(); +} + +std::string ModelUtil::Short() { return predict.Short() + init.Short(); } + } // namespace caffe2 diff --git a/src/caffe2/util/net.cc b/src/caffe2/util/net.cc index 9626f74b..9bc976d1 100644 --- a/src/caffe2/util/net.cc +++ b/src/caffe2/util/net.cc @@ -700,11 +700,11 @@ OperatorDef* 
NetUtil::AddGradientOp( GradientOpsMeta meta = GetGradientForOp(op, output); if (meta.ops_.size()) { if (meta.ops_.size() > 1) { - std::cout << "multiple gradients for operator (" << op.type(); + std::cerr << "multiple gradients for operator (" << op.type(); for (auto& o : meta.ops_) { - std::cout << " " << o.type(); + std::cerr << " " << o.type(); } - std::cout << ")" << std::endl; + std::cerr << ")" << std::endl; } grad->CopyFrom(meta.ops_[0]); } else { @@ -794,7 +794,6 @@ std::vector NetUtil::CollectGradientOps( for (auto& op : net.op()) { if (trainable_ops.find(op.type()) != trainable_ops.end()) { gradient_ops.push_back(op); - // std::cout << "type: " << op.type() << std::endl; for (auto& input : op.input()) { auto& output = op.output(); if (std::find(output.begin(), output.end(), input) == output.end()) { @@ -806,7 +805,7 @@ std::vector NetUtil::CollectGradientOps( } } } else if (non_trainable_ops.find(op.type()) == non_trainable_ops.end()) { - std::cout << "unknown backprop operator type: " << op.type() << std::endl; + CAFFE_THROW("unknown backprop operator type: " + op.type()); } } std::reverse(gradient_ops.begin(), gradient_ops.end()); @@ -850,9 +849,9 @@ void NetUtil::CheckLayerAvailable(const std::string& layer) { } } if (!layer_found) { - std::cout << "available layers:" << std::endl; + std::cerr << "available layers:" << std::endl; for (auto& layer : available_layers) { - std::cout << " " << layer.first << " (" << layer.second << ")" + std::cerr << " " << layer.first << " (" << layer.second << ")" << std::endl; } LOG(FATAL) << "~ no layer with name " << layer << " in model."; @@ -969,6 +968,24 @@ void NetUtil::Print() { google::protobuf::TextFormat::Print(net, &stream); } +size_t NetUtil::Write(const std::string& path) const { + WriteProtoToBinaryFile(net, path); + return std::ifstream(path, std::ifstream::ate | std::ifstream::binary) + .tellg(); +} + +size_t NetUtil::WriteText(const std::string& path) const { + WriteProtoToTextFile(net, path); + return 
std::ifstream(path, std::ifstream::ate | std::ifstream::binary) + .tellg(); +} + +size_t NetUtil::Read(const std::string& path) { + CAFFE_ENFORCE(ReadProtoFromFile(path.c_str(), &net)); + return std::ifstream(path, std::ifstream::ate | std::ifstream::binary) + .tellg(); +} + void NetUtil::SetDeviceCUDA() { #ifdef WITH_CUDA net.mutable_device_option()->set_device_type(CUDA); @@ -982,8 +999,8 @@ OperatorDef* NetUtil::AddRecurrentNetworkOp(const std::string& seq_lengths, const std::string& hidden_output, const std::string& cell_state, bool force_cpu) { - NetDef forwardModel; - NetUtil forward(forwardModel); + NetDef forward_model; + NetUtil forward(forward_model); forward.SetName(scope); forward.SetType("rnn"); forward.AddInput("input_t"); @@ -1009,12 +1026,12 @@ OperatorDef* NetUtil::AddRecurrentNetworkOp(const std::string& seq_lengths, fc->mutable_device_option()->set_device_type(CUDA); sum->mutable_device_option()->set_device_type(CUDA); lstm->mutable_device_option()->set_device_type(CUDA); - forwardModel.mutable_device_option()->set_device_type(CUDA); + forward.SetDeviceCUDA(); } #endif - NetDef backwardModel; - NetUtil backward(backwardModel); + NetDef backward_model; + NetUtil backward(backward_model); backward.SetName("RecurrentBackwardStep"); backward.SetType("simple"); backward.AddGradientOp(*lstm); @@ -1035,11 +1052,9 @@ OperatorDef* NetUtil::AddRecurrentNetworkOp(const std::string& seq_lengths, backward.AddInput(seq_lengths); backward.AddInput(scope + "/hidden_t"); backward.AddInput(scope + "/cell_t"); -#ifdef WITH_CUDA if (!force_cpu) { - backwardModel.mutable_device_option()->set_device_type(CUDA); + backward.SetDeviceCUDA(); } -#endif auto op = AddOp("RecurrentNetwork",