-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBOT_convert_imgs_2_lmdb.cpp
135 lines (111 loc) · 3.57 KB
/
BOT_convert_imgs_2_lmdb.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <string>
#include <utility>
#include <vector>
#include "boost/scoped_ptr.hpp"
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/format.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/rng.hpp"
using namespace caffe;
using std::pair;
using boost::scoped_ptr;
std::vector<std::pair<std::string, int> > read_imgs_filelist(
const std::string& file_path)
{
std::ifstream infile(file_path.c_str());
std::vector<std::pair<std::string, int> > imgs;
std::string line;
size_t pos;
int label;
while(std::getline(infile, line))
{
pos = line.find_last_of(' ');
label = atoi(line.substr(pos + 1).c_str());
imgs.push_back(std::make_pair(line.substr(0, pos), label));
}
infile.close();
LOG(INFO) << "Finished load image file list";
return imgs;
}
void convert_imgs_format(std::vector<std::pair<std::string, int> >& imgs,
const std::string& format, const std::string& db_name, const std::string& root)
{
const bool is_color = true;
const bool check_size = false;
const bool encoded = false;
const std::string encode_type = "";
LOG(INFO) << "data total size " << imgs.size();
LOG(INFO) << "Shuffle data...";
shuffle(imgs.begin(), imgs.end());
int resize_height = 224;
int resize_width = 224;
scoped_ptr<db::DB> db(db::GetDB(format));
db->Open(db_name, db::NEW);
scoped_ptr<db::Transaction> txn(db->NewTransaction());
std::string root_floder = root;
//std::string root_floder = "";
Datum datum;
int count = 0;
int data_size = 0;
bool data_size_initialized = false;
LOG(INFO) << "Start convert images";
for (int i = 0; i < imgs.size(); ++ i)
{
bool status;
std::string enc = encode_type;
if(encoded && !enc.size())
{}
status = ReadImageToDatum(root_floder + imgs[i].first,
imgs[i].second, resize_height, resize_width, is_color,
enc, &datum);
if (status == false)
{
continue;
}
if (check_size)
{
if (!data_size_initialized)
{
data_size = datum.channels() * datum.height() * datum.width();
data_size_initialized = true;
}
else
{
const std::string& data = datum.data();
CHECK_EQ(data.size(), data_size) << "Incorrect data field size "
<< data.size();
}
}
std::string key_str = caffe::format_int(i, 8) + "_" + imgs[i].first;
std::string out;
CHECK(datum.SerializeToString(&out));
txn->Put(key_str, out);
if (++ count % 1000 == 0)
{
txn->Commit();
txn.reset(db->NewTransaction());
LOG(INFO) << "Processed " << count << " files.";
}
}
if (count % 1000 != 0)
{
txn->Commit();
LOG(INFO) << "Processed " << count << " files.";
}
LOG(INFO) << "finished convert images";
}
int main(int argc, char** argv)
{
const std::string root = "examples/BOT/";
std::vector<std::pair<std::string, int> > train_imgs = read_imgs_filelist(root + "train.txt");
std::vector<std::pair<std::string, int> > val_imgs = read_imgs_filelist(root + "val.txt");
convert_imgs_format(train_imgs, "lmdb", root + "train_imgs_lmdb", root);
//convert_imgs_format(val_imgs, "lmdb", root + "val_imgs_lmdb", root);
return 0;
}