-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregular_distributed.cc
152 lines (123 loc) · 4.48 KB
/
regular_distributed.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#include <mpi.h>
#include <algorithm>
#include <chrono>
#include <fstream>
#include <iostream>
#include "matrix_operators.hpp"
#include "mpi_operators.hpp"
using namespace std;
/// Computes the full 2-D convolution of `a` (ah x aw) with `b` (bh x bw),
/// with the output elements split across the MPI ranks and the result
/// gathered on rank 0.
///
/// This is a collective operation: every rank of MPI_COMM_WORLD has to call
/// it, because it ends in an MPI_Gatherv rooted at rank 0.
///
/// @param ah,aw   Height and width of `a`.
/// @param bh,bw   Height and width of `b`.
/// @param ch,cw   [out] Output height (ah + bh - 1) and width (aw + bw - 1);
///                written on every rank.
/// @param a,b     Input matrices, indexed as a[row][col] / b[row][col].
/// @param c       [out] Result matrix. Allocated with alloc_matrix and filled
///                on rank 0 only; left untouched on every other rank.
/// @param prank   Rank of the calling process (0 is the gather root).
/// @param csize   Number of processes taking part.
void conv2(int const &ah, int const &aw, int const &bh, int const &bw, int &ch,
           int &cw, double const *const *const a, double const *const *const b,
           double **&c, int const &prank, int const &csize) {
  // Output dimensions of a full convolution.
  cw = aw + bw - 1;
  ch = ah + bh - 1;

  // Per-rank element counts and offsets into the flattened (row-major) output.
  // NOTE(review): both arrays come from helpers declared outside this file and
  // are never released here; free them with whatever matches their allocator
  // once that is confirmed.
  int *sendcounts = create_elementwise_sendcounts(csize, ch * cw);
  int *displs = create_displs(sendcounts, csize);

  // Only the root needs storage for the complete result.
  if (prank == 0) alloc_matrix(ch, cw, c);

  int const local_count = sendcounts[prank];
  int const local_offset = displs[prank];
  double *c_local = new double[local_count]();

  for (int c_it = 0; c_it < local_count; ++c_it) {
    // Map the flattened index back to (row, col) of the output.
    int const flat = local_offset + c_it;
    int const ci = flat / cw;
    int const cj = flat % cw;

    // Range of `a` indices that overlap the kernel for this output element,
    // and the matching one-past-the-end index into `b`.
    int const aw_start = std::max(0, cj - bw + 1);
    int const aw_end = std::min(aw, cj + 1);
    int const bw_end = std::min(bw, cj + 1);
    int const ah_start = std::max(0, ci - bh + 1);
    int const ah_end = std::min(ah, ci + 1);
    int const bh_end = std::min(bh, ci + 1);
    int const wlen = aw_end - aw_start;
    int const hlen = ah_end - ah_start;

    // `a` is walked forwards while `b` is walked backwards (kernel flip).
    double acc = 0.0;
    for (int i = 0; i < hlen; ++i)
      for (int j = 0; j < wlen; ++j)
        acc += a[ah_start + i][aw_start + j] * b[bh_end - i - 1][bw_end - j - 1];
    c_local[c_it] = acc;
  }

  // The receive buffer is only meaningful on the root; `c` is not allocated
  // on the other ranks and must not be dereferenced there.
  // NOTE(review): gathering into c[0] assumes alloc_matrix stores all rows in
  // one contiguous buffer starting at c[0] - confirm in matrix_operators.hpp.
  double *const recvbuf = (prank == 0) ? c[0] : nullptr;
  MPI_Gatherv(c_local, local_count, MPI_DOUBLE, recvbuf, sendcounts, displs,
              MPI_DOUBLE, 0, MPI_COMM_WORLD);

  // MPI_Gatherv is blocking, so the send buffer can be released right away.
  delete[] c_local;
}
/// Loads the input matrix and the kernel from a whitespace-separated text
/// file laid out as: `ah aw bh bw`, then ah*aw matrix values in row-major
/// order, then bh*bw kernel values in row-major order.
///
/// @param file_name  Path of the file to read.
/// @param ah,aw      [out] Matrix height and width.
/// @param bh,bw      [out] Kernel height and width.
/// @param matrix     [out] Allocated with alloc_matrix(ah, aw, ...) and filled.
/// @param kernel     [out] Allocated with alloc_matrix(bh, bw, ...) and filled.
/// @throws std::ios_base::failure if the file cannot be opened, the header is
///         missing or holds a non-positive dimension, or the file ends before
///         every element has been read. In the last case `matrix` and
///         `kernel` have already been allocated.
void read(char const *const file_name, int &ah, int &aw, int &bh, int &bw,
          double **&matrix, double **&kernel) {
  std::ifstream f(file_name);
  if (!f) throw std::ios_base::failure("read: cannot open input file");

  // Validate the header before it is used to size any allocation; without
  // this a failed extraction would hand indeterminate sizes to alloc_matrix.
  if (!(f >> ah >> aw >> bh >> bw) || ah <= 0 || aw <= 0 || bh <= 0 || bw <= 0)
    throw std::ios_base::failure(
        "read: missing or non-positive matrix/kernel dimensions");

  alloc_matrix(ah, aw, matrix);
  alloc_matrix(bh, bw, kernel);

  for (int i = 0; i < ah; ++i)
    for (int j = 0; j < aw; ++j) f >> matrix[i][j];
  for (int i = 0; i < bh; ++i)
    for (int j = 0; j < bw; ++j) f >> kernel[i][j];

  // failbit is sticky, so one check after the loops catches any bad or
  // missing element.
  if (!f)
    throw std::ios_base::failure(
        "read: input file ended before all elements were read");
  // The stream closes itself when `f` goes out of scope.
}
/// Writes the convolution result to one file and appends the elapsed time,
/// in whole nanoseconds, as a new line to another.
///
/// @param matrix_file_name  Destination passed to fprint_matrix.
/// @param time_file_name    File opened in append mode for the timing line.
/// @param mind              Elapsed time in seconds.
/// @param ch,cw             Height and width of `conv`.
/// @param conv              Result matrix to print.
void write(char const *const matrix_file_name, char const *const time_file_name,
           double const &mind, int const &ch, int const &cw,
           double const *const *const conv) {
  fprint_matrix(matrix_file_name, ch, cw, conv);

  std::ofstream f(time_file_name, std::ios_base::app);
  // Convert through long long: where `long` is 32 bits, any run longer than
  // about 2.1 s would be out of range for the nanosecond count.
  f << static_cast<long long>(mind * 1e9) << '\n';
  // The stream is flushed and closed when `f` goes out of scope.
}
int main(int argc, char *argv[]) {
// init mpi
int initialized, csize, prank;
MPI_Initialized(&initialized);
if (!initialized) MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &csize);
MPI_Comm_rank(MPI_COMM_WORLD, &prank);
double **matrix, **kernel, **conv;
int ah, aw, bh, bw, ch, cw;
// read sizes and matrices
if (!prank) {
char const file_name[] = "input/input.txt";
read(file_name, ah, aw, bh, bw, matrix, kernel);
}
// start time measurement
double start_time = MPI_Wtime();
MPI_Bcast(&ah, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&aw, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&bh, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&bw, 1, MPI_INT, 0, MPI_COMM_WORLD);
// alloc matrices
if (prank) {
alloc_matrix(ah, aw, matrix);
alloc_matrix(bh, bw, kernel);
}
// distribute matrices
MPI_Bcast(matrix[0], ah * aw, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast(kernel[0], bh * bw, MPI_DOUBLE, 0, MPI_COMM_WORLD);
conv2(ah, aw, bh, bw, ch, cw, matrix, kernel, conv, prank, csize);
// dealloc matrices
if (prank) {
dealloc_matrix(ah, matrix);
dealloc_matrix(bh, kernel);
}
// end time measurement
double end_time = MPI_Wtime();
double total_time = end_time - start_time;
double mind;
MPI_Reduce(&total_time, &mind, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
if (!prank) {
char const matrix_file_name[] = "outputs/regular_distributed_matrix.txt";
char const time_file_name[] = "outputs/regular_distributed_time.txt";
write(matrix_file_name, time_file_name, mind, ch, cw, conv);
dealloc_matrix(ah, matrix);
dealloc_matrix(bh, kernel);
dealloc_matrix(ch, conv);
}
// finalize mpi
int finalized;
MPI_Finalized(&finalized);
if (!finalized) MPI_Finalize();
return 0;
}