// Matrix dimensions shared by the whole file: a is M x L, b is L x N and the
// product c is M x N.
// L: columns of a / rows of b (also the length of each dot product).
const size_t L = 150;
// M: rows of a and of c.
const size_t M = 225;
// N: columns of b and of c.
const size_t N = 300;
// Function object that computes one rectangular tile of the product c = a * b.
// It only stores raw pointers to the caller's matrices (nothing is copied or
// freed here); the tile to fill is supplied on each call as a 2D range.
class MatrixMultiplyBody2D {
public:
    // Remembers where the output matrix c and the input matrices a and b live.
    MatrixMultiplyBody2D( float c[M][N], const float a[M][L], const float b[L][N] )
        : my_a(a), my_b(b), my_c(c)
    {}

    // Fills c[row][col] for every row in r.rows() and every col in r.cols()
    // with the dot product of row `row` of a and column `col` of b.
    void operator()( const blocked_range2d<size_t>& r ) const {
        const size_t row_first = r.rows().begin();
        const size_t row_last  = r.rows().end();
        const size_t col_first = r.cols().begin();
        const size_t col_last  = r.cols().end();

        for( size_t row = row_first; row != row_last; ++row ) {
            const float* a_row = my_a[row];  // row `row` of a (L elements)
            float* c_row = my_c[row];        // row `row` of c (N elements)
            for( size_t col = col_first; col != col_last; ++col ) {
                // Accumulate in ascending k so the rounding matches a plain
                // serial dot product.
                float acc = 0;
                for( size_t k = 0; k < L; ++k )
                    acc += a_row[k] * my_b[k][col];
                c_row[col] = acc;
            }
        }
    }

private:
    const float (*my_a)[L];  // left operand, rows of L floats
    const float (*my_b)[N];  // right operand, rows of N floats
    float (*my_c)[N];        // result, rows of N floats
};
// Computes c = a * b (a is M x L, b is L x N, c is M x N) by running a
// MatrixMultiplyBody2D over the whole index space of c with parallel_for.
void ParallelMatrixMultiply(float c[M][N], const float a[M][L], const float b[L][N]){
    // Rows:    begin 0, end M, grain size 2.
    // Columns: begin 0, end N, grain size 5.
    const blocked_range2d<size_t> whole_c(0, M, 2, 0, N, 5);
    const MatrixMultiplyBody2D multiply_tile(c, a, b);
    parallel_for( whole_c, multiply_tile );
}
// Demo entry point: fills a (M x L) and b (L x N) with random values, multiplies
// them into c (M x N) with ParallelMatrixMultiply, then prints the last element
// of c next to the same element recomputed serially so the two can be compared.
// Command-line arguments are not used. Always returns 0.
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    // Static storage instead of automatic: together the three matrices take
    // roughly 585 KB, which is a large share of a typical default thread stack.
    static float c[M][N], a[M][L], b[L][N];

    // Random number generator (OpenCV's RNG class, per the original comment).
    RNG rng;

    // Fill a, row by row. The cast makes the narrowing to float explicit.
    for (size_t i = 0; i < M; ++i)
    {
        for (size_t j = 0; j < L; ++j)
        {
            a[i][j] = static_cast<float>(rng.gaussian(0.5));
        }
    }

    // Fill b, row by row (drawn after a, so the random sequence is unchanged).
    for (size_t i = 0; i < L; ++i)
    {
        for (size_t j = 0; j < N; ++j)
        {
            b[i][j] = static_cast<float>(rng.gaussian(0.5));
        }
    }

    // Parallel product c = a * b.
    ParallelMatrixMultiply(c, a, b);

    // Verification: spot-check the bottom-right element of c. The indices are
    // derived from the dimensions so they stay in bounds if M, N or L change;
    // with the current values the printed labels are exactly
    // "c[224][299] = " and "a[224][149]* b[149][299] = ".
    const size_t last_row = M - 1;
    const size_t last_col = N - 1;
    const size_t last_k = L - 1;

    cout << "c[" << last_row << "][" << last_col << "] = "
         << c[last_row][last_col] << '\n';

    // Serial reference: full dot product of the last row of a with the last
    // column of b (the historical label below names only the final term, but
    // the value printed is the whole sum).
    float sum = 0;
    for (size_t k = 0; k < L; ++k)
        sum += a[last_row][k] * b[k][last_col];

    cout << "a[" << last_row << "][" << last_k << "]* b[" << last_k << "]["
         << last_col << "] = " << sum << '\n';
    return 0;
}