c++ - Basic OpenMP Parallel Program Not Scaling As Expected -
#include <iostream> #include <vector> #include <stdexcept> #include <sstream> #include <omp.h> std::vector<int> col_sums(const std::vector<std::vector<short>>& data) { unsigned int height = data.size(), width = data[0].size(); std::vector<int> totalsums(width, 0), threadsums(width, 0); #pragma omp parallel firstprivate(threadsums) { #pragma omp parallel (unsigned int = 0; < height; i++) { threadsums.data()[0:width] += data[i].data()[0:width]; } #pragma omp critical { totalsums.data()[0:width] += threadsums.data()[0:width]; } } return totalsums; } int main(int argc, char** argv) { if (argc < 3) { std::cout << "run program \"executable <rows> <columns>\n"; } else { std::stringstream args; args << argv[1] << " " << argv[2]; int rows, columns; args >> rows >> columns; std::vector<std::vector<short>> data(rows, std::vector<short>(columns)); std::vector<int> columnsums = col_sums(data); } }
export OMP_NUM_THREADS=4
icpc -Ofast -fopenmp -g dummy.cpp -o dummy
/usr/bin/time -v ./dummy 115000 20000
- CPU% = 225% (should be 380%+)
I'm experienced with OpenMP and Cilk Plus, but the barrier to scaling here eludes me, and this is a rudimentary program. I know it has to be something obvious, but I feel I've erased all the data hazards and control hazards. I'm totally stumped.
Comments
Post a Comment