Hello there. I'm a student and I'm trying some experiments with Cilk Plus in icc 15. I'm using Ubuntu 12.04 on an x64 Intel processor.
The following code is an implementation of a radix sorting algorithm for an octree, using the points' Morton codes. The problem is that it seems that when Cilk decides not to spawn a new thread for one of the 8 recursive calls, it also skips calling the function serially. This produces an incomplete sorted index vector, whose size is smaller than the original index vector's size, and thus the sort is not applied to all points. This does not happen if I implement the bin splitting serially and apply cilk_for only to the recursive calls. Can you explain to me what's happening? Is there an alternative implementation? Should I correct something?
#include <cstdlib> #include <cstdio> #include <vector> #include <cilk/cilk.h> #include <cilk/reducer_vector.h> #define MAXBINS 8 typedef std::vector<unsigned int> UIVector; typedef std::vector<unsigned long int> ULIVector; typedef cilk::reducer< cilk::op_vector<unsigned int> > UIVectorReducer; typedef cilk::reducer< cilk::op_vector<unsigned long int> > ULIVectorReducer; void truncated_radix_sort(const ULIVector& morton_codes, ULIVector* sorted_morton_codes, const UIVector& index, UIVector* permutation_vector, unsigned int *level_record, int population_threshold, int sft, int lv) { int N = index.size(); if (N <= 0) { return; } else if (N <= population_threshold || sft < 0) { // Base case. The node is a leaf level_record[0] = lv; // record the level of the node *permutation_vector = index; *sorted_morton_codes = morton_codes; return; } else { int i, j; level_record[0] = lv; /* Place point to a bin according to its morton code */ UIVectorReducer* bins_reducer = new UIVectorReducer[MAXBINS]; ULIVectorReducer* bin_codes_reducer = new ULIVectorReducer[MAXBINS]; cilk_for (j = 0; j < N; j++) { unsigned int ii = (morton_codes[j]>>sft) & 0x07; (bins_reducer[ii])->push_back(index[j]); (bin_codes_reducer[ii])->push_back(morton_codes[j]); } UIVector* sorted_bins = new UIVector[MAXBINS]; ULIVector* sorted_codes = new ULIVector[MAXBINS]; int offsets[MAXBINS]; offsets[0] = 0; for (i = 1; i < MAXBINS; i++) { offsets[i] = offsets[i-1] + bins_reducer[i-1].get_value().size(); } /* Call the function recursively to split the lower levels */ for (i = 0; i < MAXBINS; i++) { cilk_spawn truncated_radix_sort( bin_codes_reducer[i].get_value(), &sorted_codes[i], bins_reducer[i].get_value(), &sorted_bins[i],&level_record[offsets[i]], population_threshold, sft-3, lv+1); } cilk_sync; /* Merge sorted vectors */ permutation_vector->reserve(N); sorted_morton_codes->reserve(N); for (i = 0; i < MAXBINS; i++) { permutation_vector->insert(permutation_vector->end(), sorted_bins[i].begin(), 
sorted_bins[i].end()); sorted_morton_codes->insert(sorted_morton_codes->end(), sorted_codes[i].begin(), sorted_codes[i].end()); } delete[] sorted_bins; delete[] sorted_codes; delete[] bins_reducer; delete[] bin_codes_reducer; } }
Thank you in advance.