Hello there. I'm a student and I'm running some experiments with the Cilk Plus implementation in icc 15. I'm using Ubuntu 12.04 on an x64 Intel processor.
The following code is an implementation of a radix sort for an octree, based on the points' Morton codes. The problem is that when Cilk decides not to spawn a new thread for one of the 8 recursive calls, it also seems to skip calling the function serially. The result is an incomplete sorted index vector whose size is smaller than that of the original index vector, so the sort is not applied to all points. This does not happen if I implement the bin splitting serially and apply cilk_for only to the recursive calls. Can you explain to me what is happening? Is there an alternative implementation? Should I correct something?
#include <cstdlib>
#include <cstdio>
#include <vector>
#include <cilk/cilk.h>
#include <cilk/reducer_vector.h>
/* Number of bins a node is split into: one per value of a 3-bit digit. */
#define MAXBINS 8
/* Vector of point indices. */
typedef std::vector<unsigned int> UIVector;
/* Vector of Morton codes. */
typedef std::vector<unsigned long int> ULIVector;
/* Cilk Plus vector reducers used to fill the two vector types from a cilk_for. */
typedef cilk::reducer< cilk::op_vector<unsigned int> > UIVectorReducer;
typedef cilk::reducer< cilk::op_vector<unsigned long int> > ULIVectorReducer;
void truncated_radix_sort(const ULIVector& morton_codes,
ULIVector* sorted_morton_codes,
const UIVector& index,
UIVector* permutation_vector,
unsigned int *level_record,
int population_threshold,
int sft, int lv)
{
int N = index.size();
if (N <= 0)
{
return;
}
else if (N <= population_threshold || sft < 0)
{ // Base case. The node is a leaf
level_record[0] = lv; // record the level of the node
*permutation_vector = index;
*sorted_morton_codes = morton_codes;
return;
}
else
{
int i, j;
level_record[0] = lv;
/* Place point to a bin according to its morton code */
UIVectorReducer* bins_reducer = new UIVectorReducer[MAXBINS];
ULIVectorReducer* bin_codes_reducer = new ULIVectorReducer[MAXBINS];
cilk_for (j = 0; j < N; j++)
{
unsigned int ii = (morton_codes[j]>>sft) & 0x07;
(bins_reducer[ii])->push_back(index[j]);
(bin_codes_reducer[ii])->push_back(morton_codes[j]);
}
UIVector* sorted_bins = new UIVector[MAXBINS];
ULIVector* sorted_codes = new ULIVector[MAXBINS];
int offsets[MAXBINS];
offsets[0] = 0;
for (i = 1; i < MAXBINS; i++)
{
offsets[i] = offsets[i-1] +
bins_reducer[i-1].get_value().size();
}
/* Call the function recursively to split the lower levels */
for (i = 0; i < MAXBINS; i++)
{
cilk_spawn truncated_radix_sort(
bin_codes_reducer[i].get_value(), &sorted_codes[i],
bins_reducer[i].get_value(), &sorted_bins[i],&level_record[offsets[i]],
population_threshold,
sft-3, lv+1);
}
cilk_sync;
/* Merge sorted vectors */
permutation_vector->reserve(N);
sorted_morton_codes->reserve(N);
for (i = 0; i < MAXBINS; i++)
{
permutation_vector->insert(permutation_vector->end(), sorted_bins[i].begin(), sorted_bins[i].end());
sorted_morton_codes->insert(sorted_morton_codes->end(), sorted_codes[i].begin(), sorted_codes[i].end());
}
delete[] sorted_bins;
delete[] sorted_codes;
delete[] bins_reducer;
delete[] bin_codes_reducer;
}
}Thank you in advance.