CUDPP  2.3
CUDA Data-Parallel Primitives Library
multisplit_app.cu File Reference

CUDPP application-level multisplit routines. More...

#include <cub/cub.cuh>
#include "cuda_util.h"
#include "cudpp.h"
#include "cudpp_util.h"
#include "kernel/multisplit_kernel.cuh"

MultiSplit Functions

#define MULTISPLIT_WMS_K_ONE_ROLL   8
 
#define MULTISPLIT_WMS_K_TWO_ROLL   8
 
#define MULTISPLIT_WMS_K_THREE_ROLL   4
 
#define MULTISPLIT_WMS_K_FOUR_ROLL   4
 
#define MULTISPLIT_WMS_K_FIVE_ROLL   2
 
#define MULTISPLIT_WMS_KV_ONE_ROLL   4
 
#define MULTISPLIT_WMS_KV_TWO_ROLL   4
 
#define MULTISPLIT_WMS_KV_THREE_ROLL   2
 
#define MULTISPLIT_WMS_KV_FOUR_ROLL   2
 
#define MULTISPLIT_WMS_KV_FIVE_ROLL   2
 
#define MULTISPLIT_BMS_K_ONE_ROLL   8
 
#define MULTISPLIT_BMS_K_TWO_ROLL   8
 
#define MULTISPLIT_BMS_K_THREE_ROLL   4
 
#define MULTISPLIT_BMS_K_FOUR_ROLL   4
 
#define MULTISPLIT_BMS_K_FIVE_ROLL   4
 
#define MULTISPLIT_BMS_KV_ONE_ROLL   4
 
#define MULTISPLIT_BMS_KV_TWO_ROLL   4
 
#define MULTISPLIT_BMS_KV_THREE_ROLL   2
 
#define MULTISPLIT_BMS_KV_FOUR_ROLL   2
 
#define MULTISPLIT_BMS_KV_FIVE_ROLL   2
 
#define MULTISPLIT_SWITCH_STRATEGY_K   8
 
#define MULTISPLIT_SWITCH_STRATEGY_KV   8
 
#define MULTISPLIT_NUM_WARPS   8
 
#define MULTISPLIT_LOG_WARPS   3
 
#define MULTISPLIT_WARP_WIDTH   32
 
#define MULTISPLIT_TRHEADS_PER_BLOCK   (MULTISPLIT_WARP_WIDTH * MULTISPLIT_NUM_WARPS)
 
template<typename bucket_t , typename key_type >
void multisplit_WMS_prescan_function (key_type *d_key_in, uint32_t num_elements, bucket_t bucket_identifier, uint32_t num_buckets, uint32_t num_blocks_raw, uint32_t &num_blocks_pre, uint32_t &num_sub_problems, multisplit_context &context)
 
template<typename bucket_t , typename key_type >
void multisplit_WMS_pairs_prescan_function (key_type *d_key_in, uint32_t num_elements, bucket_t bucket_identifier, uint32_t num_buckets, uint32_t num_blocks_raw, uint32_t &num_blocks_pre, uint32_t &num_sub_problems, multisplit_context &context)
 
template<typename bucket_t , typename key_type >
void multisplit_WMS_postscan_function (key_type *d_key_in, key_type *d_key_out, uint32_t num_elements, bucket_t bucket_identifier, uint32_t num_buckets, uint32_t num_blocks_post, multisplit_context &context)
 
template<typename bucket_t , typename key_type , typename value_type >
void multisplit_WMS_pairs_postscan_function (key_type *d_key_in, value_type *d_value_in, key_type *d_key_out, value_type *d_value_out, uint32_t num_elements, bucket_t bucket_identifier, uint32_t num_buckets, uint32_t num_blocks_post, multisplit_context &context)
 
template<typename bucket_t , typename key_type >
void multisplit_BMS_prescan_function (key_type *d_key_in, uint32_t num_elements, bucket_t bucket_identifier, uint32_t num_buckets, uint32_t num_blocks_raw, uint32_t &num_blocks_pre, uint32_t &num_sub_problems, multisplit_context &context)
 
template<typename bucket_t , typename key_type >
void multisplit_BMS_pairs_prescan_function (key_type *d_key_in, uint32_t num_elements, bucket_t bucket_identifier, uint32_t num_buckets, uint32_t num_blocks_raw, uint32_t &num_blocks_pre, uint32_t &num_sub_problems, multisplit_context &context)
 
template<typename bucket_t , typename key_type >
void multisplit_BMS_postscan_function (key_type *d_key_in, key_type *d_key_out, uint32_t num_elements, bucket_t bucket_identifier, uint32_t num_buckets, uint32_t num_blocks_post, multisplit_context &context)
 
template<typename bucket_t , typename key_type , typename value_type >
void multisplit_BMS_pairs_postscan_function (key_type *d_key_in, value_type *d_value_in, key_type *d_key_out, value_type *d_value_out, uint32_t num_elements, bucket_t bucket_identifier, uint32_t num_buckets, uint32_t num_blocks_post, multisplit_context &context)
 
void multisplit_allocate_key_only (size_t num_elements, uint32_t num_buckets, multisplit_context &context)
 
void multisplit_allocate_key_value (size_t num_elements, uint32_t num_buckets, multisplit_context &context)
 
void multisplit_release_memory (multisplit_context &context)
 
template<typename key_type , typename bucket_t >
void multisplit_key_only (key_type *d_key_in, key_type *d_key_out, size_t num_elements, uint32_t num_buckets, multisplit_context &context, bucket_t bucket_identifier, bool in_place, uint32_t *bucket_offsets=NULL)
 Performs multisplit on keys only. More...
 
template<typename key_type , typename value_type , typename bucket_t >
void multisplit_key_value (key_type *d_key_in, value_type *d_value_in, key_type *d_key_out, value_type *d_value_out, size_t num_elements, uint32_t num_buckets, multisplit_context &context, bucket_t bucket_identifier, bool in_place, uint32_t *bucket_offsets=NULL)
 Performs multisplit on key-value pairs. More...
 
cub::CachingDeviceAllocator g_allocator (true)
 
template<class T >
void reducedBitSortKeysOnly (unsigned int *d_inp, uint numElements, uint numBuckets, T bucketMapper, const CUDPPMultiSplitPlan *plan)
 Performs multisplit on keys only using the reduced-bit sort method. More...
 
template<class T >
void reducedBitSortKeyValue (unsigned int *d_keys, unsigned int *d_values, unsigned int numElements, unsigned int numBuckets, T bucketMapper, const CUDPPMultiSplitPlan *plan)
 Performs multisplit on key-value pairs using the reduced-bit sort method. More...
 
void allocMultiSplitStorage (CUDPPMultiSplitPlan *plan)
 From the programmer-specified multisplit configuration, allocates the internal memory needed to perform the multisplit. The amount of storage required depends on the number of buckets. More...
 
void freeMultiSplitStorage (CUDPPMultiSplitPlan *plan)
 Deallocates the intermediate memory that was allocated by allocMultiSplitStorage. More...
 
void cudppMultiSplitDispatch (unsigned int *d_keys, unsigned int *d_values, size_t numElements, size_t numBuckets, BucketMappingFunc bucketMappingFunc, const CUDPPMultiSplitPlan *plan)
 Dispatch function to perform multisplit on an array of elements into a number of buckets. More...
 

Detailed Description

CUDPP application-level multisplit routines.

multisplit_app.cu