CUDPP
2.3
CUDA Data-Parallel Primitives Library
|
CUDPP application-level multisplit routines. More...
#include <cub/cub.cuh>
#include "cuda_util.h"
#include "cudpp.h"
#include "cudpp_util.h"
#include "kernel/multisplit_kernel.cuh"
MultiSplit Functions | |
#define | MULTISPLIT_WMS_K_ONE_ROLL 8 |
#define | MULTISPLIT_WMS_K_TWO_ROLL 8 |
#define | MULTISPLIT_WMS_K_THREE_ROLL 4 |
#define | MULTISPLIT_WMS_K_FOUR_ROLL 4 |
#define | MULTISPLIT_WMS_K_FIVE_ROLL 2 |
#define | MULTISPLIT_WMS_KV_ONE_ROLL 4 |
#define | MULTISPLIT_WMS_KV_TWO_ROLL 4 |
#define | MULTISPLIT_WMS_KV_THREE_ROLL 2 |
#define | MULTISPLIT_WMS_KV_FOUR_ROLL 2 |
#define | MULTISPLIT_WMS_KV_FIVE_ROLL 2 |
#define | MULTISPLIT_BMS_K_ONE_ROLL 8 |
#define | MULTISPLIT_BMS_K_TWO_ROLL 8 |
#define | MULTISPLIT_BMS_K_THREE_ROLL 4 |
#define | MULTISPLIT_BMS_K_FOUR_ROLL 4 |
#define | MULTISPLIT_BMS_K_FIVE_ROLL 4 |
#define | MULTISPLIT_BMS_KV_ONE_ROLL 4 |
#define | MULTISPLIT_BMS_KV_TWO_ROLL 4 |
#define | MULTISPLIT_BMS_KV_THREE_ROLL 2 |
#define | MULTISPLIT_BMS_KV_FOUR_ROLL 2 |
#define | MULTISPLIT_BMS_KV_FIVE_ROLL 2 |
#define | MULTISPLIT_SWITCH_STRATEGY_K 8 |
#define | MULTISPLIT_SWITCH_STRATEGY_KV 8 |
#define | MULTISPLIT_NUM_WARPS 8 |
#define | MULTISPLIT_LOG_WARPS 3 |
#define | MULTISPLIT_WARP_WIDTH 32 |
#define | MULTISPLIT_TRHEADS_PER_BLOCK (MULTISPLIT_WARP_WIDTH * MULTISPLIT_NUM_WARPS) |
template<typename bucket_t , typename key_type > | |
void | multisplit_WMS_prescan_function (key_type *d_key_in, uint32_t num_elements, bucket_t bucket_identifier, uint32_t num_buckets, uint32_t num_blocks_raw, uint32_t &num_blocks_pre, uint32_t &num_sub_problems, multisplit_context &context) |
template<typename bucket_t , typename key_type > | |
void | multisplit_WMS_pairs_prescan_function (key_type *d_key_in, uint32_t num_elements, bucket_t bucket_identifier, uint32_t num_buckets, uint32_t num_blocks_raw, uint32_t &num_blocks_pre, uint32_t &num_sub_problems, multisplit_context &context) |
template<typename bucket_t , typename key_type > | |
void | multisplit_WMS_postscan_function (key_type *d_key_in, key_type *d_key_out, uint32_t num_elements, bucket_t bucket_identifier, uint32_t num_buckets, uint32_t num_blocks_post, multisplit_context &context) |
template<typename bucket_t , typename key_type , typename value_type > | |
void | multisplit_WMS_pairs_postscan_function (key_type *d_key_in, value_type *d_value_in, key_type *d_key_out, value_type *d_value_out, uint32_t num_elements, bucket_t bucket_identifier, uint32_t num_buckets, uint32_t num_blocks_post, multisplit_context &context) |
template<typename bucket_t , typename key_type > | |
void | multisplit_BMS_prescan_function (key_type *d_key_in, uint32_t num_elements, bucket_t bucket_identifier, uint32_t num_buckets, uint32_t num_blocks_raw, uint32_t &num_blocks_pre, uint32_t &num_sub_problems, multisplit_context &context) |
template<typename bucket_t , typename key_type > | |
void | multisplit_BMS_pairs_prescan_function (key_type *d_key_in, uint32_t num_elements, bucket_t bucket_identifier, uint32_t num_buckets, uint32_t num_blocks_raw, uint32_t &num_blocks_pre, uint32_t &num_sub_problems, multisplit_context &context) |
template<typename bucket_t , typename key_type > | |
void | multisplit_BMS_postscan_function (key_type *d_key_in, key_type *d_key_out, uint32_t num_elements, bucket_t bucket_identifier, uint32_t num_buckets, uint32_t num_blocks_post, multisplit_context &context) |
template<typename bucket_t , typename key_type , typename value_type > | |
void | multisplit_BMS_pairs_postscan_function (key_type *d_key_in, value_type *d_value_in, key_type *d_key_out, value_type *d_value_out, uint32_t num_elements, bucket_t bucket_identifier, uint32_t num_buckets, uint32_t num_blocks_post, multisplit_context &context) |
void | multisplit_allocate_key_only (size_t num_elements, uint32_t num_buckets, multisplit_context &context) |
void | multisplit_allocate_key_value (size_t num_elements, uint32_t num_buckets, multisplit_context &context) |
void | multisplit_release_memory (multisplit_context &context) |
template<typename key_type , typename bucket_t > | |
void | multisplit_key_only (key_type *d_key_in, key_type *d_key_out, size_t num_elements, uint32_t num_buckets, multisplit_context &context, bucket_t bucket_identifier, bool in_place, uint32_t *bucket_offsets=NULL) |
Performs multisplit on keys only. More... | |
template<typename key_type , typename value_type , typename bucket_t > | |
void | multisplit_key_value (key_type *d_key_in, value_type *d_value_in, key_type *d_key_out, value_type *d_value_out, size_t num_elements, uint32_t num_buckets, multisplit_context &context, bucket_t bucket_identifier, bool in_place, uint32_t *bucket_offsets=NULL) |
Performs multisplit on key-value pairs. More... | |
cub::CachingDeviceAllocator | g_allocator (true) |
template<class T > | |
void | reducedBitSortKeysOnly (unsigned int *d_inp, uint numElements, uint numBuckets, T bucketMapper, const CUDPPMultiSplitPlan *plan) |
Performs multisplit on keys only using the reduced-bit sort method. More... | |
template<class T > | |
void | reducedBitSortKeyValue (unsigned int *d_keys, unsigned int *d_values, unsigned int numElements, unsigned int numBuckets, T bucketMapper, const CUDPPMultiSplitPlan *plan) |
Performs multisplit on key-value pairs using the reduced-bit sort method. More... |
void | allocMultiSplitStorage (CUDPPMultiSplitPlan *plan) |
Allocates the internal memory needed to perform the multisplit, based on the programmer-specified multisplit configuration. The amount of storage required depends on the number of buckets. More... |
void | freeMultiSplitStorage (CUDPPMultiSplitPlan *plan) |
Deallocates the intermediate memory that was allocated by allocMultiSplitStorage. More... |
void | cudppMultiSplitDispatch (unsigned int *d_keys, unsigned int *d_values, size_t numElements, size_t numBuckets, BucketMappingFunc bucketMappingFunc, const CUDPPMultiSplitPlan *plan) |
Dispatch function that performs a multisplit of an array of elements into a given number of buckets. More... |
CUDPP application-level multisplit routines.