CUDPP
2.1
CUDA Data-Parallel Primitives Library
|
CUDPP CTA-level sort routines.
#include <cudpp_globals.h>
#include "cudpp_mergesort.h"
#include <cudpp.h>
#include <stdio.h>
#include <cudpp_util.h>
#include <math.h>
#include "sharedmem.h"
Merge Sort Functions | |
#define | BLOCKSORT_SIZE 1024 |
#define | CTA_BLOCK 128 |
#define | DEPTH_simple 2 |
#define | DEPTH_multi 4 |
#define | CTASIZE_simple 256 |
#define | CTASIZE_multi 128 |
#define | INTERSECT_A_BLOCK_SIZE_simple DEPTH_simple*CTASIZE_simple |
#define | INTERSECT_B_BLOCK_SIZE_simple 2*DEPTH_simple*CTASIZE_simple |
#define | INTERSECT_A_BLOCK_SIZE_multi DEPTH_multi*CTASIZE_multi |
#define | INTERSECT_B_BLOCK_SIZE_multi 2*DEPTH_multi*CTASIZE_multi |
typedef unsigned int | uint |
template<class T , int depth> | |
__device__ void | bin_search_block (T &cmpValue, T tmpVal, T *in, unsigned int &j, unsigned int bump, unsigned int addPart) |
Binary search within a single block (used by blockSort). | |
template<class T , int depth> | |
__device__ void | lin_search_block (T &cmpValue, T mVal, unsigned int &tmpVal, T *in, unsigned int *addressPad, unsigned int &j, unsigned int offset, unsigned int last, unsigned int startAddress, unsigned int addPart) |
Linear search within a single block (used by blockSort). | |
template<class T > | |
__device__ void | compareSwapVal (T &A1, T &A2, unsigned int &ref1, unsigned int &ref2) |
Used by blockSort. Compares two values and swaps them if A1 > A2. | |
template<class T > | |
__device__ void | binSearch_fragment_lower (T *binArray, int offset, int &mid, T testValue) |
template<class T > | |
__device__ void | binSearch_fragment_higher (T *binArray, int offset, int &mid, T testValue) |
template<class T > | |
__device__ void | binSearch_whole_lower (T *BKeys, int &index, T myKey) |
template<class T > | |
__device__ void | binSearch_whole_higher (T *BKeys, int &index, T myKey) |
template<class T , int depth> | |
__device__ void | linearMerge_lower (T *searchArray, T myKey, unsigned int myVal, int &index, T *saveGlobalArray, unsigned int *saveValueArray, int myStartIdxC, T nextMaxB, int localAPartSize, int localBPartSize, T localMaxB, T localMinB, int aIndex, int bIndex, int offset) |
Performs a linear search in shared memory (done after the binary search). It merges the partition on the left side with its associated partition on the right side. | |
template<class T , int depth> | |
__device__ void | linearMerge_higher (T *searchArray, T myKey, unsigned int myVal, int &index, T *saveGlobalArray, unsigned int *saveValueArray, int myStartIdxC, T localMinB, T nextMaxB, int aIndex, int bIndex, int offset, int localAPartSize, int localBPartSize) |
Performs a linear search in shared memory (done after the binary search). It merges the partition on the right side with its associated partition on the left side. | |
CUDPP CTA-level sort routines.
sort_cta.cu