|
CUDPP
2.2
CUDA Data-Parallel Primitives Library
|
CUDPP CTA-level sort routines. More...
#include <cudpp_globals.h>
#include "cudpp_mergesort.h"
#include <cudpp.h>
#include <stdio.h>
#include <cudpp_util.h>
#include <math.h>
#include "sharedmem.h"
Merge Sort Functions | |
| #define | BLOCKSORT_SIZE 1024 |
| #define | CTA_BLOCK 128 |
| #define | DEPTH_simple 2 |
| #define | DEPTH_multi 4 |
| #define | CTASIZE_simple 256 |
| #define | CTASIZE_multi 128 |
| #define | INTERSECT_A_BLOCK_SIZE_simple DEPTH_simple*CTASIZE_simple |
| #define | INTERSECT_B_BLOCK_SIZE_simple 2*DEPTH_simple*CTASIZE_simple |
| #define | INTERSECT_A_BLOCK_SIZE_multi DEPTH_multi*CTASIZE_multi |
| #define | INTERSECT_B_BLOCK_SIZE_multi 2*DEPTH_multi*CTASIZE_multi |
| typedef unsigned int | uint |
| template<class T , int depth> | |
| __device__ void | bin_search_block (T &cmpValue, T tmpVal, T *in, unsigned int &j, unsigned int bump, unsigned int addPart) |
| Binary search within a single block (blockSort) More... | |
| template<class T , int depth> | |
| __device__ void | lin_search_block (T &cmpValue, T mVal, unsigned int &tmpVal, T *in, unsigned int *addressPad, unsigned int &j, unsigned int offset, unsigned int last, unsigned int startAddress, unsigned int addPart) |
| Linear search within a single block (blockSort) More... | |
| template<class T > | |
| __device__ void | compareSwapVal (T &A1, T &A2, unsigned int &ref1, unsigned int &ref2) |
| Used by blockSort: compares two values and swaps them if A1 > A2. More... | |
| template<class T > | |
| __device__ void | binSearch_fragment_lower (T *binArray, int offset, int &mid, T testValue) |
| template<class T > | |
| __device__ void | binSearch_fragment_higher (T *binArray, int offset, int &mid, T testValue) |
| template<class T > | |
| __device__ void | binSearch_whole_lower (T *BKeys, int &index, T myKey) |
| template<class T > | |
| __device__ void | binSearch_whole_higher (T *BKeys, int &index, T myKey) |
| template<class T , int depth> | |
| __device__ void | linearMerge_lower (T *searchArray, T myKey, unsigned int myVal, int &index, T *saveGlobalArray, unsigned int *saveValueArray, int myStartIdxC, T nextMaxB, int localAPartSize, int localBPartSize, T localMaxB, T localMinB, int aIndex, int bIndex, int offset) |
| Performs a linear search in our shared memory (done after binary search). It merges the partition on the left side with the associated partition on the right side. More... | |
| template<class T , int depth> | |
| __device__ void | linearMerge_higher (T *searchArray, T myKey, unsigned int myVal, int &index, T *saveGlobalArray, unsigned int *saveValueArray, int myStartIdxC, T localMinB, T nextMaxB, int aIndex, int bIndex, int offset, int localAPartSize, int localBPartSize) |
| Performs a linear search in our shared memory (done after binary search). It merges the partition on the right side with the associated partition on the left side. More... | |
CUDPP CTA-level sort routines.
sort_cta.cu
1.8.6