|
CUDPP
2.2
CUDA Data-Parallel Primitives Library
|
CUDPP CTA-level sort routines. More...
#include <cudpp_globals.h>
#include "cudpp_stringsort.h"
#include <cudpp.h>
#include <stdio.h>
#include <cudpp_util.h>
#include <math.h>
#include "sharedmem.h"
Merge Sort Functions | |
| #define | BLOCKSORT_SIZE 1024 |
| #define | CTA_BLOCK 128 |
| #define | DEPTH_simple 2 |
| #define | DEPTH_multi 2 |
| #define | CTASIZE_simple 256 |
| #define | CTASIZE_multi 256 |
| #define | INTERSECT_A_BLOCK_SIZE_simple DEPTH_simple*CTASIZE_simple |
| #define | INTERSECT_B_BLOCK_SIZE_simple 2*DEPTH_simple*CTASIZE_simple |
| #define | INTERSECT_A_BLOCK_SIZE_multi DEPTH_multi*CTASIZE_multi |
| #define | INTERSECT_B_BLOCK_SIZE_multi 2*DEPTH_multi*CTASIZE_multi |
| typedef unsigned int | uint |
| __device__ int | tie_break_simp (unsigned int myLoc, unsigned int cmpLoc, unsigned int myBound, unsigned int cmpBound, unsigned int myAdd, unsigned int cmpAdd, unsigned int *stringLoc, unsigned int stringSize, unsigned char termC) |
| Breaks ties in keys (first four characters). Returns true if cmpVal > myVal, false otherwise. More... | |
| template<class T , int depth> | |
| __device__ void | bin_search_block_string (T &cmpValue, T tmpVal, T *in, T *addressPad, T *stringVals, int &j, int bump, int sizeRemain, unsigned int stringSize, unsigned char termC) |
| Binary search within a single block (blockSort) More... | |
| template<class T , int depth> | |
| __device__ void | lin_search_block_string (T &cmpValue, T &tmpVal, T *in, T *addressPad, T *stringVals, int &j, int offset, int last, int startAddress, int stringSize, unsigned char termC) |
| Linear search within a single block (blockSort) More... | |
| template<class T > | |
| __device__ void | compareSwapVal (T &A1, T &A2, const int index1, const int index2, T *scratch, T *stringVals, unsigned int size, unsigned char termC) |
| For blockSort. Compares two values and decides to swap if A1 > A2. More... | |
| template<class T , int depth> | |
| __device__ void | binSearch_fragment (T *keys, T *address, int offset, int &mid, T cmpValue, T testValue, T myAddress, int myLoc, int cmpLoc, int myBound, int cmpBound, T *globalStringArray, int stringSize, unsigned char termC) |
| Performs a binary search in our shared memory, with tie breaks for strings. More... | |
| template<class T , int depth> | |
| __device__ void | binSearch_frag_mult (T *keyArraySmem, T *valueArraySmem, int offset, int &mid, T cmpValue, T testValue, int myAddress, T *globalStringArray, int myStartIdxA, int myStartIdxB, int aIndex, int bIndex, int size, int stringSize, unsigned char termC) |
| template<class T , int depth> | |
| __device__ void | lin_merge_simple (T &cmpValue, T myKey, T myAddress, int &index, T *BKeys, T *BValues, T *stringValues, T *A_keys, T *A_values, T *A_keys_out, T *A_values_out, int myStartIdxA, int myStartIdxB, int myStartIdxC, T localMinB, T localMaxB, int aCont, int bCont, int totalSize, int mySizeA, int mySizeB, unsigned int stringSize, int i, int stepNum, bool &placed, unsigned char termC) |
| Performs a linear search in our shared memory (done after binary search), with tie breaks for strings. More... | |
| template<class T , int depth> | |
| __device__ void | linearStringMerge (T *BKeys, T *BValues, T myKey, T myAddress, bool &placed, int &index, T &cmpValue, T *A_keys, T *A_values, T *A_keys_out, T *A_values_out, T *stringValues, int myStartIdxC, int myStartIdxA, int myStartIdxB, int localAPartSize, int localBPartSize, int localCPartSize, T localMaxB, T localMinB, int tid, int aIndex, int bIndex, int i, int stringSize, int totalSize, unsigned char termC) |
| Performs a linear search in our shared memory, used by multiMerge kernel. More... | |
CUDPP CTA-level sort routines.
stringsort_cta.cu
1.8.6