CUDPP
2.1
CUDA Data-Parallel Primitives Library
|
CUDPP CTA-level sort routines. More...
#include <cudpp_globals.h>
#include "cudpp_stringsort.h"
#include <cudpp.h>
#include <stdio.h>
#include <cudpp_util.h>
#include <math.h>
#include "sharedmem.h"
Merge Sort Functions | |
#define | BLOCKSORT_SIZE 1024 |
#define | CTA_BLOCK 128 |
#define | DEPTH_simple 2 |
#define | DEPTH_multi 2 |
#define | CTASIZE_simple 256 |
#define | CTASIZE_multi 256 |
#define | INTERSECT_A_BLOCK_SIZE_simple DEPTH_simple*CTASIZE_simple |
#define | INTERSECT_B_BLOCK_SIZE_simple 2*DEPTH_simple*CTASIZE_simple |
#define | INTERSECT_A_BLOCK_SIZE_multi DEPTH_multi*CTASIZE_multi |
#define | INTERSECT_B_BLOCK_SIZE_multi 2*DEPTH_multi*CTASIZE_multi |
typedef unsigned int | uint |
__device__ int | tie_break_simp (unsigned int myLoc, unsigned int cmpLoc, unsigned int myBound, unsigned int cmpBound, unsigned int myAdd, unsigned int cmpAdd, unsigned int *stringLoc, unsigned int stringSize, unsigned char termC) |
Breaks ties in keys (first four characters); returns true if cmpVal > myVal, false otherwise. More... | |
template<class T , int depth> | |
__device__ void | bin_search_block_string (T &cmpValue, T tmpVal, T *in, T *addressPad, T *stringVals, int &j, int bump, int sizeRemain, unsigned int stringSize, unsigned char termC) |
Binary search within a single block (blockSort). More... | |
template<class T , int depth> | |
__device__ void | lin_search_block_string (T &cmpValue, T &tmpVal, T *in, T *addressPad, T *stringVals, int &j, int offset, int last, int startAddress, int stringSize, unsigned char termC) |
Linear search within a single block (blockSort). More... | |
template<class T > | |
__device__ void | compareSwapVal (T &A1, T &A2, const int index1, const int index2, T *scratch, T *stringVals, unsigned int size, unsigned char termC) |
For blockSort. Compares two values and decides to swap if A1 > A2. More... | |
template<class T , int depth> | |
__device__ void | binSearch_fragment (T *keys, T *address, int offset, int &mid, T cmpValue, T testValue, T myAddress, int myLoc, int cmpLoc, int myBound, int cmpBound, T *globalStringArray, int stringSize, unsigned char termC) |
Performs a binary search in our shared memory, with tie breaks for strings. More... | |
template<class T , int depth> | |
__device__ void | binSearch_frag_mult (T *keyArraySmem, T *valueArraySmem, int offset, int &mid, T cmpValue, T testValue, int myAddress, T *globalStringArray, int myStartIdxA, int myStartIdxB, int aIndex, int bIndex, int size, int stringSize, unsigned char termC) |
template<class T , int depth> | |
__device__ void | lin_merge_simple (T &cmpValue, T myKey, T myAddress, int &index, T *BKeys, T *BValues, T *stringValues, T *A_keys, T *A_values, T *A_keys_out, T *A_values_out, int myStartIdxA, int myStartIdxB, int myStartIdxC, T localMinB, T localMaxB, int aCont, int bCont, int totalSize, int mySizeA, int mySizeB, unsigned int stringSize, int i, int stepNum, bool &placed, unsigned char termC) |
Performs a linear search in our shared memory (done after binary search), with tie breaks for strings. More... | |
template<class T , int depth> | |
__device__ void | linearStringMerge (T *BKeys, T *BValues, T myKey, T myAddress, bool &placed, int &index, T &cmpValue, T *A_keys, T *A_values, T *A_keys_out, T *A_values_out, T *stringValues, int myStartIdxC, int myStartIdxA, int myStartIdxB, int localAPartSize, int localBPartSize, int localCPartSize, T localMaxB, T localMinB, int tid, int aIndex, int bIndex, int i, int stringSize, int totalSize, unsigned char termC) |
Performs a linear search in our shared memory, used by multiMerge kernel. More... | |
CUDPP CTA-level sort routines.
stringsort_cta.cu