CUDPP  2.3
CUDA Data-Parallel Primitives Library
stringsort_cta.cuh File Reference

CUDPP CTA-level sort routines. More...

#include <cudpp_globals.h>
#include "cudpp_stringsort.h"
#include <cudpp.h>
#include <stdio.h>
#include <cudpp_util.h>
#include <math.h>
#include "sharedmem.h"

Merge Sort Functions

#define BLOCKSORT_SIZE   1024
 
#define CTA_BLOCK   128
 
#define DEPTH_simple   2
 
#define DEPTH_multi   2
 
#define CTASIZE_simple   256
 
#define CTASIZE_multi   256
 
#define INTERSECT_A_BLOCK_SIZE_simple   DEPTH_simple*CTASIZE_simple
 
#define INTERSECT_B_BLOCK_SIZE_simple   2*DEPTH_simple*CTASIZE_simple
 
#define INTERSECT_A_BLOCK_SIZE_multi   DEPTH_multi*CTASIZE_multi
 
#define INTERSECT_B_BLOCK_SIZE_multi   2*DEPTH_multi*CTASIZE_multi
 
typedef unsigned int uint
 
__device__ int tie_break_simp (unsigned int myLoc, unsigned int cmpLoc, unsigned int myBound, unsigned int cmpBound, unsigned int myAdd, unsigned int cmpAdd, unsigned int *stringLoc, unsigned int stringSize, unsigned char termC)
 Breaks ties in keys (first four characters); returns true if cmpVal > myVal, false otherwise. More...
 
template<class T , int depth>
__device__ void bin_search_block_string (T &cmpValue, T tmpVal, T *in, T *addressPad, T *stringVals, int &j, int bump, int sizeRemain, unsigned int stringSize, unsigned char termC)
 Binary search within a single block (blockSort). More...
 
template<class T , int depth>
__device__ void lin_search_block_string (T &cmpValue, T &tmpVal, T *in, T *addressPad, T *stringVals, int &j, int offset, int last, int startAddress, int stringSize, unsigned char termC)
 Linear search within a single block (blockSort). More...
 
template<class T >
__device__ void compareSwapVal (T &A1, T &A2, const int index1, const int index2, T *scratch, T *stringVals, unsigned int size, unsigned char termC)
 For blockSort. Compares two values and decides to swap if A1 > A2. More...
 
template<class T , int depth>
__device__ void binSearch_fragment (T *keys, T *address, int offset, int &mid, T cmpValue, T testValue, T myAddress, int myLoc, int cmpLoc, int myBound, int cmpBound, T *globalStringArray, int stringSize, unsigned char termC)
 Performs a binary search in our shared memory, with tie breaks for strings. More...
 
template<class T , int depth>
__device__ void binSearch_frag_mult (T *keyArraySmem, T *valueArraySmem, int offset, int &mid, T cmpValue, T testValue, int myAddress, T *globalStringArray, int myStartIdxA, int myStartIdxB, int aIndex, int bIndex, int size, int stringSize, unsigned char termC)
 
template<class T , int depth>
__device__ void lin_merge_simple (T &cmpValue, T myKey, T myAddress, int &index, T *BKeys, T *BValues, T *stringValues, T *A_keys, T *A_values, T *A_keys_out, T *A_values_out, int myStartIdxA, int myStartIdxB, int myStartIdxC, T localMinB, T localMaxB, int aCont, int bCont, int totalSize, int mySizeA, int mySizeB, unsigned int stringSize, int i, int stepNum, bool &placed, unsigned char termC)
 Performs a linear search in our shared memory (done after binary search), with tie breaks for strings. More...
 
template<class T , int depth>
__device__ void linearStringMerge (T *BKeys, T *BValues, T myKey, T myAddress, bool &placed, int &index, T &cmpValue, T *A_keys, T *A_values, T *A_keys_out, T *A_values_out, T *stringValues, int myStartIdxC, int myStartIdxA, int myStartIdxB, int localAPartSize, int localBPartSize, int localCPartSize, T localMaxB, T localMinB, int tid, int aIndex, int bIndex, int i, int stringSize, int totalSize, unsigned char termC)
 Performs a linear search in our shared memory, used by multiMerge kernel. More...
 

Detailed Description

CUDPP CTA-level sort routines.

stringsort_cta.cuh