CUDPP  2.3
CUDA Data-Parallel Primitives Library
mergesort_cta.cuh File Reference

CUDPP CTA-level sort routines. More...

#include <cudpp_globals.h>
#include "cudpp_mergesort.h"
#include <cudpp.h>
#include <stdio.h>
#include <cudpp_util.h>
#include <math.h>
#include "sharedmem.h"

Merge Sort Functions

#define BLOCKSORT_SIZE   1024
 
#define CTA_BLOCK   128
 
#define DEPTH_simple   2
 
#define DEPTH_multi   4
 
#define CTASIZE_simple   256
 
#define CTASIZE_multi   128
 
#define INTERSECT_A_BLOCK_SIZE_simple   DEPTH_simple*CTASIZE_simple
 
#define INTERSECT_B_BLOCK_SIZE_simple   2*DEPTH_simple*CTASIZE_simple
 
#define INTERSECT_A_BLOCK_SIZE_multi   DEPTH_multi*CTASIZE_multi
 
#define INTERSECT_B_BLOCK_SIZE_multi   2*DEPTH_multi*CTASIZE_multi
 
typedef unsigned int uint
 
template<class T , int depth>
__device__ void bin_search_block (T &cmpValue, T tmpVal, T *in, unsigned int &j, unsigned int bump, unsigned int addPart)
 Binary search within a single block (blockSort) More...
 
template<class T , int depth>
__device__ void lin_search_block (T &cmpValue, T mVal, unsigned int &tmpVal, T *in, unsigned int *addressPad, unsigned int &j, unsigned int offset, unsigned int last, unsigned int startAddress, unsigned int addPart)
 Linear search within a single block (blockSort) More...
 
template<class T >
__device__ void compareSwapVal (T &A1, T &A2, unsigned int &ref1, unsigned int &ref2)
 For blockSort. Compares two values and swaps them if A1 > A2. More...
 
template<class T >
__device__ void binSearch_fragment_lower (T *binArray, int offset, int &mid, T testValue)
 
template<class T >
__device__ void binSearch_fragment_higher (T *binArray, int offset, int &mid, T testValue)
 
template<class T >
__device__ void binSearch_whole_lower (T *BKeys, int &index, T myKey)
 
template<class T >
__device__ void binSearch_whole_higher (T *BKeys, int &index, T myKey)
 
template<class T , int depth>
__device__ void linearMerge_lower (T *searchArray, T myKey, unsigned int myVal, int &index, T *saveGlobalArray, unsigned int *saveValueArray, int myStartIdxC, T nextMaxB, int localAPartSize, int localBPartSize, T localMaxB, T localMinB, int aIndex, int bIndex, int offset)
 Performs a linear search in our shared memory (done after binary search). It merges the partition on the left side with the associated partition on the right side. More...
 
template<class T , int depth>
__device__ void linearMerge_higher (T *searchArray, T myKey, unsigned int myVal, int &index, T *saveGlobalArray, unsigned int *saveValueArray, int myStartIdxC, T localMinB, T nextMaxB, int aIndex, int bIndex, int offset, int localAPartSize, int localBPartSize)
 Performs a linear search in our shared memory (done after binary search). It merges the partition on the right side with the associated partition on the left side. More...
 

Detailed Description

CUDPP CTA-level sort routines.

mergesort_cta.cuh