CUDPP  2.3
CUDA Data-Parallel Primitives Library
Functions
stringsort_kernel.cuh File Reference

CUDPP kernel-level string sorting routines. More...

#include "cudpp_stringsort.h"
#include <cudpp_globals.h>
#include "sharedmem.h"
#include "cta/stringsort_cta.cuh"

Functions

__global__ void alignedOffsets (unsigned int *numSpaces, unsigned int *d_address, unsigned char *d_stringVals, unsigned char termC, unsigned int numElements, unsigned int stringSize)
 Calculate the number of spaces required for each string to align the string array. More...
 
__global__ void alignString (unsigned int *packedStrings, unsigned char *d_stringVals, unsigned int *packedAddress, unsigned int *address, unsigned int numElements, unsigned int stringArrayLength, unsigned char termC)
 Packs strings into unsigned ints to be sorted later. These packed strings will also be aligned. More...
 
__global__ void createKeys (unsigned int *d_keys, unsigned int *packedStrings, unsigned int *packedAddress, unsigned int numElements)
 Create keys (the first four characters of each string packed into a uint) from the addresses of the strings and the string array. More...
 
__global__ void unpackAddresses (unsigned int *packedAddress, unsigned int *packedAddressRef, unsigned int *address, unsigned int *addressRef, size_t numElements)
 Converts addresses from packed (aligned) form to unpacked, unaligned form. The aligned strings are stored in our string array packed into unsigned ints and aligned such that each string begins at the start of a uint (an offset divisible by 4). More...
 
template<class T , int depth>
__global__ void blockWiseStringSort (T *A_keys, T *A_address, T *stringVals, int blockSize, int totalSize, unsigned int stringSize, unsigned char termC)
 Does an initial blockSort based on the size of our partition (limited by shared memory size) More...
 
template<class T , int depth>
__global__ void simpleStringMerge (T *A_keys, T *A_keys_out, T *A_values, T *A_values_out, T *stringValues, int sizePerPartition, int size, int step, int stringSize, unsigned char termC)
 Merges two independent sets. Each CUDA block works on two partitions of data without cooperating. More...
 
template<class T >
__global__ void findMultiPartitions (T *A_keys, T *A_address, T *stringValues, int splitsPP, int numPartitions, int partitionSize, unsigned int *partitionBeginA, unsigned int *partitionSizesA, unsigned int *partitionBeginB, unsigned int *partitionSizesB, size_t size, size_t stringSize, unsigned char termC)
 For our multiMerge kernels we need to divide our partitions into smaller partitions. This kernel breaks up a set of partitions into splitsPP*numPartitions subpartitions. More...
 
template<class T , int depth>
__global__ void stringMergeMulti (T *A_keys, T *A_keys_out, T *A_values, T *A_values_out, T *stringValues, int subPartitions, int numBlocks, unsigned int *partitionBeginA, unsigned int *partitionSizeA, unsigned int *partitionBeginB, unsigned int *partitionSizeB, int entirePartitionSize, int step, size_t size, size_t stringSize, unsigned char termC)
 Main merge kernel where multiple CUDA blocks cooperate to merge one or more partitions. More...
 
MergeSort Functions
template<class T >
__global__ void simpleCopy (T *A_keys_dev, unsigned int *A_vals_dev, T *A_keys_out_dev, unsigned int *A_vals_out_dev, int offset, int numElementsToCopy)
 Copies unused portions of arrays in our ping-pong strategy. More...
 

Detailed Description

CUDPP kernel-level string sorting routines.

stringsort_kernel.cuh