CUDPP  2.2
CUDA Data-Parallel Primitives Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Groups Pages
radixsort_cta.cuh File Reference

CUDPP CTA-level sort routines. More...

#include <cudpp_globals.h>
#include "cudpp_radixsort.h"
#include "cta/scan_cta.cuh"
#include <cudpp.h>
#include <stdio.h>
#include <cudpp_util.h>
#include <math.h>
#include "sharedmem.h"

Radix Sort Functions

typedef unsigned int uint
 
template<bool doFlip>
__device__ uint floatFlip (uint f)
 Flips the bits of a single-precision floating-point number (parameterized by doFlip). More...
 
template<bool doFlip>
__device__ uint floatUnflip (uint f)
 Reverses the bit-flip of a single-precision floating-point number (parameterized by doFlip). More...
 
template<class T , int maxlevel>
__device__ T scanwarp (T val, volatile T *sData)
 Scans one warp quickly, optimized for 32-element warps, using shared memory. More...
 
__device__ uint4 scan4 (uint4 idata)
 Scans 4*CTA_SIZE unsigned ints in a block. More...
 
template<int ctasize>
__device__ uint4 rank4 (uint4 preds)
 Computes the output position for each thread given a predicate; true elements come first, followed by false elements. More...
 
template<uint nbits, uint startbit>
__device__ void radixSortBlock (uint4 &key, uint4 &value)
 Sorts one block. Key-value version. More...
 
template<uint nbits, uint startbit>
__device__ void radixSortBlockKeysOnly (uint4 &key)
 Sorts one block. Key-only version. More...
 

Detailed Description

CUDPP CTA-level sort routines.

radixsort_cta.cuh