CUDPP 2.0
CUDA Data-Parallel Primitives Library
/mnt/hgfs/VMWare/cudpp/src/cudpp/cta/radixsort_cta.cuh File Reference

CUDPP CTA-level sort routines. More...

#include <cudpp_globals.h>
#include "cudpp_radixsort.h"
#include "cta/scan_cta.cuh"
#include <cudpp.h>
#include <stdio.h>
#include <cudpp_util.h>
#include <math.h>
#include "sharedmem.h"

Radix Sort Functions

typedef unsigned int uint
template<bool doFlip>
__device__ uint floatFlip (uint f)
 Flips the bits of a single-precision floating-point number (parameterized by doFlip).
template<bool doFlip>
__device__ uint floatUnflip (uint f)
 Reverses the bit-flip of a single-precision floating-point number (parameterized by doFlip).
template<class T , int maxlevel>
__device__ T scanwarp (T val, volatile T *sData)
 Scans one warp quickly, optimized for 32-element warps, using shared memory.
__device__ uint4 scan4 (uint4 idata)
 Scans 4*CTA_SIZE unsigned ints in a block.
template<int ctasize>
__device__ uint4 rank4 (uint4 preds)
 Computes the output position for each thread given a predicate; trues come first, then falses.
template<uint nbits, uint startbit>
__device__ void radixSortBlock (uint4 &key, uint4 &value)
 Sorts one block.
template<uint nbits, uint startbit>
__device__ void radixSortBlockKeysOnly (uint4 &key)
 Sorts one block. Key-only version.

Detailed Description

CUDPP CTA-level sort routines.

radixsort_cta.cuh

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines