|
CUDPP 2.0
CUDA Data-Parallel Primitives Library
|
CUDPP CTA-level sort routines.
#include <cudpp_globals.h>
#include "cudpp_radixsort.h"
#include "cta/scan_cta.cuh"
#include <cudpp.h>
#include <stdio.h>
#include <cudpp_util.h>
#include <math.h>
#include "sharedmem.h"

Radix Sort Functions | |
| typedef unsigned int | uint |
| template<bool doFlip> | |
| __device__ uint | floatFlip (uint f) |
| Flips bits of single-precision floating-point number (parameterized by doFlip) | |
| template<bool doFlip> | |
| __device__ uint | floatUnflip (uint f) |
| Reverses bit-flip of single-precision floating-point number (parameterized by doFlip) | |
| template<class T , int maxlevel> | |
| __device__ T | scanwarp (T val, volatile T *sData) |
| Scans one warp quickly, optimized for 32-element warps, using shared memory. | |
| __device__ uint4 | scan4 (uint4 idata) |
| Scans 4*CTA_SIZE unsigned ints in a block. | |
| template<int ctasize> | |
| __device__ uint4 | rank4 (uint4 preds) |
| Computes the output position for each thread given a predicate; trues come first, then falses. | |
| template<uint nbits, uint startbit> | |
| __device__ void | radixSortBlock (uint4 &key, uint4 &value) |
| Sorts one block. | |
| template<uint nbits, uint startbit> | |
| __device__ void | radixSortBlockKeysOnly (uint4 &key) |
| Sorts one block. Key-only version. | |
CUDPP CTA-level sort routines.
sort_cta.cu
1.7.4