| CUDPP 2.1: CUDA Data-Parallel Primitives Library | 
CUDPP CTA-level sort routines. More...
#include <cudpp_globals.h>
#include "cudpp_radixsort.h"
#include "cta/scan_cta.cuh"
#include <cudpp.h>
#include <stdio.h>
#include <cudpp_util.h>
#include <math.h>
#include "sharedmem.h"
| Radix Sort Functions | |
| typedef unsigned int | uint | 
| template<bool doFlip> | |
| __device__ uint | floatFlip (uint f) | 
| Flips bits of single-precision floating-point number (parameterized by doFlip)  More... | |
| template<bool doFlip> | |
| __device__ uint | floatUnflip (uint f) | 
| Reverses bit-flip of single-precision floating-point number (parameterized by doFlip)  More... | |
| template<class T , int maxlevel> | |
| __device__ T | scanwarp (T val, volatile T *sData) | 
| Scans one warp quickly, optimized for 32-element warps, using shared memory.  More... | |
| __device__ uint4 | scan4 (uint4 idata) | 
| Scans 4*CTA_SIZE unsigned ints in a block.  More... | |
| template<int ctasize> | |
| __device__ uint4 | rank4 (uint4 preds) | 
| Computes the output position for each thread given a predicate; elements with a true predicate come first, followed by those with a false predicate.  More... | |
| template<uint nbits, uint startbit> | |
| __device__ void | radixSortBlock (uint4 &key, uint4 &value) | 
| Sorts one block.  More... | |
| template<uint nbits, uint startbit> | |
| __device__ void | radixSortBlockKeysOnly (uint4 &key) | 
| Sorts one block. Key-only version.  More... | |
CUDPP CTA-level sort routines.
sort_cta.cu
 1.8.5
 1.8.5