| 
    CUDPP 1.1.1 
   | 
 
CUDPP CTA-level scan routines. More...
Classes | |
| class | ScanTraits< T, oper, backward, exclusive, multiRow, sums, fullBlock > | 
| Template class containing compile-time parameters to the scan functions.  More... | |
Scan Functions | |
| #define | __EMUSYNC | 
| Macro to insert necessary __syncthreads() in device emulation mode.  | |
| #define | DISALLOW_LOADSTORE_OVERLAP 1 | 
| template<class T , class traits > | |
| __device__ void | loadSharedChunkFromMem4 (T *s_out, T threadScan0[4], T threadScan1[4], const T *d_in, int numElements, int iDataOffset, int &ai, int &bi, int &aiDev, int &biDev) | 
| Loads input data from global memory (d_in) into shared memory (s_out) (vec4 version)   | |
| template<class T , class traits > | |
| __device__ void | storeSharedChunkToMem4 (T *d_out, T threadScan0[4], T threadScan1[4], T *s_in, int numElements, int oDataOffset, int ai, int bi, int aiDev, int biDev) | 
| Stores result data from shared memory (s_in) to global memory (d_out) (vec4 version)   | |
| template<class T , class traits , int maxlevel> | |
| __device__ T | warpscan (T val, volatile T *s_data) | 
| Scan all warps of a CTA without synchronization.   | |
| template<class T , class traits > | |
| __device__ void | scanWarps (T x, T y, T *s_data) | 
| Perform a full CTA scan using the warp-scan algorithm.   | |
| template<class T , class traits > | |
| __device__ void | scanCTA (T *s_data, T *d_blockSums, unsigned int blockSumIndex) | 
| CTA-level scan routine; scans s_data in shared memory in each thread block.   | |
CUDPP CTA-level scan routines.
 1.7.4