CUDPP  2.1
CUDA Data-Parallel Primitives Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Groups Pages
Classes
scan_cta.cuh File Reference

CUDPP CTA-level scan routines. More...

#include <cudpp_globals.h>
#include <cudpp_util.h>
#include <math.h>
#include <cudpp.h>

Classes

class  ScanTraits< T, Oper, backward, exclusive, multiRow, sums, fullBlock >
 Template class containing compile-time parameters to the scan functions. More...
 

Scan Functions

#define DISALLOW_LOADSTORE_OVERLAP   1
 
template<class T , class traits >
__device__ void loadSharedChunkFromMem4 (T *s_out, T threadScan[2][4], const T *d_in, int numElements, int iDataOffset, int &ai, int &bi, int &aiDev, int &biDev)
 Handles loading input s_data from global memory to shared memory (vec4 version) More...
 
template<class T , class traits >
__device__ void storeSharedChunkToMem4 (T *d_out, T threadScan[2][4], T *s_in, int numElements, int oDataOffset, int ai, int bi, int aiDev, int biDev)
 Handles storing result s_data from shared memory to global memory (vec4 version) More...
 
template<class T , class traits >
__device__ void loadSharedChunkFromMem2 (T *s_out, T threadScan[2][2], const T *d_in, int numElements, int iDataOffset, int &ai, int &bi, int &aiDev, int &biDev)
 Handles loading input s_data from global memory to shared memory (vec4 version) More...
 
template<class T , class traits >
__device__ void storeSharedChunkToMem2 (T *d_out, T threadScan[2][2], T *s_in, int numElements, int oDataOffset, int ai, int bi, int aiDev, int biDev)
 Handles storing result s_data from shared memory to global memory (vec4 version) More...
 
template<class T , class traits , int maxlevel>
__device__ T warpscan (T val, volatile T *s_data)
 Scan all warps of a CTA without synchronization. More...
 
template<class T , class traits >
__device__ void scanWarps (T x, T y, T *s_data)
 Perform a full CTA scan using the warp-scan algorithm. More...
 
template<class T , class traits >
__device__ void scanCTA (T *s_data, T *d_blockSums, unsigned int blockSumIndex)
 CTA-level scan routine; scans s_data in shared memory in each thread block. More...
 

Detailed Description

CUDPP CTA-level scan routines.

scan_cta.cu