CUDPP 1.1
Classes
scan_cta.cu File Reference

CUDPP CTA-level scan routines. More...

#include <cudpp_globals.h>
#include <cudpp_util.h>
#include <math.h>
#include <cudpp.h>

Classes

class  ScanTraits< T, oper, backward, exclusive, multiRow, sums, fullBlock >
 Template class containing compile-time parameters to the scan functions. More...

Scan Functions

#define __EMUSYNC
 Macro that inserts the necessary __syncthreads() calls in device emulation mode.
#define DISALLOW_LOADSTORE_OVERLAP   1
template<class T , class traits >
__device__ void loadSharedChunkFromMem4 (T *s_out, T threadScan0[4], T threadScan1[4], const T *d_in, int numElements, int iDataOffset, int &ai, int &bi, int &aiDev, int &biDev)
 Handles loading the input s_data from global memory into shared memory (vec4 version).
template<class T , class traits >
__device__ void storeSharedChunkToMem4 (T *d_out, T threadScan0[4], T threadScan1[4], T *s_in, int numElements, int oDataOffset, int ai, int bi, int aiDev, int biDev)
 Handles storing the result s_data from shared memory to global memory (vec4 version).
template<class T , class traits , int maxlevel>
__device__ T warpscan (T val, volatile T *s_data)
 Scan all warps of a CTA without synchronization.
template<class T , class traits >
__device__ void scanWarps (T x, T y, T *s_data)
 Perform a full CTA scan using the warp-scan algorithm.
template<class T , class traits >
__device__ void scanCTA (T *s_data, T *d_blockSums, unsigned int blockSumIndex)
 CTA-level scan routine; scans s_data in shared memory in each thread block.

Detailed Description

CUDPP CTA-level scan routines.

scan_cta.cu

 All Classes Files Functions Variables Enumerations Enumerator Defines