CUDPP 2.0
CUDA Data-Parallel Primitives Library
|
CUDPP application-level scan routines. More...
#include <cstdlib>
#include <cstdio>
#include <assert.h>
#include "cuda_util.h"
#include "cudpp.h"
#include "cudpp_util.h"
#include "cudpp_plan.h"
#include "cudpp_globals.h"
#include "kernel/spmvmult_kernel.cuh"
Functions | |
void | cudppSegmentedScanDispatch (void *d_out, const void *d_idata, const unsigned int *d_iflags, int numElements, const CUDPPSegmentedScanPlan *plan) |
Dispatch function to perform a segmented scan (prefix sum) on an array with the specified configuration. | |
Sparse Matrix-Vector Multiply Functions | |
template<class T > | |
void | sparseMatrixVectorMultiply (T *d_y, const T *d_x, const CUDPPSparseMatrixVectorMultiplyPlan *plan) |
Perform matrix-vector multiply for sparse matrices and vectors of arbitrary size. | |
void | allocSparseMatrixVectorMultiplyStorage (CUDPPSparseMatrixVectorMultiplyPlan *plan, const void *A, const unsigned int *rowindx, const unsigned int *indx) |
Allocate intermediate product, flags and rowFindx (index of the last element of each row) arrays. | |
void | freeSparseMatrixVectorMultiplyStorage (CUDPPSparseMatrixVectorMultiplyPlan *plan) |
Deallocate intermediate product, flags and rowFindx (index of the last element of each row) arrays. | |
void | cudppSparseMatrixVectorMultiplyDispatch (void *d_y, const void *d_x, const CUDPPSparseMatrixVectorMultiplyPlan *plan) |
Dispatch function to perform a sparse matrix-vector multiply with the specified configuration. |
CUDPP application-level scan routines.
void cudppSegmentedScanDispatch | ( | void * | d_out, |
const void * | d_in, | ||
const unsigned int * | d_iflags, | ||
int | numElements, | ||
const CUDPPSegmentedScanPlan * | plan | ||
) |
Dispatch function to perform a segmented scan (prefix sum) on an array with the specified configuration.
This is the dispatch routine which calls segmentedScanArrayRecursive() with appropriate template parameters and arguments to achieve the scan as specified in plan.
[in] | numElements | The number of elements to scan |
[in] | plan | Segmented Scan configuration (plan), initialized by CUDPPSegmentedScanPlan constructor |
[in] | d_in | The input array |
[in] | d_iflags | The input flags array |
[out] | d_out | The output array of segmented scan results |
void sparseMatrixVectorMultiply | ( | T * | d_y, |
const T * | d_x, | ||
const CUDPPSparseMatrixVectorMultiplyPlan * | plan | ||
) |
Perform matrix-vector multiply for sparse matrices and vectors of arbitrary size.
This function performs the sparse matrix-vector multiply by executing four steps.
1. The sparseMatrixVectorFetchAndMultiply() kernel does an element-wise multiplication of each element e in CUDPPSparseMatrixVectorMultiplyPlan::m_d_A with the corresponding (i.e. in the same row as the column index of e in CUDPPSparseMatrixVectorMultiplyPlan::m_d_A) element in d_x and stores the product in CUDPPSparseMatrixVectorMultiplyPlan::m_d_prod. It also sets all elements of CUDPPSparseMatrixVectorMultiplyPlan::m_d_flags to 0.
2. The sparseMatrixVectorSetFlags() kernel iterates over each element in CUDPPSparseMatrixVectorMultiplyPlan::m_d_rowIndex and sets the corresponding position (indicated by CUDPPSparseMatrixVectorMultiplyPlan::m_d_rowIndex) in CUDPPSparseMatrixVectorMultiplyPlan::m_d_flags to 1.
3. Perform a segmented scan on CUDPPSparseMatrixVectorMultiplyPlan::m_d_prod with CUDPPSparseMatrixVectorMultiplyPlan::m_d_flags as the flag vector. The output is stored in CUDPPSparseMatrixVectorMultiplyPlan::m_d_prod.
4. The yGather() kernel goes over each element in CUDPPSparseMatrixVectorMultiplyPlan::m_d_rowFinalIndex and picks the corresponding element (indicated by CUDPPSparseMatrixVectorMultiplyPlan::m_d_rowFinalIndex) from CUDPPSparseMatrixVectorMultiplyPlan::m_d_prod and stores it in d_y.
[out] | d_y | The output array for the sparse matrix-vector multiply (y vector) |
[in] | d_x | The input x vector |
[in] | plan | Pointer to the CUDPPSparseMatrixVectorMultiplyPlan object which stores the configuration and pointers to temporary buffers needed by this routine |
void allocSparseMatrixVectorMultiplyStorage | ( | CUDPPSparseMatrixVectorMultiplyPlan * | plan, |
const void * | A, | ||
const unsigned int * | rowindx, | ||
const unsigned int * | indx | ||
) |
Allocate intermediate product, flags and rowFindx (index of the last element of each row) arrays.
[in] | plan | Pointer to CUDPPSparseMatrixVectorMultiplyPlan class containing sparse matrix-vector multiply options, number of non-zero elements and number of rows which is used to compute storage requirements |
[in] | A | The matrix A |
[in] | rowindx | The indices of elements in A which are the first element of their row |
[in] | indx | The column number for each element in A |
void freeSparseMatrixVectorMultiplyStorage | ( | CUDPPSparseMatrixVectorMultiplyPlan * | plan | ) |
Deallocate intermediate product, flags and rowFindx (index of the last element of each row) arrays.
These arrays must have been allocated by allocSparseMatrixVectorMultiplyStorage(), which is called by the constructor of CUDPPSparseMatrixVectorMultiplyPlan.
[in] | plan | Pointer to the CUDPPSparseMatrixVectorMultiplyPlan initialized by its constructor. |
void cudppSparseMatrixVectorMultiplyDispatch | ( | void * | d_y, |
const void * | d_x, | ||
const CUDPPSparseMatrixVectorMultiplyPlan * | plan | ||
) |
Dispatch function to perform a sparse matrix-vector multiply with the specified configuration.
This is the dispatch routine which calls sparseMatrixVectorMultiply() with appropriate template parameters and arguments.
[out] | d_y | The output vector for y = A*x |
[in] | d_x | The x vector for y = A*x |
[in] | plan | The sparse matrix plan and data |