CUDPP
2.1
CUDA Data-Parallel Primitives Library
|
CUDPP application-level sparse matrix-vector multiply routines. More...
#include <cstdlib>
#include <cstdio>
#include <assert.h>
#include "cuda_util.h"
#include "cudpp.h"
#include "cudpp_util.h"
#include "cudpp_plan.h"
#include "cudpp_globals.h"
#include "kernel/spmvmult_kernel.cuh"
Functions | |
void | cudppSegmentedScanDispatch (void *d_out, const void *d_idata, const unsigned int *d_iflags, int numElements, const CUDPPSegmentedScanPlan *plan) |
Dispatch function to perform a segmented scan (prefix sum within each segment) on an array with the specified configuration. More... | |
Sparse Matrix-Vector Multiply Functions | |
template<class T > | |
void | sparseMatrixVectorMultiply (T *d_y, const T *d_x, const CUDPPSparseMatrixVectorMultiplyPlan *plan) |
Perform matrix-vector multiply for sparse matrices and vectors of arbitrary size. More... | |
void | allocSparseMatrixVectorMultiplyStorage (CUDPPSparseMatrixVectorMultiplyPlan *plan, const void *A, const unsigned int *rowindx, const unsigned int *indx) |
Allocate the intermediate product, flags, and rowFindx (index of the last element of each row) arrays. More... | |
void | freeSparseMatrixVectorMultiplyStorage (CUDPPSparseMatrixVectorMultiplyPlan *plan) |
Deallocate the intermediate product, flags, and rowFindx (index of the last element of each row) arrays. More... | |
void | cudppSparseMatrixVectorMultiplyDispatch (void *d_y, const void *d_x, const CUDPPSparseMatrixVectorMultiplyPlan *plan) |
Dispatch function to perform a sparse matrix-vector multiply with the specified configuration. More... | |
CUDPP application-level sparse matrix-vector multiply routines.
void cudppSegmentedScanDispatch | ( | void * | d_out, |
const void * | d_in, | ||
const unsigned int * | d_iflags, | ||
int | numElements, | ||
const CUDPPSegmentedScanPlan * | plan | ||
) |
Dispatch function to perform a segmented scan (prefix sum within each segment) on an array with the specified configuration.
This is the dispatch routine which calls segmentedScanArrayRecursive() with appropriate template parameters and arguments to achieve the scan as specified in plan.
[in] | numElements | The number of elements to scan |
[in] | plan | Segmented Scan configuration (plan), initialized by CUDPPSegmentedScanPlan constructor |
[in] | d_in | The input array |
[in] | d_iflags | The input flags array |
[out] | d_out | The output array of segmented scan results |
void sparseMatrixVectorMultiply | ( | T * | d_y, |
const T * | d_x, | ||
const CUDPPSparseMatrixVectorMultiplyPlan * | plan | ||
) |
Perform matrix-vector multiply for sparse matrices and vectors of arbitrary size.
This function performs the sparse matrix-vector multiply by executing four steps.
[out] | d_y | The output array for the sparse matrix-vector multiply (y vector) |
[in] | d_x | The input x vector |
[in] | plan | Pointer to the CUDPPSparseMatrixVectorMultiplyPlan object which stores the configuration and pointers to temporary buffers needed by this routine |
void allocSparseMatrixVectorMultiplyStorage | ( | CUDPPSparseMatrixVectorMultiplyPlan * | plan, |
const void * | A, | ||
const unsigned int * | rowindx, | ||
const unsigned int * | indx | ||
) |
Allocate the intermediate product, flags, and rowFindx (index of the last element of each row) arrays.
[in] | plan | Pointer to the CUDPPSparseMatrixVectorMultiplyPlan object containing the sparse matrix-vector multiply options, the number of non-zero elements, and the number of rows, which are used to compute storage requirements |
[in] | A | The matrix A |
[in] | rowindx | The indices of elements in A which are the first element of their row |
[in] | indx | The column number for each element in A |
void freeSparseMatrixVectorMultiplyStorage | ( | CUDPPSparseMatrixVectorMultiplyPlan * | plan | ) |
Deallocate the intermediate product, flags, and rowFindx (index of the last element of each row) arrays.
These arrays must have been allocated by allocSparseMatrixVectorMultiplyStorage(), which is called by the constructor of CUDPPSparseMatrixVectorMultiplyPlan.
[in] | plan | Pointer to CUDPPSparseMatrixVectorMultiplyPlan plan initialized by its constructor. |
void cudppSparseMatrixVectorMultiplyDispatch | ( | void * | d_y, |
const void * | d_x, | ||
const CUDPPSparseMatrixVectorMultiplyPlan * | plan | ||
) |
Dispatch function to perform a sparse matrix-vector multiply with the specified configuration.
This is the dispatch routine that calls sparseMatrixVectorMultiply() with the appropriate template parameters and arguments.
[out] | d_y | The output vector for y = A*x |
[in] | d_x | The x vector for y = A*x |
[in] | plan | The sparse matrix plan and data |