CUDPP application-level sparse matrix-vector multiply routines. More...

#include <cstdlib>
#include <cstdio>
#include <assert.h>
#include "cuda_util.h"
#include "cudpp.h"
#include "cudpp_util.h"
#include "cudpp_plan.h"
#include "cudpp_globals.h"
#include "kernel/spmvmult_kernel.cuh"

Functions
void	cudppSegmentedScanDispatch (void *d_out, const void *d_idata, const unsigned int *d_iflags, int numElements, const CUDPPSegmentedScanPlan *plan)
	Dispatch function to perform a segmented scan (segmented prefix sum) on an array with the specified configuration.
Sparse Matrix-Vector Multiply Functions
template<class T >
void	sparseMatrixVectorMultiply (T *d_y, const T *d_x, const CUDPPSparseMatrixVectorMultiplyPlan *plan)
	Perform matrix-vector multiply for sparse matrices and vectors of arbitrary size.
void	allocSparseMatrixVectorMultiplyStorage (CUDPPSparseMatrixVectorMultiplyPlan *plan, const void *A, const unsigned int *rowindx, const unsigned int *indx)
	Allocate the intermediate product, flags and rowFindx (index of the last element of each row) arrays.
void	freeSparseMatrixVectorMultiplyStorage (CUDPPSparseMatrixVectorMultiplyPlan *plan)
	Deallocate the intermediate product, flags and rowFindx (index of the last element of each row) arrays.
void	cudppSparseMatrixVectorMultiplyDispatch (void *d_y, const void *d_x, const CUDPPSparseMatrixVectorMultiplyPlan *plan)
	Dispatch function to perform a sparse matrix-vector multiply with the specified configuration.

Detailed Description

CUDPP application-level sparse matrix-vector multiply routines.

spmvmult_app.cu

Function Documentation

void cudppSegmentedScanDispatch	(	void *	d_out,
		const void *	d_in,
		const unsigned int *	d_iflags,
		int	numElements,
		const CUDPPSegmentedScanPlan *	plan
	)

Dispatch function to perform a segmented scan (segmented prefix sum) on an array with the specified configuration.

This is the dispatch routine which calls segmentedScanArrayRecursive() with appropriate template parameters and arguments to achieve the scan as specified in plan.

Parameters:

[in]	numElements	The number of elements to scan
[in]	plan	Segmented Scan configuration (plan), initialized by CUDPPSegmentedScanPlan constructor
[in]	d_in	The input array
[in]	d_iflags	The input flags array
[out]	d_out	The output array of segmented scan results

template<class T >

void sparseMatrixVectorMultiply	(	T *	d_y,
		const T *	d_x,
		const CUDPPSparseMatrixVectorMultiplyPlan *	plan
	)

Perform matrix-vector multiply for sparse matrices and vectors of arbitrary size.

This function performs the sparse matrix-vector multiply by executing four steps.

1. The sparseMatrixVectorFetchAndMultiply() kernel does an element-wise multiplication of each element e in CUDPPSparseMatrixVectorMultiplyPlan::m_d_A with the corresponding (i.e. in the same row as the column index of e in CUDPPSparseMatrixVectorMultiplyPlan::m_d_A) element in d_x and stores the product in CUDPPSparseMatrixVectorMultiplyPlan::m_d_prod. It also sets all elements of CUDPPSparseMatrixVectorMultiplyPlan::m_d_flags to 0.

2. The sparseMatrixVectorSetFlags() kernel iterates over each element in CUDPPSparseMatrixVectorMultiplyPlan::m_d_rowIndex and sets the corresponding position (indicated by CUDPPSparseMatrixVectorMultiplyPlan::m_d_rowIndex) in CUDPPSparseMatrixVectorMultiplyPlan::m_d_flags to 1.

3. Perform a segmented scan on CUDPPSparseMatrixVectorMultiplyPlan::m_d_prod with CUDPPSparseMatrixVectorMultiplyPlan::m_d_flags as the flag vector. The output is stored in CUDPPSparseMatrixVectorMultiplyPlan::m_d_prod.

4. The yGather() kernel goes over each element in CUDPPSparseMatrixVectorMultiplyPlan::m_d_rowFinalIndex, picks the corresponding element (indicated by CUDPPSparseMatrixVectorMultiplyPlan::m_d_rowFinalIndex) from CUDPPSparseMatrixVectorMultiplyPlan::m_d_prod, and stores it in d_y.

Parameters:

[out]	d_y	The output array for the sparse matrix-vector multiply (y vector)
[in]	d_x	The input x vector
[in]	plan	Pointer to the CUDPPSparseMatrixVectorMultiplyPlan object which stores the configuration and pointers to temporary buffers needed by this routine

void allocSparseMatrixVectorMultiplyStorage	(	CUDPPSparseMatrixVectorMultiplyPlan *	plan,
		const void *	A,
		const unsigned int *	rowindx,
		const unsigned int *	indx
	)

Allocate the intermediate product, flags and rowFindx (index of the last element of each row) arrays.

Parameters:

[in]	plan	Pointer to CUDPPSparseMatrixVectorMultiplyPlan class containing sparse matrix-vector multiply options, number of non-zero elements and number of rows which is used to compute storage requirements
[in]	A	The matrix A
[in]	rowindx	The indices of elements in A which are the first element of their row
[in]	indx	The column number for each element in A

void freeSparseMatrixVectorMultiplyStorage ( CUDPPSparseMatrixVectorMultiplyPlan * plan )

Deallocate the intermediate product, flags and rowFindx (index of the last element of each row) arrays.

These arrays must have been allocated by allocSparseMatrixVectorMultiplyStorage(), which is called by the constructor of CUDPPSparseMatrixVectorMultiplyPlan.

Parameters:

[in] plan Pointer to CUDPPSparseMatrixVectorMultiplyPlan plan initialized by its constructor.

void cudppSparseMatrixVectorMultiplyDispatch	(	void *	d_y,
		const void *	d_x,
		const CUDPPSparseMatrixVectorMultiplyPlan *	plan
	)

Dispatch function to perform a sparse matrix-vector multiply with the specified configuration.

This is the dispatch routine which calls sparseMatrixVectorMultiply() with appropriate template parameters and arguments.

Parameters:

[out]	d_y	The output vector for y = A*x
[in]	d_x	The x vector for y = A*x
[in]	plan	The sparse matrix plan and data

Functions

Detailed Description

Function Documentation