Skip to content
This repository has been archived by the owner on Apr 3, 2024. It is now read-only.

Commit

Permalink
spmv update
Browse files Browse the repository at this point in the history
Former-commit-id: 7c8cff3bc9294d2c1c9caac99348b25227d578f7
  • Loading branch information
dumerrill committed Feb 13, 2015
1 parent 63af747 commit e0ede7e
Show file tree
Hide file tree
Showing 4 changed files with 287 additions and 66 deletions.
127 changes: 127 additions & 0 deletions cub/device/device_spmv.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@

/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

/**
* \file
* cub::DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * vector multiplication (SpMV).
*/

#pragma once

#include <stdio.h>
#include <iterator>
#include <limits>

#include "dispatch/device_histogram_dispatch.cuh"
#include "../util_namespace.cuh"

/// Optional outer namespace(s)
CUB_NS_PREFIX

/// CUB namespace
namespace cub {


/**
* \brief DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * vector multiplication (SpMV).
* \ingroup DeviceModule
*
* \par Overview
* TODO
*
* \par Usage Considerations
* \cdp_class{DeviceSpmv}
*
*/
struct DeviceSpmv
{
    /******************************************************************//**
     * \name CSR matrix operations
     *********************************************************************/
    //@{

    /**
     * \brief Entry point for sparse-matrix * vector multiplication (SpMV) where matrix <b>A</b> is supplied in CSR form.
     *
     * \par
     * NOTE: the body is currently a placeholder.  No kernels are launched and
     * \p d_vector_y is never written.  The only observable effect is that a
     * size query (\p d_temp_storage == NULL) stores zero into
     * \p temp_storage_bytes.  The call returns \p cudaSuccess in every case.
     *
     * \par Snippet
     * TODO
     *
     * \par
     * \code
     * #include <cub/cub.cuh>   // or equivalently <cub/device/device_spmv.cuh>
     *
     * // Declare, allocate, and initialize device pointers for input matrix A, input vector x,
     * // and output vector y
     * TODO
     *
     * \endcode
     *
     * \tparam VertexT      <b>[inferred]</b> Integer type for vertex identifiers
     * \tparam ValueT       <b>[inferred]</b> Matrix and vector value type
     * \tparam OffsetT      <b>[inferred]</b> Signed integer type for sequence offsets, list lengths, pointer differences, etc.  \offset_size1
     *
     * \return \p cudaSuccess (unconditionally, in the present placeholder body)
     */
    template <typename VertexT, typename ValueT, typename OffsetT>
    CUB_RUNTIME_FUNCTION
    static cudaError_t CsrMV(
        void*           d_temp_storage,             ///< [in] %Device allocation of temporary storage.  When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done.
        size_t&         temp_storage_bytes,         ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation
        ValueT*         d_matrix_values,            ///< [in] Pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix <b>A</b>.
        OffsetT*        d_matrix_row_offsets,       ///< [in] Pointer to the array of \p num_rows + 1 offsets demarcating the start of every row in \p d_matrix_column_indices and \p d_matrix_values (with the final entry being equal to \p num_nonzeros)
        VertexT*        d_matrix_column_indices,    ///< [in] Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix <b>A</b>.  (Indices are zero-based.)
        ValueT*         d_vector_x,                 ///< [in] Pointer to the array of \p num_cols values corresponding to the dense input vector <em>x</em>
        ValueT*         d_vector_y,                 ///< [out] Pointer to the array of \p num_rows values corresponding to the dense output vector <em>y</em>
        int             num_rows,                   ///< [in] number of rows of matrix <b>A</b>.
        int             num_cols,                   ///< [in] number of columns of matrix <b>A</b>.
        int             num_nonzeros,               ///< [in] number of nonzero elements of matrix <b>A</b>.
        cudaStream_t    stream              = 0,    ///< [in] <b>[optional]</b> CUDA stream to launch kernels within.  Default is stream<sub>0</sub>.  (Not yet referenced by the placeholder body.)
        bool            debug_synchronous   = false)///< [in] <b>[optional]</b> Whether or not to synchronize the stream after every kernel launch to check for errors.  May cause significant slowdown.  Default is \p false.  (Not yet referenced by the placeholder body.)
    {
        // Size-query mode: the caller passed no temporary storage, so report
        // how much is needed.  The placeholder needs none.
        if (d_temp_storage == NULL)
        {
            temp_storage_bytes = 0;
        }

        // Nothing is computed yet; always report success.
        return cudaSuccess;
    }

    //@}  end member group
};


/**
* \example TODO
*/

} // CUB namespace
CUB_NS_POSTFIX // Optional outer namespace(s)


2 changes: 1 addition & 1 deletion experimental/histogram/histogram_cub.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
*
******************************************************************************/

#include <cub/cub.cuh>
#include <cub/device/device_histogram.cuh>

using namespace cub;

Expand Down
17 changes: 8 additions & 9 deletions experimental/matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -443,13 +443,13 @@ struct CooMatrix
/**
* CSR sparse format matrix
*/
template<typename VertexT, typename ValueT, typename SizeT>
template<typename VertexT, typename ValueT, typename OffsetT>
struct CsrMatrix
{
int num_rows;
int num_cols;
int num_nonzeros;
SizeT* row_offsets;
OffsetT* row_offsets;
VertexT* column_indices;
ValueT* values;

Expand Down Expand Up @@ -479,12 +479,12 @@ struct CsrMatrix
num_cols = coo_matrix.num_cols;
num_nonzeros = coo_matrix.num_nonzeros;

row_offsets = new SizeT[num_rows + 1];
row_offsets = new OffsetT[num_rows + 1];
column_indices = new VertexT[num_nonzeros];
values = new ValueT[num_nonzeros];

VertexT prev_row = -1;
for (SizeT current_edge = 0; current_edge < num_nonzeros; current_edge++)
for (OffsetT current_edge = 0; current_edge < num_nonzeros; current_edge++)
{
VertexT current_row = coo_matrix.coo_tuples[current_edge].row;

Expand Down Expand Up @@ -523,9 +523,9 @@ struct CsrMatrix

// Scan
int max_log_length = -1;
for (SizeT row = 0; row < num_rows; row++)
for (OffsetT row = 0; row < num_rows; row++)
{
SizeT length = row_offsets[row + 1] - row_offsets[row];
OffsetT length = row_offsets[row + 1] - row_offsets[row];

int log_length = -1;
while (length > 0)
Expand All @@ -545,7 +545,6 @@ struct CsrMatrix
{
printf("\tDegree 1e%d: \t%d (%.2f%%)\n", i, log_counts[i + 1], (float) log_counts[i + 1] * 100.0 / num_cols);
}
printf("\n");
fflush(stdout);
}

Expand All @@ -556,10 +555,10 @@ struct CsrMatrix
void Display()
{
cout << "Input Matrix:\n";
for (SizeT row = 0; row < num_rows; row++)
for (OffsetT row = 0; row < num_rows; row++)
{
cout << row << ": ";
for (SizeT current_edge = row_offsets[row]; current_edge < row_offsets[row + 1]; current_edge++)
for (OffsetT current_edge = row_offsets[row]; current_edge < row_offsets[row + 1]; current_edge++)
{
cout << column_indices[current_edge] << " (" << values[current_edge] << "), ";
}
Expand Down
Loading

0 comments on commit e0ede7e

Please sign in to comment.