ViennaCL - The Vienna Computing Library  1.5.0
viennacl/linalg/cuda/misc_operations.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_CUDA_MISC_OPERATIONS_HPP_
00002 #define VIENNACL_LINALG_CUDA_MISC_OPERATIONS_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2013, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00025 #include "viennacl/forwards.h"
00026 #include "viennacl/scalar.hpp"
00027 #include "viennacl/vector.hpp"
00028 #include "viennacl/tools/tools.hpp"
00029 #include "viennacl/linalg/cuda/common.hpp"
00030 
00031 
00032 namespace viennacl
00033 {
00034   namespace linalg
00035   {
00036     namespace cuda
00037     {
00038 
00039       namespace detail
00040       {
00041 
00042         template <typename T>
00043         __global__ void level_scheduling_substitute_kernel(
00044                   const unsigned int * row_index_array,
00045                   const unsigned int * row_indices,
00046                   const unsigned int * column_indices,
00047                   const T * elements,
00048                   T * vec,
00049                   unsigned int size)
00050         {
00051           for (unsigned int row  = blockDim.x * blockIdx.x + threadIdx.x;
00052                             row  < size;
00053                             row += gridDim.x * blockDim.x)
00054           {
00055             unsigned int eq_row = row_index_array[row];
00056             T vec_entry = vec[eq_row];
00057             unsigned int row_end = row_indices[row+1];
00058 
00059             for (unsigned int j = row_indices[row]; j < row_end; ++j)
00060               vec_entry -= vec[column_indices[j]] * elements[j];
00061 
00062             vec[eq_row] = vec_entry;
00063           }
00064         }
00065 
00066 
00067 
00068         template <typename ScalarType>
00069         void level_scheduling_substitute(vector<ScalarType> & vec,
00070                                      viennacl::backend::mem_handle const & row_index_array,
00071                                      viennacl::backend::mem_handle const & row_buffer,
00072                                      viennacl::backend::mem_handle const & col_buffer,
00073                                      viennacl::backend::mem_handle const & element_buffer,
00074                                      vcl_size_t num_rows
00075                                     )
00076         {
00077           level_scheduling_substitute_kernel<<<128, 128>>>(detail::cuda_arg<unsigned int>(row_index_array.cuda_handle()),
00078                                                        detail::cuda_arg<unsigned int>(row_buffer.cuda_handle()),
00079                                                        detail::cuda_arg<unsigned int>(col_buffer.cuda_handle()),
00080                                                        detail::cuda_arg<ScalarType>(element_buffer.cuda_handle()),
00081                                                        detail::cuda_arg<ScalarType>(vec),
00082                                                        static_cast<unsigned int>(num_rows)
00083                                                       );
00084         }
00085 
00086       }
00087 
00088     } // namespace cuda
00089   } //namespace linalg
00090 } //namespace viennacl
00091 
00092 
00093 #endif