ViennaCL - The Vienna Computing Library 1.5.0
viennacl/hyb_matrix.hpp
#ifndef VIENNACL_HYB_MATRIX_HPP_
#define VIENNACL_HYB_MATRIX_HPP_

/* =========================================================================
   Copyright (c) 2010-2013, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

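/** @file viennacl/hyb_matrix.hpp
    @brief Implementation of the hyb_matrix class
*/
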
#include "viennacl/forwards.h"
#include "viennacl/vector.hpp"

#include "viennacl/tools/tools.hpp"

#include "viennacl/linalg/sparse_matrix_operations.hpp"

namespace viennacl
{
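    /** @brief Sparse matrix class using the hybrid (HYB) format: an ELL part for the leading entries of each row, plus a CSR part for the overflow.
    *
    * The ELL width is determined in copy() from a histogram of nonzeros per row: it is
    * the smallest k such that at least csr_threshold() * size1() rows have at most k
    * entries (or the maximum row length if no such k exists). Entries beyond the first
    * k of a row are stored in the CSR part.
    */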
    template<typename SCALARTYPE, unsigned int ALIGNMENT  /* see forwards.h for default argument */>
    class hyb_matrix
    {
      public:
        typedef viennacl::backend::mem_handle                                                              handle_type;
        typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType>   value_type;

        hyb_matrix() : csr_threshold_(SCALARTYPE(0.8)), rows_(0), cols_(0) {}

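        /** @brief Constructs an empty matrix whose memory handles live in the given context (host, CUDA, or OpenCL memory, depending on the backend). */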
        hyb_matrix(viennacl::context ctx) : csr_threshold_(SCALARTYPE(0.8)), rows_(0), cols_(0)
        {
          ell_coords_.switch_active_handle_id(ctx.memory_type());
          ell_elements_.switch_active_handle_id(ctx.memory_type());

          csr_rows_.switch_active_handle_id(ctx.memory_type());
          csr_cols_.switch_active_handle_id(ctx.memory_type());
          csr_elements_.switch_active_handle_id(ctx.memory_type());

#ifdef VIENNACL_WITH_OPENCL
          if (ctx.memory_type() == OPENCL_MEMORY)
          {
            ell_coords_.opencl_handle().context(ctx.opencl_context());
            ell_elements_.opencl_handle().context(ctx.opencl_context());

            csr_rows_.opencl_handle().context(ctx.opencl_context());
            csr_cols_.opencl_handle().context(ctx.opencl_context());
            csr_elements_.opencl_handle().context(ctx.opencl_context());
          }
#endif
        }

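        /** @brief Fraction of rows (default: 0.8) that the ELL part should cover when the matrix is populated via copy(); the remaining entries go to the CSR part. */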
        SCALARTYPE  csr_threshold()  const { return csr_threshold_; }
        void csr_threshold(SCALARTYPE thr) { csr_threshold_ = thr; }

        vcl_size_t internal_size1() const { return viennacl::tools::align_to_multiple<vcl_size_t>(rows_, ALIGNMENT); }   // row count padded to a multiple of ALIGNMENT
        vcl_size_t internal_size2() const { return viennacl::tools::align_to_multiple<vcl_size_t>(cols_, ALIGNMENT); }   // column count padded to a multiple of ALIGNMENT

        vcl_size_t size1() const { return rows_; }
        vcl_size_t size2() const { return cols_; }

        vcl_size_t internal_ellnnz() const { return viennacl::tools::align_to_multiple<vcl_size_t>(ellnnz_, ALIGNMENT); }   // ELL width padded to a multiple of ALIGNMENT
        vcl_size_t ell_nnz() const { return ellnnz_; }   // entries per row in the ELL part
        vcl_size_t csr_nnz() const { return csrnnz_; }   // number of entries in the CSR part

        const handle_type & handle() const { return ell_elements_; }    // ELL values
        const handle_type & handle2() const { return ell_coords_; }     // ELL column indices
        const handle_type & handle3() const { return csr_rows_; }       // CSR row pointers
        const handle_type & handle4() const { return csr_cols_; }       // CSR column indices
        const handle_type & handle5() const { return csr_elements_; }   // CSR values

      public:
      #if defined(_MSC_VER) && _MSC_VER < 1500          //Visual Studio 2005 needs special treatment
        template <typename CPU_MATRIX>
        friend void copy(const CPU_MATRIX & cpu_matrix, hyb_matrix & gpu_matrix );
      #else
        template <typename CPU_MATRIX, typename T, unsigned int ALIGN>
        friend void copy(const CPU_MATRIX & cpu_matrix, hyb_matrix<T, ALIGN> & gpu_matrix );
      #endif

      private:
        SCALARTYPE  csr_threshold_;
        vcl_size_t rows_;
        vcl_size_t cols_;
        vcl_size_t ellnnz_;
        vcl_size_t csrnnz_;

        handle_type ell_coords_;     // column indices of the ELL part
        handle_type ell_elements_;   // values of the ELL part

        handle_type csr_rows_;       // row pointers of the CSR part
        handle_type csr_cols_;       // column indices of the CSR part
        handle_type csr_elements_;   // values of the CSR part
    };

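    /** @brief Copies a sparse host matrix into a hyb_matrix on the compute device.
    *
    * Usage sketch (assumes a Boost.uBLAS-style source type providing the
    * begin1()/end1() row iterators this template requires; 'host_A' is a
    * placeholder name):
    * \code
    * boost::numeric::ublas::compressed_matrix<double> host_A(1000, 1000);
    * // ... fill host_A ...
    * viennacl::hyb_matrix<double> device_A;
    * viennacl::copy(host_A, device_A);
    * \endcode
    */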
    template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
    void copy(const CPU_MATRIX& cpu_matrix, hyb_matrix<SCALARTYPE, ALIGNMENT>& gpu_matrix )
    {
      assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
      assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

      if (cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0)
      {
        //build a histogram of entries per row and determine the longest row
        vcl_size_t max_entries_per_row = 0;
        std::vector<vcl_size_t> hist_entries(cpu_matrix.size2() + 1, 0);   //a row can hold at most size2() entries

        for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
        {
            vcl_size_t num_entries = 0;
            for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
            {
                ++num_entries;
            }

            hist_entries[num_entries] += 1;
            max_entries_per_row = std::max(max_entries_per_row, num_entries);
        }

        //shrink the ELL width to the smallest value covering at least csr_threshold * size1() rows
        vcl_size_t sum = 0;
        for (vcl_size_t ind = 0; ind <= max_entries_per_row; ind++)
        {
            sum += hist_entries[ind];

            if (sum >= gpu_matrix.csr_threshold() * cpu_matrix.size1())
            {
                max_entries_per_row = ind;
                break;
            }
        }

        //setup GPU matrix
        gpu_matrix.ellnnz_ = max_entries_per_row;
        gpu_matrix.rows_ = cpu_matrix.size1();
        gpu_matrix.cols_ = cpu_matrix.size2();

        vcl_size_t nnz = gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz();

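        // Note on the ELL layout: entry number 'slot' of row 'row' is stored at index
        // internal_size1() * slot + row (each slot is a contiguous block of
        // internal_size1() entries), which is what the indexing below implements.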
        viennacl::backend::typesafe_host_array<unsigned int>  ell_coords(gpu_matrix.ell_coords_, nnz);
        viennacl::backend::typesafe_host_array<unsigned int>  csr_rows(gpu_matrix.csr_rows_, cpu_matrix.size1() + 1);
        std::vector<unsigned int> csr_cols;

        std::vector<SCALARTYPE> ell_elements(nnz);
        std::vector<SCALARTYPE> csr_elements;

        vcl_size_t csr_index = 0;

        for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
        {
          vcl_size_t data_index = 0;

          csr_rows.set(row_it.index1(), csr_index);

          for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
          {
            if (data_index < max_entries_per_row)
            {
                //entry fits into the ELL part
                ell_coords.set(gpu_matrix.internal_size1() * data_index + col_it.index1(), col_it.index2());
                ell_elements[gpu_matrix.internal_size1() * data_index + col_it.index1()] = *col_it;
            }
            else
            {
                //row overflow: entry goes to the CSR part
                csr_cols.push_back(static_cast<unsigned int>(col_it.index2()));
                csr_elements.push_back(*col_it);

                csr_index++;
            }

            data_index++;
          }
        }

        //add a dummy entry if the CSR part is empty, so that nonzero-sized buffers are created below
        if (csr_cols.empty())
        {
          csr_cols.push_back(0);
          csr_elements.push_back(0);
        }

        csr_rows.set(csr_rows.size() - 1, csr_index);

        gpu_matrix.csrnnz_ = csr_cols.size();

        viennacl::backend::typesafe_host_array<unsigned int> csr_cols_for_gpu(gpu_matrix.csr_cols_, csr_cols.size());
        for (vcl_size_t i=0; i<csr_cols.size(); ++i)
          csr_cols_for_gpu.set(i, csr_cols[i]);

        viennacl::backend::memory_create(gpu_matrix.ell_coords_,   ell_coords.raw_size(),                    traits::context(gpu_matrix.ell_coords_), ell_coords.get());
        viennacl::backend::memory_create(gpu_matrix.ell_elements_, sizeof(SCALARTYPE) * ell_elements.size(), traits::context(gpu_matrix.ell_elements_), &(ell_elements[0]));

        viennacl::backend::memory_create(gpu_matrix.csr_rows_,     csr_rows.raw_size(),                      traits::context(gpu_matrix.csr_rows_), csr_rows.get());
        viennacl::backend::memory_create(gpu_matrix.csr_cols_,     csr_cols_for_gpu.raw_size(),              traits::context(gpu_matrix.csr_cols_), csr_cols_for_gpu.get());
        viennacl::backend::memory_create(gpu_matrix.csr_elements_, sizeof(SCALARTYPE) * csr_elements.size(), traits::context(gpu_matrix.csr_elements_), &(csr_elements[0]));
      }
    }

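    /** @brief Copies a hyb_matrix from the compute device back into a sparse host matrix.
    *
    * Zero entries read back from the ELL part are padding and are skipped, so the
    * host matrix receives only the actual nonzeros.
    */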
    template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
    void copy(const hyb_matrix<SCALARTYPE, ALIGNMENT>& gpu_matrix, CPU_MATRIX& cpu_matrix)
    {
      assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
      assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

      if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)
      {
        std::vector<SCALARTYPE> ell_elements(gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz());
        viennacl::backend::typesafe_host_array<unsigned int> ell_coords(gpu_matrix.handle2(), gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz());

        std::vector<SCALARTYPE> csr_elements(gpu_matrix.csr_nnz());
        viennacl::backend::typesafe_host_array<unsigned int> csr_rows(gpu_matrix.handle3(), gpu_matrix.size1() + 1);
        viennacl::backend::typesafe_host_array<unsigned int> csr_cols(gpu_matrix.handle4(), gpu_matrix.csr_nnz());

        viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE) * ell_elements.size(), &(ell_elements[0]));
        viennacl::backend::memory_read(gpu_matrix.handle2(), 0, ell_coords.raw_size(), ell_coords.get());
        viennacl::backend::memory_read(gpu_matrix.handle3(), 0, csr_rows.raw_size(),   csr_rows.get());
        viennacl::backend::memory_read(gpu_matrix.handle4(), 0, csr_cols.raw_size(),   csr_cols.get());
        viennacl::backend::memory_read(gpu_matrix.handle5(), 0, sizeof(SCALARTYPE) * csr_elements.size(), &(csr_elements[0]));

        for (vcl_size_t row = 0; row < gpu_matrix.size1(); row++)
        {
          //write back the ELL part of the row
          for (vcl_size_t ind = 0; ind < gpu_matrix.internal_ellnnz(); ind++)
          {
            vcl_size_t offset = gpu_matrix.internal_size1() * ind + row;

            if (ell_elements[offset] == static_cast<SCALARTYPE>(0.0))   //skip padding entries
            {
              continue;
            }

            if (ell_coords[offset] >= gpu_matrix.size2())
            {
              std::cerr << "ViennaCL encountered invalid data at offset " << offset << ": slot " << ind << ", row " << row << ", column index " << ell_coords[offset] << " >= " << gpu_matrix.size2() << std::endl;
              return;
            }

            cpu_matrix(row, ell_coords[offset]) = ell_elements[offset];
          }

          //write back the CSR part of the row
          for (vcl_size_t ind = csr_rows[row]; ind < csr_rows[row+1]; ind++)
          {
            if (csr_elements[ind] == static_cast<SCALARTYPE>(0.0))   //skip the dummy entry inserted for empty CSR parts
            {
              continue;
            }

            if (csr_cols[ind] >= gpu_matrix.size2())
            {
              std::cerr << "ViennaCL encountered invalid data: CSR column index " << csr_cols[ind] << " >= " << gpu_matrix.size2() << std::endl;
              return;
            }

            cpu_matrix(row, csr_cols[ind]) = csr_elements[ind];
          }
        }
      }
    }


    //
    // Specify available operations:
    //
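    //
    // The op_executor specializations below let hyb_matrix participate in vector
    // expressions. Usage sketch, given a hyb_matrix<double> A (prod() denotes the
    // sparse matrix-vector product from viennacl/linalg/prod.hpp):
    //
    //   viennacl::vector<double> x(A.size2()), y(A.size1());
    //   y  = viennacl::linalg::prod(A, x);
    //   y += viennacl::linalg::prod(A, x);
    //   y -= viennacl::linalg::prod(A, x);
    //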

    namespace linalg
    {
      namespace detail
      {
        // x = A * y
        template <typename T, unsigned int A>
        struct op_executor<vector_base<T>, op_assign, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> >
        {
            static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
            {
              // check for the special case x = A * x
              if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
              {
                viennacl::vector<T> temp(lhs);
                viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
                lhs = temp;
              }
              else
                viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
            }
        };

        // x += A * y
        template <typename T, unsigned int A>
        struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> >
        {
            static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
            {
              viennacl::vector<T> temp(lhs);
              viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
              lhs += temp;
            }
        };

        // x -= A * y
        template <typename T, unsigned int A>
        struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> >
        {
            static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
            {
              viennacl::vector<T> temp(lhs);
              viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
              lhs -= temp;
            }
        };


        // x = A * vec_op
        template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
        struct op_executor<vector_base<T>, op_assign, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
        {
            static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
            {
              viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
              viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs);
            }
        };

        // x += A * vec_op
        template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
        struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
        {
            static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
            {
              viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
              viennacl::vector<T> temp_result(lhs);
              viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
              lhs += temp_result;
            }
        };

        // x -= A * vec_op
        template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
        struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
        {
            static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
            {
              viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
              viennacl::vector<T> temp_result(lhs);
              viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
              lhs -= temp_result;
            }
        };

      } // namespace detail
    } // namespace linalg

}

#endif