ViennaCL - The Vienna Computing Library
1.5.0
viennacl/hyb_matrix.hpp
#ifndef VIENNACL_HYB_MATRIX_HPP_
#define VIENNACL_HYB_MATRIX_HPP_

/* =========================================================================
   Copyright (c) 2010-2013, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file viennacl/hyb_matrix.hpp
    @brief Implementation of the hyb_matrix class (sparse matrix in a hybrid ELL/CSR format)
*/

#include "viennacl/forwards.h"
#include "viennacl/vector.hpp"

#include "viennacl/tools/tools.hpp"

#include "viennacl/linalg/sparse_matrix_operations.hpp"

namespace viennacl
{
  /** @brief Sparse matrix class using a hybrid format: short rows are stored in an ELL block, entries of longer rows spill into a CSR block. */
  template<typename SCALARTYPE, unsigned int ALIGNMENT /* see forwards.h for default argument */>
  class hyb_matrix
  {
  public:
    typedef viennacl::backend::mem_handle                                                            handle_type;
    typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType> value_type;

    hyb_matrix() : csr_threshold_(SCALARTYPE(0.8)), rows_(0), cols_(0) {}

    hyb_matrix(viennacl::context ctx) : csr_threshold_(SCALARTYPE(0.8)), rows_(0), cols_(0)
    {
      ell_coords_.switch_active_handle_id(ctx.memory_type());
      ell_elements_.switch_active_handle_id(ctx.memory_type());

      csr_rows_.switch_active_handle_id(ctx.memory_type());
      csr_cols_.switch_active_handle_id(ctx.memory_type());
      csr_elements_.switch_active_handle_id(ctx.memory_type());

#ifdef VIENNACL_WITH_OPENCL
      if (ctx.memory_type() == OPENCL_MEMORY)
      {
        ell_coords_.opencl_handle().context(ctx.opencl_context());
        ell_elements_.opencl_handle().context(ctx.opencl_context());

        csr_rows_.opencl_handle().context(ctx.opencl_context());
        csr_cols_.opencl_handle().context(ctx.opencl_context());
        csr_elements_.opencl_handle().context(ctx.opencl_context());
      }
#endif
    }

    SCALARTYPE csr_threshold() const { return csr_threshold_; }
    void csr_threshold(SCALARTYPE thr) { csr_threshold_ = thr; }

    vcl_size_t internal_size1() const { return viennacl::tools::align_to_multiple<vcl_size_t>(rows_, ALIGNMENT); }
    vcl_size_t internal_size2() const { return viennacl::tools::align_to_multiple<vcl_size_t>(cols_, ALIGNMENT); }

    vcl_size_t size1() const { return rows_; }
    vcl_size_t size2() const { return cols_; }

    vcl_size_t internal_ellnnz() const { return viennacl::tools::align_to_multiple<vcl_size_t>(ellnnz_, ALIGNMENT); }
    vcl_size_t ell_nnz() const { return ellnnz_; }
    vcl_size_t csr_nnz() const { return csrnnz_; }

    const handle_type & handle() const { return ell_elements_; }
    const handle_type & handle2() const { return ell_coords_; }
    const handle_type & handle3() const { return csr_rows_; }
    const handle_type & handle4() const { return csr_cols_; }
    const handle_type & handle5() const { return csr_elements_; }

  public:
#if defined(_MSC_VER) && _MSC_VER < 1500      //Visual Studio 2005 needs special treatment
    template <typename CPU_MATRIX>
    friend void copy(const CPU_MATRIX & cpu_matrix, hyb_matrix & gpu_matrix);
#else
    template <typename CPU_MATRIX, typename T, unsigned int ALIGN>
    friend void copy(const CPU_MATRIX & cpu_matrix, hyb_matrix<T, ALIGN> & gpu_matrix);
#endif

  private:
    SCALARTYPE  csr_threshold_;
    vcl_size_t  rows_;
    vcl_size_t  cols_;
    vcl_size_t  ellnnz_;
    vcl_size_t  csrnnz_;

    handle_type ell_coords_;    // column indices of the ELL block
    handle_type ell_elements_;  // entries of the ELL block

    handle_type csr_rows_;
    handle_type csr_cols_;
    handle_type csr_elements_;
  };
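  // Illustrative note (not part of the original header): csr_threshold_
  // controls the ELL/CSR split performed by copy() below. The ELL width is
  // chosen as the smallest K such that at least csr_threshold() * size1()
  // rows have at most K nonzeros; entries beyond K in longer rows spill
  // into the CSR part. A minimal sketch of tuning the split:
  //
  //   viennacl::hyb_matrix<double> A;   // default csr_threshold() == 0.8
  //   A.csr_threshold(0.9);             // larger ELL block, smaller CSR block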

  /** @brief Copies a sparse host matrix (any type providing const_iterator1/const_iterator2 row and column iterators) to a hyb_matrix on the compute device. */
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const CPU_MATRIX & cpu_matrix, hyb_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
  {
    assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
    assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

    if (cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0)
    {
      //determine the maximum number of entries per row and build a histogram of row lengths
      vcl_size_t max_entries_per_row = 0;
      std::vector<vcl_size_t> hist_entries(cpu_matrix.size1() + 1, 0);

      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
      {
        vcl_size_t num_entries = 0;
        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
        {
          ++num_entries;
        }

        hist_entries[num_entries] += 1;
        max_entries_per_row = std::max(max_entries_per_row, num_entries);
      }

      //choose the ELL width: the smallest row length that covers at least
      //csr_threshold() * size1() of the rows
      vcl_size_t sum = 0;
      for (vcl_size_t ind = 0; ind <= max_entries_per_row; ind++)
      {
        sum += hist_entries[ind];

        if (sum >= gpu_matrix.csr_threshold() * cpu_matrix.size1())
        {
          max_entries_per_row = ind;
          break;
        }
      }

      //setup GPU matrix
      gpu_matrix.ellnnz_ = max_entries_per_row;
      gpu_matrix.rows_ = cpu_matrix.size1();
      gpu_matrix.cols_ = cpu_matrix.size2();

      vcl_size_t nnz = gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz();

      viennacl::backend::typesafe_host_array<unsigned int> ell_coords(gpu_matrix.ell_coords_, nnz);
      viennacl::backend::typesafe_host_array<unsigned int> csr_rows(gpu_matrix.csr_rows_, cpu_matrix.size1() + 1);
      std::vector<unsigned int> csr_cols;

      std::vector<SCALARTYPE> ell_elements(nnz);
      std::vector<SCALARTYPE> csr_elements;

      vcl_size_t csr_index = 0;

      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
      {
        vcl_size_t data_index = 0;

        csr_rows.set(row_it.index1(), csr_index);

        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
        {
          if (data_index < max_entries_per_row)
          {
            //the ELL block is stored column-major over the padded row count:
            //entry 'data_index' of row 'index1()' sits at offset internal_size1() * data_index + index1()
            ell_coords.set(gpu_matrix.internal_size1() * data_index + col_it.index1(), col_it.index2());
            ell_elements[gpu_matrix.internal_size1() * data_index + col_it.index1()] = *col_it;
          }
          else
          {
            //row exceeds the ELL width: spill the remaining entries into the CSR block
            csr_cols.push_back(static_cast<unsigned int>(col_it.index2()));
            csr_elements.push_back(*col_it);

            csr_index++;
          }

          data_index++;
        }

      }

      //CSR arrays must not be empty, so add a dummy entry if necessary
      if (csr_cols.empty())
      {
        csr_cols.push_back(0);
        csr_elements.push_back(0);
      }

      csr_rows.set(csr_rows.size() - 1, csr_index);

      gpu_matrix.csrnnz_ = csr_cols.size();

      viennacl::backend::typesafe_host_array<unsigned int> csr_cols_for_gpu(gpu_matrix.csr_cols_, csr_cols.size());
      for (vcl_size_t i = 0; i < csr_cols.size(); ++i)
        csr_cols_for_gpu.set(i, csr_cols[i]);

      viennacl::backend::memory_create(gpu_matrix.ell_coords_, ell_coords.raw_size(), traits::context(gpu_matrix.ell_coords_), ell_coords.get());
      viennacl::backend::memory_create(gpu_matrix.ell_elements_, sizeof(SCALARTYPE) * ell_elements.size(), traits::context(gpu_matrix.ell_elements_), &(ell_elements[0]));

      viennacl::backend::memory_create(gpu_matrix.csr_rows_, csr_rows.raw_size(), traits::context(gpu_matrix.csr_rows_), csr_rows.get());
      viennacl::backend::memory_create(gpu_matrix.csr_cols_, csr_cols_for_gpu.raw_size(), traits::context(gpu_matrix.csr_cols_), csr_cols_for_gpu.get());
      viennacl::backend::memory_create(gpu_matrix.csr_elements_, sizeof(SCALARTYPE) * csr_elements.size(), traits::context(gpu_matrix.csr_elements_), &(csr_elements[0]));
    }
  }
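  // Usage sketch (illustrative, not part of the original header): any host
  // type modelling the begin1()/end1() row-iterator interface used above can
  // be passed to copy(). Assuming Boost.uBLAS as the host-side format:
  //
  //   boost::numeric::ublas::compressed_matrix<double> host_A(1000, 1000);
  //   // ... fill host_A ...
  //   viennacl::hyb_matrix<double> device_A;
  //   viennacl::copy(host_A, device_A);   // performs the ELL/CSR split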

  /** @brief Copies a hyb_matrix from the compute device back to a sparse host matrix. */
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const hyb_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix, CPU_MATRIX & cpu_matrix)
  {
    assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
    assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

    if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)
    {
      std::vector<SCALARTYPE> ell_elements(gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz());
      viennacl::backend::typesafe_host_array<unsigned int> ell_coords(gpu_matrix.handle2(), gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz());

      std::vector<SCALARTYPE> csr_elements(gpu_matrix.csr_nnz());
      viennacl::backend::typesafe_host_array<unsigned int> csr_rows(gpu_matrix.handle3(), gpu_matrix.size1() + 1);
      viennacl::backend::typesafe_host_array<unsigned int> csr_cols(gpu_matrix.handle4(), gpu_matrix.csr_nnz());

      viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE) * ell_elements.size(), &(ell_elements[0]));
      viennacl::backend::memory_read(gpu_matrix.handle2(), 0, ell_coords.raw_size(), ell_coords.get());
      viennacl::backend::memory_read(gpu_matrix.handle3(), 0, csr_rows.raw_size(), csr_rows.get());
      viennacl::backend::memory_read(gpu_matrix.handle4(), 0, csr_cols.raw_size(), csr_cols.get());
      viennacl::backend::memory_read(gpu_matrix.handle5(), 0, sizeof(SCALARTYPE) * csr_elements.size(), &(csr_elements[0]));


      for (vcl_size_t row = 0; row < gpu_matrix.size1(); row++)
      {
        //ELL block: padding entries are stored as explicit zeros and skipped here
        for (vcl_size_t ind = 0; ind < gpu_matrix.internal_ellnnz(); ind++)
        {
          vcl_size_t offset = gpu_matrix.internal_size1() * ind + row;

          if (ell_elements[offset] == static_cast<SCALARTYPE>(0.0))
          {
            continue;
          }

          if (ell_coords[offset] >= gpu_matrix.size2())
          {
            std::cerr << "ViennaCL encountered invalid data " << offset << " " << ind << " " << row << " " << ell_coords[offset] << " " << gpu_matrix.size2() << std::endl;
            return;
          }

          cpu_matrix(row, ell_coords[offset]) = ell_elements[offset];
        }

        //CSR block: entries of rows exceeding the ELL width
        for (vcl_size_t ind = csr_rows[row]; ind < csr_rows[row+1]; ind++)
        {
          if (csr_elements[ind] == static_cast<SCALARTYPE>(0.0))
          {
            continue;
          }

          if (csr_cols[ind] >= gpu_matrix.size2())
          {
            std::cerr << "ViennaCL encountered invalid data " << std::endl;
            return;
          }

          cpu_matrix(row, csr_cols[ind]) = csr_elements[ind];
        }
      }
    }
  }


  //
  // Specify available operations:
  //

  namespace linalg
  {
    namespace detail
    {
      // x = A * y
      template <typename T, unsigned int A>
      struct op_executor<vector_base<T>, op_assign, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
        {
          // check for the special case x = A * x
          if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
          {
            viennacl::vector<T> temp(lhs);
            viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
            lhs = temp;
          }
          else
            viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
        }
      };

      // x += A * y
      template <typename T, unsigned int A>
      struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(lhs);
          viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
          lhs += temp;
        }
      };

      // x -= A * y
      template <typename T, unsigned int A>
      struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(lhs);
          viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
          lhs -= temp;
        }
      };


      // x = A * vec_op
      template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
      struct op_executor<vector_base<T>, op_assign, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
          viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs);
        }
      };

      // x += A * vec_op
      template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
      struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
          viennacl::vector<T> temp_result(lhs);
          viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
          lhs += temp_result;
        }
      };

      // x -= A * vec_op
      template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
      struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
          viennacl::vector<T> temp_result(lhs);
          viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
          lhs -= temp_result;
        }
      };

    } // namespace detail
  } // namespace linalg

} // namespace viennacl

#endif
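
// ---------------------------------------------------------------------------
// Standalone usage sketch (illustrative, belongs in its own .cpp and is not
// shipped with this header): a sparse matrix-vector product with hyb_matrix.
// The expression y = viennacl::linalg::prod(A, x) is dispatched to prod_impl()
// by the op_executor specializations above. Boost.uBLAS is assumed here as the
// host-side sparse type; any type with the row/column iterator interface
// expected by copy() works.

#include <vector>
#include <boost/numeric/ublas/matrix_sparse.hpp>
#include "viennacl/hyb_matrix.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/linalg/prod.hpp"

int main()
{
  boost::numeric::ublas::compressed_matrix<double> host_A(4, 4);
  host_A(0, 0) = 2.0; host_A(1, 1) = 3.0; host_A(2, 3) = 1.0;

  viennacl::hyb_matrix<double> A;
  viennacl::copy(host_A, A);          // ELL/CSR split happens on transfer

  std::vector<double> host_x(4, 1.0);
  viennacl::vector<double> x(4), y(4);
  viennacl::copy(host_x, x);

  y = viennacl::linalg::prod(A, x);   // handled by the op_assign executor

  std::vector<double> host_y(4);
  viennacl::copy(y, host_y);          // read back the result
  return 0;
}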