ViennaCL - The Vienna Computing Library
1.5.0
#ifndef VIENNACL_COORDINATE_MATRIX_HPP_
#define VIENNACL_COORDINATE_MATRIX_HPP_

/* =========================================================================
   Copyright (c) 2010-2013, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file viennacl/coordinate_matrix.hpp
    @brief Implementation of the coordinate_matrix class
*/

#include <map>
#include <vector>
#include <list>

#include "viennacl/forwards.h"
#include "viennacl/vector.hpp"

#include "viennacl/linalg/sparse_matrix_operations.hpp"

namespace viennacl
{

  //provide copy-operation:
  /** @brief Copies a sparse matrix from the host to the coordinate_matrix on the compute device.
  *
  * @param cpu_matrix  A sparse matrix on the host offering the uBLAS-style iterator interface
  * @param gpu_matrix  The coordinate_matrix to be filled on the compute device
  */
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const CPU_MATRIX & cpu_matrix,
            coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
  {
    assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
    assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

    vcl_size_t group_num = 64;

    // Step 1: Determine nonzeros:
    if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
    {
      vcl_size_t num_entries = 0;
      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
      {
        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
        {
          ++num_entries;
        }
      }

      // Step 2: Set up matrix data:
      gpu_matrix.nonzeros_ = num_entries;
      gpu_matrix.rows_ = cpu_matrix.size1();
      gpu_matrix.cols_ = cpu_matrix.size2();

      viennacl::backend::typesafe_host_array<unsigned int> group_boundaries(gpu_matrix.handle3(), group_num + 1);
      viennacl::backend::typesafe_host_array<unsigned int> coord_buffer(gpu_matrix.handle12(), 2*gpu_matrix.internal_nnz());
      std::vector<SCALARTYPE> elements(gpu_matrix.internal_nnz());

      vcl_size_t data_index = 0;
      vcl_size_t current_fraction = 0;

      group_boundaries.set(0, 0);
      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
      {
        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
        {
          coord_buffer.set(2*data_index, col_it.index1());
          coord_buffer.set(2*data_index + 1, col_it.index2());
          elements[data_index] = *col_it;
          ++data_index;
        }

        //split the data approximately equally over the 64 groups, advancing group boundaries only at row ends:
        while (data_index > (current_fraction + 1) / static_cast<double>(group_num) * num_entries)
          group_boundaries.set(++current_fraction, data_index);
      }

      //write end of last group:
      group_boundaries.set(group_num, data_index);

      viennacl::backend::memory_create(gpu_matrix.group_boundaries_, group_boundaries.raw_size(), traits::context(gpu_matrix.group_boundaries_), group_boundaries.get());
      viennacl::backend::memory_create(gpu_matrix.coord_buffer_, coord_buffer.raw_size(), traits::context(gpu_matrix.coord_buffer_), coord_buffer.get());
      viennacl::backend::memory_create(gpu_matrix.elements_, sizeof(SCALARTYPE)*elements.size(), traits::context(gpu_matrix.elements_), &(elements[0]));
    }
  }
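  /* Usage sketch (illustrative only; 'host_A' and 'device_A' are hypothetical
     names). The template above accepts any host type offering the uBLAS-style
     iterator interface; plain STL containers go through the adapter-based
     overload directly below:

       std::vector< std::map<unsigned int, float> > host_A(4);  // 4x4, one map per row
       host_A[0][0] =  1.0f;
       host_A[1][2] =  2.5f;
       host_A[3][3] = -1.0f;

       viennacl::coordinate_matrix<float> device_A;  // sizes are deduced during copy()
       viennacl::copy(host_A, device_A);             // host -> device
  */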
  /** @brief Copies a sparse square matrix in the std::vector< std::map<...> > format to the compute device.
  *
  * @param cpu_matrix  A sparse square matrix on the host
  * @param gpu_matrix  The coordinate_matrix to be filled on the compute device
  */
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix,
            coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
  {
    copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(cpu_matrix, cpu_matrix.size(), cpu_matrix.size()), gpu_matrix);
  }

  //gpu to cpu:
  /** @brief Copies a sparse matrix from the compute device to the host.
  *
  * @param gpu_matrix  A coordinate_matrix on the compute device
  * @param cpu_matrix  A sparse matrix on the host offering operator()(i, j) write access
  */
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            CPU_MATRIX & cpu_matrix )
  {
    assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
    assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

    if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
    {
      //get raw data from memory:
      viennacl::backend::typesafe_host_array<unsigned int> coord_buffer(gpu_matrix.handle12(), 2*gpu_matrix.nnz());
      std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

      viennacl::backend::memory_read(gpu_matrix.handle12(), 0, coord_buffer.raw_size(), coord_buffer.get());
      viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE) * elements.size(), &(elements[0]));

      //fill the cpu_matrix:
      for (vcl_size_t index = 0; index < gpu_matrix.nnz(); ++index)
        cpu_matrix(coord_buffer[2*index], coord_buffer[2*index+1]) = elements[index];
    }
  }

  /** @brief Copies a sparse matrix from the compute device to the host in std::vector< std::map<...> > format.
  *
  * @param gpu_matrix  A coordinate_matrix on the compute device
  * @param cpu_matrix  A sparse square matrix on the host
  */
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix)
  {
    tools::sparse_matrix_adapter<SCALARTYPE> temp(cpu_matrix, gpu_matrix.size1(), gpu_matrix.size2());
    copy(gpu_matrix, temp);
  }
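  /* Round-trip sketch (illustrative only; 'device_A' and 'host_B' are hypothetical
     names): reading a device matrix back with the overloads above. The std::vector
     should be sized to size1() beforehand, since the adapter only wraps existing
     storage:

       std::vector< std::map<unsigned int, float> > host_B(device_A.size1());
       viennacl::copy(device_A, host_B);  // device -> host
  */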
  //////////////////////// coordinate_matrix ////////////////////////

  /** @brief A sparse matrix class based on coordinate storage: entries are stored as triplets (i, j, value).
  *
  * The entry buffers are padded to multiples of ALIGNMENT, and the nonzeros are partitioned into groups for parallel processing.
  */
  template<class SCALARTYPE, unsigned int ALIGNMENT /* see forwards.h */ >
  class coordinate_matrix
  {
  public:
    typedef viennacl::backend::mem_handle                                                              handle_type;
    typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType>   value_type;
    typedef vcl_size_t                                                                                 size_type;

    /** @brief Default construction of a coordinate matrix. No memory is allocated */
    coordinate_matrix() : rows_(0), cols_(0), nonzeros_(0), group_num_(64) {}

    explicit coordinate_matrix(viennacl::context ctx) : rows_(0), cols_(0), nonzeros_(0), group_num_(64)
    {
      group_boundaries_.switch_active_handle_id(ctx.memory_type());
      coord_buffer_.switch_active_handle_id(ctx.memory_type());
      elements_.switch_active_handle_id(ctx.memory_type());

#ifdef VIENNACL_WITH_OPENCL
      if (ctx.memory_type() == OPENCL_MEMORY)
      {
        group_boundaries_.opencl_handle().context(ctx.opencl_context());
        coord_buffer_.opencl_handle().context(ctx.opencl_context());
        elements_.opencl_handle().context(ctx.opencl_context());
      }
#endif
    }

    /** @brief Construction of a coordinate matrix with the supplied number of rows and columns. If the number of nonzeros is positive, memory is allocated accordingly.
    *
    * @param rows      Number of rows
    * @param cols      Number of columns
    * @param nonzeros  Optional number of nonzeros for memory preallocation
    * @param ctx       Optional context in which the matrix is created
    */
    coordinate_matrix(vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzeros = 0, viennacl::context ctx = viennacl::context()) :
      rows_(rows), cols_(cols), nonzeros_(nonzeros), group_num_(64)   //group_num_ must be initialized here, since memory_create() below relies on it
    {
      if (nonzeros > 0)
      {
        viennacl::backend::memory_create(group_boundaries_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (group_num_ + 1), ctx);
        viennacl::backend::memory_create(coord_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * 2 * internal_nnz(), ctx);
        viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE) * internal_nnz(), ctx);
      }
      else
      {
        group_boundaries_.switch_active_handle_id(ctx.memory_type());
        coord_buffer_.switch_active_handle_id(ctx.memory_type());
        elements_.switch_active_handle_id(ctx.memory_type());

#ifdef VIENNACL_WITH_OPENCL
        if (ctx.memory_type() == OPENCL_MEMORY)
        {
          group_boundaries_.opencl_handle().context(ctx.opencl_context());
          coord_buffer_.opencl_handle().context(ctx.opencl_context());
          elements_.opencl_handle().context(ctx.opencl_context());
        }
#endif
      }
    }

    /** @brief Construction of a coordinate matrix with the supplied number of rows and columns in the supplied context. No memory is allocated yet.
    *
    * @param rows      Number of rows
    * @param cols      Number of columns
    * @param ctx       Context in which the matrix is created
    */
    explicit coordinate_matrix(vcl_size_t rows, vcl_size_t cols, viennacl::context ctx)
      : rows_(rows), cols_(cols), nonzeros_(0), group_num_(64)   //see note on group_num_ above
    {
      group_boundaries_.switch_active_handle_id(ctx.memory_type());
      coord_buffer_.switch_active_handle_id(ctx.memory_type());
      elements_.switch_active_handle_id(ctx.memory_type());

#ifdef VIENNACL_WITH_OPENCL
      if (ctx.memory_type() == OPENCL_MEMORY)
      {
        group_boundaries_.opencl_handle().context(ctx.opencl_context());
        coord_buffer_.opencl_handle().context(ctx.opencl_context());
        elements_.opencl_handle().context(ctx.opencl_context());
      }
#endif
    }
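    /* Construction sketch (illustrative only; 'host_ctx' and 'A' are hypothetical
       names): placing a matrix in an explicit context. viennacl::MAIN_MEMORY
       selects the host backend; an OpenCL or CUDA context works the same way:

         viennacl::context host_ctx(viennacl::MAIN_MEMORY);
         viennacl::coordinate_matrix<float> A(1000, 1000, 5000, host_ctx);  // preallocates room for 5000 nonzeros

       Passing a nonzero count up front avoids a later reallocation in reserve().
    */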
    /** @brief Allocates memory for the supplied number of nonzeros. Memory is only allocated if the number of nonzeros grows. */
    void reserve(vcl_size_t new_nonzeros)
    {
      if (new_nonzeros > nonzeros_)  //TODO: Do we need to initialize new memory with zero?
      {
        handle_type coord_buffer_old;
        handle_type elements_old;
        viennacl::backend::memory_shallow_copy(coord_buffer_, coord_buffer_old);
        viennacl::backend::memory_shallow_copy(elements_, elements_old);

        vcl_size_t internal_new_nnz = viennacl::tools::align_to_multiple<vcl_size_t>(new_nonzeros, ALIGNMENT);
        viennacl::backend::typesafe_host_array<unsigned int> size_deducer(coord_buffer_);
        viennacl::backend::memory_create(coord_buffer_, size_deducer.element_size() * 2 * internal_new_nnz, viennacl::traits::context(coord_buffer_));
        viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE) * internal_new_nnz, viennacl::traits::context(elements_));

        viennacl::backend::memory_copy(coord_buffer_old, coord_buffer_, 0, 0, size_deducer.element_size() * 2 * nonzeros_);
        viennacl::backend::memory_copy(elements_old, elements_, 0, 0, sizeof(SCALARTYPE) * nonzeros_);

        nonzeros_ = new_nonzeros;
      }
    }

    /** @brief Resizes the matrix.
    *
    * @param new_size1  New number of rows
    * @param new_size2  New number of columns
    * @param preserve   If true, the old values are preserved (entries outside the new bounds are dropped)
    */
    void resize(vcl_size_t new_size1, vcl_size_t new_size2, bool preserve = true)
    {
      assert (new_size1 > 0 && new_size2 > 0);

      if (new_size1 < rows_ || new_size2 < cols_)  //shrinking: entries outside the new bounds must be removed
      {
        std::vector<std::map<unsigned int, SCALARTYPE> > stl_sparse_matrix;
        if (rows_ > 0)
          stl_sparse_matrix.resize(rows_);

        if (preserve && rows_ > 0)
          viennacl::copy(*this, stl_sparse_matrix);

        stl_sparse_matrix.resize(new_size1);

        //crop columns if the column count shrinks:
        if (new_size2 < cols_ && rows_ > 0)
        {
          for (vcl_size_t i=0; i<stl_sparse_matrix.size(); ++i)
          {
            std::list<unsigned int> to_delete;
            for (typename std::map<unsigned int, SCALARTYPE>::iterator it = stl_sparse_matrix[i].begin();
                 it != stl_sparse_matrix[i].end();
                 ++it)
            {
              if (it->first >= new_size2)
                to_delete.push_back(it->first);
            }

            for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it)
              stl_sparse_matrix[i].erase(*it);
          }
        }

        rows_ = new_size1;
        cols_ = new_size2;
        viennacl::copy(stl_sparse_matrix, *this);
      }

      rows_ = new_size1;
      cols_ = new_size2;
    }
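    /* Resize sketch (illustrative only; 'A' is a hypothetical name): shrinking
       keeps only entries whose indices remain in range, at the cost of a
       device -> host -> device round trip inside resize():

         viennacl::coordinate_matrix<float> A(100, 100);
         // ... fill A via viennacl::copy() ...
         A.resize(50, 50, true);  // entries with row or column index >= 50 are dropped
    */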
    /** @brief Returns the number of rows */
    vcl_size_t size1() const { return rows_; }
    /** @brief Returns the number of columns */
    vcl_size_t size2() const { return cols_; }
    /** @brief Returns the number of nonzero entries */
    vcl_size_t nnz() const { return nonzeros_; }
    /** @brief Returns the number of internal nonzero entries (padded to a multiple of ALIGNMENT) */
    vcl_size_t internal_nnz() const { return viennacl::tools::align_to_multiple<vcl_size_t>(nonzeros_, ALIGNMENT); }

    /** @brief Returns the handle to the (row index, column index) pair buffer */
    const handle_type & handle12() const { return coord_buffer_; }
    /** @brief Returns the handle to the matrix entry buffer */
    const handle_type & handle() const { return elements_; }
    /** @brief Returns the handle to the group boundary buffer */
    const handle_type & handle3() const { return group_boundaries_; }

    vcl_size_t groups() const { return group_num_; }

#if defined(_MSC_VER) && _MSC_VER < 1500          //Visual Studio 2005 needs special treatment
    template <typename CPU_MATRIX>
    friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix & gpu_matrix );
#else
    template <typename CPU_MATRIX, typename SCALARTYPE2, unsigned int ALIGNMENT2>
    friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix<SCALARTYPE2, ALIGNMENT2> & gpu_matrix );
#endif

  private:
    /** @brief Copy constructor is not available at present */
    coordinate_matrix(coordinate_matrix const &);

    /** @brief Assignment is not available at present */
    coordinate_matrix & operator=(coordinate_matrix const &);


    vcl_size_t rows_;
    vcl_size_t cols_;
    vcl_size_t nonzeros_;
    vcl_size_t group_num_;
    handle_type coord_buffer_;
    handle_type elements_;
    handle_type group_boundaries_;
  };


  //
  // Specify available operations:
  //

  namespace linalg
  {
    namespace detail
    {
      // x = A * y
      template <typename T, unsigned int A>
      struct op_executor<vector_base<T>, op_assign, vector_expression<const coordinate_matrix<T, A>, const vector_base<T>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const coordinate_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
        {
          // check for the special case x = A * x
          if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
          {
            viennacl::vector<T> temp(lhs);
            viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
            lhs = temp;
          }
          else
            viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
        }
      };

      // x += A * y
      template <typename T, unsigned int A>
      struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const coordinate_matrix<T, A>, const vector_base<T>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const coordinate_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(lhs);  //initialized from lhs merely to obtain matching size and context
          viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
          lhs += temp;
        }
      };

      // x -= A * y
      template <typename T, unsigned int A>
      struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const coordinate_matrix<T, A>, const vector_base<T>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const coordinate_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(lhs);  //initialized from lhs merely to obtain matching size and context
          viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
          lhs -= temp;
        }
      };
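      /* Dispatch sketch (illustrative only; 'A', 'x', 'y' are hypothetical names):
         the three specializations above are what the expression-template layer
         selects for statements such as

           viennacl::vector<float> x(A.size2()), y(A.size1());
           y  = viennacl::linalg::prod(A, x);  // op_assign; temporary only if x and y alias
           y += viennacl::linalg::prod(A, x);  // op_inplace_add, via a temporary
           y -= viennacl::linalg::prod(A, x);  // op_inplace_sub, via a temporary

         with A a viennacl::coordinate_matrix<float>.
      */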
      // x = A * vec_op
      template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
      struct op_executor<vector_base<T>, op_assign, vector_expression<const coordinate_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const coordinate_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
          viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs);
        }
      };

      // x += A * vec_op
      template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
      struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const coordinate_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const coordinate_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
          viennacl::vector<T> temp_result(lhs);
          viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
          lhs += temp_result;
        }
      };

      // x -= A * vec_op
      template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
      struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const coordinate_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const coordinate_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
          viennacl::vector<T> temp_result(lhs);
          viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
          lhs -= temp_result;
        }
      };

    } // namespace detail
  } // namespace linalg

} // namespace viennacl

#endif