ViennaCL - The Vienna Computing Library
1.5.0
#ifndef VIENNACL_MATRIX_PROXY_HPP_
#define VIENNACL_MATRIX_PROXY_HPP_

/* =========================================================================
   Copyright (c) 2010-2013, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file matrix_proxy.hpp
    @brief Proxy classes for matrices.
*/

#include "viennacl/forwards.h"
#include "viennacl/range.hpp"
#include "viennacl/matrix.hpp"
#include "viennacl/linalg/matrix_operations.hpp"

namespace viennacl
{

  /** @brief Represents a non-strided submatrix of a bigger matrix A, i.e. a contiguous block of rows and columns. */
  template <typename MatrixType>
  class matrix_range : public matrix_base<typename MatrixType::cpu_value_type, typename MatrixType::orientation_functor>
  {
      typedef matrix_base<typename MatrixType::cpu_value_type,
                          typename MatrixType::orientation_functor>   base_type;
      typedef matrix_range<MatrixType>                                self_type;

    public:
      typedef typename MatrixType::orientation_category   orientation_category;

      typedef typename MatrixType::value_type   value_type;
      typedef typename viennacl::result_of::cpu_value_type<value_type>::type   cpu_value_type;
      typedef range::size_type                  size_type;
      typedef range::difference_type            difference_type;
      typedef value_type                        reference;
      typedef const value_type &                const_reference;

      matrix_range(MatrixType & A,
                   range const & row_range,
                   range const & col_range) : base_type(A.handle(),
                                                        row_range.size(), row_range.start(), 1, A.internal_size1(),
                                                        col_range.size(), col_range.start(), 1, A.internal_size2()) {}

      using base_type::operator=;
  };
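
  // --------------------------------------------------------------------
  // Usage sketch (editorial addition, not part of the original header):
  // a matrix_range views a contiguous sub-block of an existing matrix,
  // and operations on the proxy touch only that block. Ranges are
  // half-open, i.e. viennacl::range(1, 3) selects indices 1 and 2.
  // Names below (M, M_sub) are illustrative; assumes a valid ViennaCL
  // context and the default row_major layout.
  //
  //   #include "viennacl/matrix.hpp"
  //   #include "viennacl/matrix_proxy.hpp"
  //
  //   viennacl::matrix<float> M(4, 4);    // 4x4 device matrix
  //   viennacl::range rows(1, 3);         // rows 1,2
  //   viennacl::range cols(0, 2);         // cols 0,1
  //   viennacl::matrix_range<viennacl::matrix<float> > M_sub(M, rows, cols);
  //   M_sub += M_sub;                     // doubles the 2x2 block only
  // --------------------------------------------------------------------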

  /////////////////////////////////////////////////////////////
  ///////////////////////// CPU to GPU ////////////////////////
  /////////////////////////////////////////////////////////////

  //row_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(const CPU_MATRIX & cpu_matrix,
            matrix_range<matrix<SCALARTYPE, row_major, 1> > & gpu_matrix_range )
  {
    assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
           && (cpu_matrix.size2() == gpu_matrix_range.size2())
           && bool("Matrix size mismatch!"));

    if ( gpu_matrix_range.start2() != 0 || gpu_matrix_range.size2() != gpu_matrix_range.internal_size2())
    {
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size2());

      //copy each stride separately:
      for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
      {
        for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
          entries[j] = cpu_matrix(i,j);

        vcl_size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.internal_size2() + gpu_matrix_range.start2();
        vcl_size_t num_entries = gpu_matrix_range.size2();
        viennacl::backend::memory_write(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));
        //std::cout << "Strided copy worked!" << std::endl;
      }
    }
    else
    {
      //rows span the full internal width, so the block can be copied in one piece:
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.internal_size2());

      for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
        for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
          entries[i*gpu_matrix_range.internal_size2() + j] = cpu_matrix(i,j);

      vcl_size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.internal_size2();
      vcl_size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.internal_size2();
      viennacl::backend::memory_write(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));
      //std::cout << "Block copy worked!" << std::endl;
    }
  }

  //column_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(const CPU_MATRIX & cpu_matrix,
            matrix_range<matrix<SCALARTYPE, column_major, 1> > & gpu_matrix_range )
  {
    assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
           && (cpu_matrix.size2() == gpu_matrix_range.size2())
           && bool("Matrix size mismatch!"));

    if ( gpu_matrix_range.start1() != 0 || gpu_matrix_range.size1() != gpu_matrix_range.internal_size1())
    {
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size1());

      //copy each stride separately:
      for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
      {
        for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
          entries[i] = cpu_matrix(i,j);

        vcl_size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.internal_size1() + gpu_matrix_range.start1();
        vcl_size_t num_entries = gpu_matrix_range.size1();
        viennacl::backend::memory_write(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));
        //std::cout << "Strided copy worked!" << std::endl;
      }
    }
    else
    {
      //columns span the full internal height, so the block can be copied in one piece:
      std::vector<SCALARTYPE> entries(gpu_matrix_range.internal_size1()*gpu_matrix_range.size2());

      for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
        for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
          entries[i + j*gpu_matrix_range.internal_size1()] = cpu_matrix(i,j);

      vcl_size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.internal_size1();
      vcl_size_t num_entries = gpu_matrix_range.internal_size1() * gpu_matrix_range.size2();
      viennacl::backend::memory_write(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));
      //std::cout << "Block copy worked!" << std::endl;
    }
  }
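
  // --------------------------------------------------------------------
  // Usage sketch (editorial addition): transferring host data into a
  // sub-block of a device matrix. Any CPU matrix type offering size1(),
  // size2() and operator()(i,j) works with the overloads above;
  // Boost.uBLAS is used here purely for illustration, and the names
  // (host_block, M, M_sub) are hypothetical.
  //
  //   #include <boost/numeric/ublas/matrix.hpp>
  //
  //   boost::numeric::ublas::matrix<float> host_block(2, 2);
  //   host_block(0,0) = 1.0f; host_block(0,1) = 2.0f;
  //   host_block(1,0) = 3.0f; host_block(1,1) = 4.0f;
  //
  //   viennacl::matrix<float> M(4, 4);
  //   viennacl::matrix_range<viennacl::matrix<float> > M_sub(M, viennacl::range(1, 3), viennacl::range(0, 2));
  //   viennacl::copy(host_block, M_sub);  // writes only the 2x2 block; the rest of M is untouched
  // --------------------------------------------------------------------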

  /////////////////////////////////////////////////////////////
  ///////////////////////// GPU to CPU ////////////////////////
  /////////////////////////////////////////////////////////////

  //row_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(matrix_range<matrix<SCALARTYPE, row_major, 1> > const & gpu_matrix_range,
            CPU_MATRIX & cpu_matrix)
  {
    assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
           && (cpu_matrix.size2() == gpu_matrix_range.size2())
           && bool("Matrix size mismatch!"));

    if ( gpu_matrix_range.start2() != 0)
    {
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size2());

      //copy each stride separately:
      for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
      {
        vcl_size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.internal_size2() + gpu_matrix_range.start2();
        vcl_size_t num_entries = gpu_matrix_range.size2();
        viennacl::backend::memory_read(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));
        //std::cout << "Strided copy worked!" << std::endl;

        for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
          cpu_matrix(i,j) = entries[j];
      }
    }
    else
    {
      //full block can be copied:
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.internal_size2());

      vcl_size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.internal_size2();
      vcl_size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.internal_size2();
      viennacl::backend::memory_read(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));
      //std::cout << "Block copy worked!" << std::endl;

      for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
        for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
          cpu_matrix(i,j) = entries[i*gpu_matrix_range.internal_size2() + j];
    }
  }

  //column_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(matrix_range<matrix<SCALARTYPE, column_major, 1> > const & gpu_matrix_range,
            CPU_MATRIX & cpu_matrix)
  {
    assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
           && (cpu_matrix.size2() == gpu_matrix_range.size2())
           && bool("Matrix size mismatch!"));

    if ( gpu_matrix_range.start1() != 0)
    {
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size1());

      //copy each stride separately:
      for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
      {
        vcl_size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.internal_size1() + gpu_matrix_range.start1();
        vcl_size_t num_entries = gpu_matrix_range.size1();
        viennacl::backend::memory_read(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));
        //std::cout << "Strided copy worked!" << std::endl;

        for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
          cpu_matrix(i,j) = entries[i];
      }
    }
    else
    {
      //full block can be copied:
      std::vector<SCALARTYPE> entries(gpu_matrix_range.internal_size1()*gpu_matrix_range.size2());

      vcl_size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.internal_size1();
      vcl_size_t num_entries = gpu_matrix_range.internal_size1() * gpu_matrix_range.size2();
      viennacl::backend::memory_read(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));
      //std::cout << "Block copy worked!" << std::endl;

      for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
        for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
          cpu_matrix(i,j) = entries[i + j*gpu_matrix_range.internal_size1()];
    }
  }


  //
  // Convenience function
  //
  template <typename MatrixType>
  matrix_range<MatrixType> project(MatrixType & A, viennacl::range const & r1, viennacl::range const & r2)
  {
    assert(r1.size() <= A.size1() && r2.size() <= A.size2() && bool("Size of range invalid!"));

    return matrix_range<MatrixType>(A, r1, r2);
  }

  template <typename MatrixType>
  matrix_range<MatrixType> project(matrix_range<MatrixType> & A, viennacl::range const & r1, viennacl::range const & r2)
  {
    assert(r1.size() <= A.size1() && r2.size() <= A.size2() && bool("Size of range invalid!"));

    return matrix_range<MatrixType>(A,
                                    viennacl::range(A.start1() + r1.start(), A.start1() + r1.start() + r1.size()),
                                    viennacl::range(A.start2() + r2.start(), A.start2() + r2.start() + r2.size())
                                   );
  }
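
  // --------------------------------------------------------------------
  // Usage sketch (editorial addition): project() is the idiomatic way to
  // obtain a matrix_range, and the proxy may appear on both sides of an
  // assignment. Reading a sub-block back to the host reuses the generic
  // copy() overloads above. Names (A, B, r, host_block) are illustrative.
  //
  //   viennacl::matrix<float> A(6, 6), B(6, 6);
  //   viennacl::range r(0, 3);
  //
  //   // assign the top-left 3x3 block of B to the top-left 3x3 block of A:
  //   viennacl::project(A, r, r) = viennacl::project(B, r, r);
  //
  //   // read the block back to the host:
  //   boost::numeric::ublas::matrix<float> host_block(3, 3);
  //   viennacl::copy(viennacl::project(A, r, r), host_block);
  // --------------------------------------------------------------------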

  //
  /////////////////////////////////////////////////////////////
  //

  /** @brief Represents a strided submatrix of a bigger matrix A, i.e. rows and columns selected with a fixed stride. */
  template <typename MatrixType>
  class matrix_slice : public matrix_base<typename MatrixType::cpu_value_type, typename MatrixType::orientation_functor>
  {
      typedef matrix_base<typename MatrixType::cpu_value_type,
                          typename MatrixType::orientation_functor>   base_type;
      typedef matrix_slice<MatrixType>                                self_type;

    public:
      typedef typename MatrixType::orientation_category   orientation_category;

      typedef typename MatrixType::value_type   value_type;
      typedef typename viennacl::result_of::cpu_value_type<value_type>::type   cpu_value_type;
      typedef range::size_type                  size_type;
      typedef range::difference_type            difference_type;
      typedef value_type                        reference;
      typedef const value_type &                const_reference;

      matrix_slice(MatrixType & A,
                   slice const & row_slice,
                   slice const & col_slice) : base_type(A.handle(),
                                                        row_slice.size(), row_slice.start(), row_slice.stride(), A.internal_size1(),
                                                        col_slice.size(), col_slice.start(), col_slice.stride(), A.internal_size2()) {}

      using base_type::operator=;
  };
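
  // --------------------------------------------------------------------
  // Usage sketch (editorial addition): a matrix_slice generalizes
  // matrix_range by allowing a stride between selected rows/columns.
  // viennacl::slice(start, stride, size) selects indices
  // start, start+stride, ..., start+(size-1)*stride. Names below
  // (M, M_every_other) are illustrative.
  //
  //   viennacl::matrix<float> M(8, 8);
  //   viennacl::slice rows(0, 2, 4);   // rows 0,2,4,6
  //   viennacl::slice cols(1, 3, 2);   // cols 1,4
  //   viennacl::matrix_slice<viennacl::matrix<float> > M_every_other(M, rows, cols);
  //   M_every_other *= 2.0f;           // scales only the selected entries
  // --------------------------------------------------------------------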

  /////////////////////////////////////////////////////////////
  ///////////////////////// CPU to GPU ////////////////////////
  /////////////////////////////////////////////////////////////

  //row_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(const CPU_MATRIX & cpu_matrix,
            matrix_slice<matrix<SCALARTYPE, row_major, 1> > & gpu_matrix_slice )
  {
    assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
           && (cpu_matrix.size2() == gpu_matrix_slice.size2())
           && bool("Matrix size mismatch!"));

    if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) )
    {
      vcl_size_t num_entries = gpu_matrix_slice.size2() * gpu_matrix_slice.stride2(); //no. of entries per stride

      std::vector<SCALARTYPE> entries(num_entries);

      //copy each stride separately:
      for (vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
      {
        vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2() + gpu_matrix_slice.start2();
        //read-modify-write: fetch the stride, patch the selected entries, write it back:
        viennacl::backend::memory_read(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));

        for (vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
          entries[j * gpu_matrix_slice.stride2()] = cpu_matrix(i,j);

        viennacl::backend::memory_write(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));
      }
    }
  }

  //column_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(const CPU_MATRIX & cpu_matrix,
            matrix_slice<matrix<SCALARTYPE, column_major, 1> > & gpu_matrix_slice )
  {
    assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
           && (cpu_matrix.size2() == gpu_matrix_slice.size2())
           && bool("Matrix size mismatch!"));

    if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) )
    {
      vcl_size_t num_entries = gpu_matrix_slice.size1() * gpu_matrix_slice.stride1(); //no. of entries per stride

      std::vector<SCALARTYPE> entries(num_entries);

      //copy each column stride separately:
      for (vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
      {
        vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1();

        viennacl::backend::memory_read(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));

        for (vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
          entries[i * gpu_matrix_slice.stride1()] = cpu_matrix(i,j);

        viennacl::backend::memory_write(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));
      }
    }
  }
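
  // --------------------------------------------------------------------
  // Usage sketch (editorial addition): writing host data into a slice.
  // Because the selected entries are not contiguous in device memory,
  // the overloads above fetch each affected stride, patch the selected
  // entries on the host, and write the stride back. Names (host_block,
  // M, M_sub) are illustrative.
  //
  //   boost::numeric::ublas::matrix<float> host_block(4, 2);   // matches the 4x2 slice below
  //   // ... fill host_block ...
  //
  //   viennacl::matrix<float> M(8, 8);
  //   viennacl::matrix_slice<viennacl::matrix<float> > M_sub(M, viennacl::slice(0, 2, 4), viennacl::slice(1, 3, 2));
  //   viennacl::copy(host_block, M_sub);   // entries of M outside the slice keep their values
  // --------------------------------------------------------------------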

  /////////////////////////////////////////////////////////////
  ///////////////////////// GPU to CPU ////////////////////////
  /////////////////////////////////////////////////////////////

  //row_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(matrix_slice<matrix<SCALARTYPE, row_major, 1> > const & gpu_matrix_slice,
            CPU_MATRIX & cpu_matrix)
  {
    assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
           && (cpu_matrix.size2() == gpu_matrix_slice.size2())
           && bool("Matrix size mismatch!"));

    if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) )
    {
      vcl_size_t num_entries = gpu_matrix_slice.size2() * gpu_matrix_slice.stride2(); //no. of entries per stride

      std::vector<SCALARTYPE> entries(num_entries);

      //copy each stride separately:
      for (vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
      {
        vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2() + gpu_matrix_slice.start2();

        viennacl::backend::memory_read(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));

        for (vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
          cpu_matrix(i,j) = entries[j * gpu_matrix_slice.stride2()];
      }
    }
  }

  //column_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(matrix_slice<matrix<SCALARTYPE, column_major, 1> > const & gpu_matrix_slice,
            CPU_MATRIX & cpu_matrix)
  {
    assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
           && (cpu_matrix.size2() == gpu_matrix_slice.size2())
           && bool("Matrix size mismatch!"));

    if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) )
    {
      vcl_size_t num_entries = gpu_matrix_slice.size1() * gpu_matrix_slice.stride1(); //no. of entries per stride

      std::vector<SCALARTYPE> entries(num_entries);

      //copy each column stride separately:
      for (vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
      {
        vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1();

        viennacl::backend::memory_read(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));

        for (vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
          cpu_matrix(i,j) = entries[i * gpu_matrix_slice.stride1()];
      }
    }
  }


  //
  // Convenience function
  //
  template <typename MatrixType>
  matrix_slice<MatrixType> project(MatrixType & A, viennacl::slice const & r1, viennacl::slice const & r2)
  {
    assert(r1.size() <= A.size1() && r2.size() <= A.size2() && bool("Size of slice invalid!"));

    return matrix_slice<MatrixType>(A, r1, r2);
  }

  template <typename MatrixType>
  matrix_slice<MatrixType> project(matrix_range<MatrixType> & A, viennacl::slice const & r1, viennacl::slice const & r2)
  {
    assert(r1.size() <= A.size1() && r2.size() <= A.size2() && bool("Size of slice invalid!"));

    return matrix_slice<MatrixType>(A,
                                    viennacl::slice(A.start1() + r1.start(), r1.stride(), r1.size()),
                                    viennacl::slice(A.start2() + r2.start(), r2.stride(), r2.size())
                                   );
  }

  template <typename MatrixType>
  matrix_slice<MatrixType> project(matrix_slice<MatrixType> & A, viennacl::slice const & r1, viennacl::slice const & r2)
  {
    assert(r1.size() <= A.size1() && r2.size() <= A.size2() && bool("Size of slice invalid!"));

    return matrix_slice<MatrixType>(A,
                                    viennacl::slice(A.start1() + r1.start() * A.stride1(), A.stride1() * r1.stride(), r1.size()),
                                    viennacl::slice(A.start2() + r2.start() * A.stride2(), A.stride2() * r2.stride(), r2.size())
                                   );
  }

  // TODO: Allow mix of range/slice

}

#endif
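
// --------------------------------------------------------------------
// Worked example (editorial addition): composing slices via project().
// Slicing a matrix_slice multiplies the strides and maps the inner start
// through the outer slice, so the composed proxy addresses
//   row(i) = outer.start + (inner.start + i*inner.stride) * outer.stride.
// Names (M, S1, S2) are illustrative.
//
//   viennacl::matrix<float> M(32, 32);
//   viennacl::matrix_slice<viennacl::matrix<float> > S1 =
//       viennacl::project(M, viennacl::slice(1, 2, 8), viennacl::slice(0, 2, 8));
//
//   // a (1,3,2) x (0,3,2) sub-slice of S1 addresses the same elements of M as:
//   viennacl::matrix_slice<viennacl::matrix<float> > S2(M,
//       viennacl::slice(3, 6, 2),    // rows: start 1 + 1*2 = 3, stride 2*3 = 6 -> rows 3 and 9
//       viennacl::slice(0, 6, 2));   // cols: start 0 + 0*2 = 0, stride 2*3 = 6 -> cols 0 and 6
// --------------------------------------------------------------------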