// ViennaCL - The Vienna Computing Library, version 1.5.0
00001 #ifndef VIENNACL_BACKEND_MEMORY_HPP 00002 #define VIENNACL_BACKEND_MEMORY_HPP 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2013, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00025 #include <vector> 00026 #include <cassert> 00027 #include "viennacl/forwards.h" 00028 #include "viennacl/backend/mem_handle.hpp" 00029 #include "viennacl/context.hpp" 00030 #include "viennacl/traits/handle.hpp" 00031 #include "viennacl/traits/context.hpp" 00032 #include "viennacl/backend/util.hpp" 00033 00034 #include "viennacl/backend/cpu_ram.hpp" 00035 00036 #ifdef VIENNACL_WITH_OPENCL 00037 #include "viennacl/backend/opencl.hpp" 00038 #include "viennacl/ocl/backend.hpp" 00039 #endif 00040 00041 #ifdef VIENNACL_WITH_CUDA 00042 #include "viennacl/backend/cuda.hpp" 00043 #endif 00044 00045 00046 namespace viennacl 00047 { 00048 namespace backend 00049 { 00050 00051 00052 // if a user compiles with CUDA, it is reasonable to expect that CUDA should be the default 00054 inline void finish() 00055 { 00056 #ifdef VIENNACL_WITH_CUDA 00057 cudaDeviceSynchronize(); 00058 #endif 00059 #ifdef VIENNACL_WITH_OPENCL 00060 viennacl::ocl::get_queue().finish(); 00061 #endif 00062 } 00063 00064 00065 00066 00067 // Requirements for backend: 00068 00069 // ---- Memory ---- 00070 // 00071 // * memory_create(size, host_ptr) 00072 // * memory_copy(src, dest, offset_src, offset_dest, size) 00073 // * 
memory_write(src, offset, size, ptr) 00074 // * memory_read(src, offset, size, ptr) 00075 // 00076 00087 inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, viennacl::context const & ctx, const void * host_ptr = NULL) 00088 { 00089 if (size_in_bytes > 0) 00090 { 00091 if (handle.get_active_handle_id() == MEMORY_NOT_INITIALIZED) 00092 handle.switch_active_handle_id(ctx.memory_type()); 00093 00094 switch(handle.get_active_handle_id()) 00095 { 00096 case MAIN_MEMORY: 00097 handle.ram_handle() = cpu_ram::memory_create(size_in_bytes, host_ptr); 00098 handle.raw_size(size_in_bytes); 00099 break; 00100 #ifdef VIENNACL_WITH_OPENCL 00101 case OPENCL_MEMORY: 00102 handle.opencl_handle().context(ctx.opencl_context()); 00103 handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr); 00104 handle.raw_size(size_in_bytes); 00105 break; 00106 #endif 00107 #ifdef VIENNACL_WITH_CUDA 00108 case CUDA_MEMORY: 00109 handle.cuda_handle() = cuda::memory_create(size_in_bytes, host_ptr); 00110 handle.raw_size(size_in_bytes); 00111 break; 00112 #endif 00113 case MEMORY_NOT_INITIALIZED: 00114 throw memory_exception("not initialised!"); 00115 default: 00116 throw memory_exception("unknown memory handle!"); 00117 } 00118 } 00119 } 00120 00121 /* 00122 inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, const void * host_ptr = NULL) 00123 { 00124 viennacl::context ctx(default_memory_type()); 00125 memory_create(handle, size_in_bytes, ctx, host_ptr); 00126 }*/ 00127 00128 00140 inline void memory_copy(mem_handle const & src_buffer, 00141 mem_handle & dst_buffer, 00142 vcl_size_t src_offset, 00143 vcl_size_t dst_offset, 00144 vcl_size_t bytes_to_copy) 00145 { 00146 assert( src_buffer.get_active_handle_id() == dst_buffer.get_active_handle_id() && bool("memory_copy() must be called on buffers from the same domain") ); 00147 00148 if (bytes_to_copy > 0) 00149 { 00150 switch(src_buffer.get_active_handle_id()) 
00151 { 00152 case MAIN_MEMORY: 00153 cpu_ram::memory_copy(src_buffer.ram_handle(), dst_buffer.ram_handle(), src_offset, dst_offset, bytes_to_copy); 00154 break; 00155 #ifdef VIENNACL_WITH_OPENCL 00156 case OPENCL_MEMORY: 00157 opencl::memory_copy(src_buffer.opencl_handle(), dst_buffer.opencl_handle(), src_offset, dst_offset, bytes_to_copy); 00158 break; 00159 #endif 00160 #ifdef VIENNACL_WITH_CUDA 00161 case CUDA_MEMORY: 00162 cuda::memory_copy(src_buffer.cuda_handle(), dst_buffer.cuda_handle(), src_offset, dst_offset, bytes_to_copy); 00163 break; 00164 #endif 00165 case MEMORY_NOT_INITIALIZED: 00166 throw memory_exception("not initialised!"); 00167 default: 00168 throw memory_exception("unknown memory handle!"); 00169 } 00170 } 00171 } 00172 00173 // TODO: Refine this concept. Maybe move to constructor? 00177 inline void memory_shallow_copy(mem_handle const & src_buffer, 00178 mem_handle & dst_buffer) 00179 { 00180 assert( (dst_buffer.get_active_handle_id() == MEMORY_NOT_INITIALIZED) && bool("Shallow copy on already initialized memory not supported!")); 00181 00182 switch(src_buffer.get_active_handle_id()) 00183 { 00184 case MAIN_MEMORY: 00185 dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id()); 00186 dst_buffer.ram_handle() = src_buffer.ram_handle(); 00187 dst_buffer.raw_size(src_buffer.raw_size()); 00188 break; 00189 #ifdef VIENNACL_WITH_OPENCL 00190 case OPENCL_MEMORY: 00191 dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id()); 00192 dst_buffer.opencl_handle() = src_buffer.opencl_handle(); 00193 dst_buffer.raw_size(src_buffer.raw_size()); 00194 break; 00195 #endif 00196 #ifdef VIENNACL_WITH_CUDA 00197 case CUDA_MEMORY: 00198 dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id()); 00199 dst_buffer.cuda_handle() = src_buffer.cuda_handle(); 00200 dst_buffer.raw_size(src_buffer.raw_size()); 00201 break; 00202 #endif 00203 case MEMORY_NOT_INITIALIZED: 00204 throw memory_exception("not initialised!"); 00205 default: 
00206 throw memory_exception("unknown memory handle!"); 00207 } 00208 } 00209 00220 inline void memory_write(mem_handle & dst_buffer, 00221 vcl_size_t dst_offset, 00222 vcl_size_t bytes_to_write, 00223 const void * ptr, 00224 bool async = false) 00225 { 00226 if (bytes_to_write > 0) 00227 { 00228 switch(dst_buffer.get_active_handle_id()) 00229 { 00230 case MAIN_MEMORY: 00231 cpu_ram::memory_write(dst_buffer.ram_handle(), dst_offset, bytes_to_write, ptr, async); 00232 break; 00233 #ifdef VIENNACL_WITH_OPENCL 00234 case OPENCL_MEMORY: 00235 opencl::memory_write(dst_buffer.opencl_handle(), dst_offset, bytes_to_write, ptr, async); 00236 break; 00237 #endif 00238 #ifdef VIENNACL_WITH_CUDA 00239 case CUDA_MEMORY: 00240 cuda::memory_write(dst_buffer.cuda_handle(), dst_offset, bytes_to_write, ptr, async); 00241 break; 00242 #endif 00243 case MEMORY_NOT_INITIALIZED: 00244 throw memory_exception("not initialised!"); 00245 default: 00246 throw memory_exception("unknown memory handle!"); 00247 } 00248 } 00249 } 00250 00261 inline void memory_read(mem_handle const & src_buffer, 00262 vcl_size_t src_offset, 00263 vcl_size_t bytes_to_read, 00264 void * ptr, 00265 bool async = false) 00266 { 00267 //finish(); //Fixes some issues with AMD APP SDK. However, might sacrifice a few percents of performance in some cases. 
00268 00269 if (bytes_to_read > 0) 00270 { 00271 switch(src_buffer.get_active_handle_id()) 00272 { 00273 case MAIN_MEMORY: 00274 cpu_ram::memory_read(src_buffer.ram_handle(), src_offset, bytes_to_read, ptr, async); 00275 break; 00276 #ifdef VIENNACL_WITH_OPENCL 00277 case OPENCL_MEMORY: 00278 opencl::memory_read(src_buffer.opencl_handle(), src_offset, bytes_to_read, ptr, async); 00279 break; 00280 #endif 00281 #ifdef VIENNACL_WITH_CUDA 00282 case CUDA_MEMORY: 00283 cuda::memory_read(src_buffer.cuda_handle(), src_offset, bytes_to_read, ptr, async); 00284 break; 00285 #endif 00286 case MEMORY_NOT_INITIALIZED: 00287 throw memory_exception("not initialised!"); 00288 default: 00289 throw memory_exception("unknown memory handle!"); 00290 } 00291 } 00292 } 00293 00294 00295 00296 namespace detail 00297 { 00298 template <typename T> 00299 vcl_size_t element_size(memory_types /* mem_type */) 00300 { 00301 return sizeof(T); 00302 } 00303 00304 00305 template <> 00306 inline vcl_size_t element_size<unsigned long>(memory_types 00307 #ifdef VIENNACL_WITH_OPENCL 00308 mem_type //in order to compile cleanly at -Wextra in GCC 00309 #endif 00310 ) 00311 { 00312 #ifdef VIENNACL_WITH_OPENCL 00313 if (mem_type == OPENCL_MEMORY) 00314 return sizeof(cl_ulong); 00315 #endif 00316 return sizeof(unsigned long); 00317 } 00318 00319 template <> 00320 inline vcl_size_t element_size<long>(memory_types 00321 #ifdef VIENNACL_WITH_OPENCL 00322 mem_type //in order to compile cleanly at -Wextra in GCC 00323 #endif 00324 ) 00325 { 00326 #ifdef VIENNACL_WITH_OPENCL 00327 if (mem_type == OPENCL_MEMORY) 00328 return sizeof(cl_long); 00329 #endif 00330 return sizeof(long); 00331 } 00332 00333 00334 template <> 00335 inline vcl_size_t element_size<unsigned int>(memory_types 00336 #ifdef VIENNACL_WITH_OPENCL 00337 mem_type //in order to compile cleanly at -Wextra in GCC 00338 #endif 00339 ) 00340 { 00341 #ifdef VIENNACL_WITH_OPENCL 00342 if (mem_type == OPENCL_MEMORY) 00343 return sizeof(cl_uint); 00344 
#endif 00345 return sizeof(unsigned int); 00346 } 00347 00348 template <> 00349 inline vcl_size_t element_size<int>(memory_types 00350 #ifdef VIENNACL_WITH_OPENCL 00351 mem_type //in order to compile cleanly at -Wextra in GCC 00352 #endif 00353 ) 00354 { 00355 #ifdef VIENNACL_WITH_OPENCL 00356 if (mem_type == OPENCL_MEMORY) 00357 return sizeof(cl_int); 00358 #endif 00359 return sizeof(int); 00360 } 00361 00362 00363 } 00364 00365 00367 template <typename DataType> 00368 void switch_memory_context(mem_handle & handle, viennacl::context new_ctx) 00369 { 00370 if (handle.get_active_handle_id() == new_ctx.memory_type()) 00371 return; 00372 00373 if (handle.get_active_handle_id() == viennacl::MEMORY_NOT_INITIALIZED || handle.raw_size() == 0) 00374 { 00375 handle.switch_active_handle_id(new_ctx.memory_type()); 00376 #ifdef VIENNACL_WITH_OPENCL 00377 if (new_ctx.memory_type() == OPENCL_MEMORY) 00378 handle.opencl_handle().context(new_ctx.opencl_context()); 00379 #endif 00380 return; 00381 } 00382 00383 vcl_size_t size_dst = detail::element_size<DataType>(handle.get_active_handle_id()); 00384 vcl_size_t size_src = detail::element_size<DataType>(new_ctx.memory_type()); 00385 00386 if (size_dst != size_src) // OpenCL data element size not the same as host data element size 00387 { 00388 throw "Heterogeneous data element sizes not yet supported!"; 00389 } 00390 else //no data conversion required 00391 { 00392 if (handle.get_active_handle_id() == MAIN_MEMORY) //we can access the existing data directly 00393 { 00394 switch (new_ctx.memory_type()) 00395 { 00396 #ifdef VIENNACL_WITH_OPENCL 00397 case OPENCL_MEMORY: 00398 handle.opencl_handle().context(new_ctx.opencl_context()); 00399 handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), handle.raw_size(), handle.ram_handle().get()); 00400 break; 00401 #endif 00402 #ifdef VIENNACL_WITH_CUDA 00403 case CUDA_MEMORY: 00404 handle.cuda_handle() = cuda::memory_create(handle.raw_size(), 
handle.ram_handle().get()); 00405 break; 00406 #endif 00407 case MAIN_MEMORY: 00408 default: 00409 throw "Invalid destination domain"; 00410 } 00411 } 00412 #ifdef VIENNACL_WITH_OPENCL 00413 else if (handle.get_active_handle_id() == OPENCL_MEMORY) // data can be dumped into destination directly 00414 { 00415 std::vector<DataType> buffer; 00416 00417 switch (new_ctx.memory_type()) 00418 { 00419 case MAIN_MEMORY: 00420 handle.ram_handle() = cpu_ram::memory_create(handle.raw_size()); 00421 opencl::memory_read(handle.opencl_handle(), 0, handle.raw_size(), handle.ram_handle().get()); 00422 break; 00423 #ifdef VIENNACL_WITH_CUDA 00424 case CUDA_MEMORY: 00425 buffer.resize(handle.raw_size() / sizeof(DataType)); 00426 opencl::memory_read(handle.opencl_handle(), 0, handle.raw_size(), &(buffer[0])); 00427 cuda::memory_create(handle.cuda_handle(), handle.raw_size(), &(buffer[0])); 00428 break; 00429 #endif 00430 default: 00431 throw "Invalid destination domain"; 00432 } 00433 } 00434 #endif 00435 #ifdef VIENNACL_WITH_CUDA 00436 else //CUDA_MEMORY 00437 { 00438 std::vector<DataType> buffer; 00439 00440 // write 00441 switch (new_ctx.memory_type()) 00442 { 00443 case MAIN_MEMORY: 00444 handle.ram_handle() = cpu_ram::memory_create(handle.raw_size()); 00445 cuda::memory_read(handle.cuda_handle(), 0, handle.raw_size(), handle.ram_handle().get()); 00446 break; 00447 #ifdef VIENNACL_WITH_OPENCL 00448 case OPENCL_MEMORY: 00449 buffer.resize(handle.raw_size() / sizeof(DataType)); 00450 cuda::memory_read(handle.cuda_handle(), 0, handle.raw_size(), &(buffer[0])); 00451 handle.opencl_handle() = opencl::memory_create(handle.raw_size(), &(buffer[0])); 00452 break; 00453 #endif 00454 default: 00455 throw "Unsupported source memory domain"; 00456 } 00457 } 00458 #endif 00459 00460 // everything succeeded so far, now switch to new domain: 00461 handle.switch_active_handle_id(new_ctx.memory_type()); 00462 00463 } // no data conversion 00464 } 00465 00466 00467 00469 template <typename 
DataType> 00470 void typesafe_memory_copy(mem_handle const & handle_src, mem_handle & handle_dst) 00471 { 00472 if (handle_dst.get_active_handle_id() == MEMORY_NOT_INITIALIZED) 00473 handle_dst.switch_active_handle_id(default_memory_type()); 00474 00475 vcl_size_t element_size_src = detail::element_size<DataType>(handle_src.get_active_handle_id()); 00476 vcl_size_t element_size_dst = detail::element_size<DataType>(handle_dst.get_active_handle_id()); 00477 00478 if (element_size_src != element_size_dst) 00479 { 00480 // Data needs to be converted. 00481 00482 typesafe_host_array<DataType> buffer_src(handle_src); 00483 typesafe_host_array<DataType> buffer_dst(handle_dst, handle_src.raw_size() / element_size_src); 00484 00485 // 00486 // Step 1: Fill buffer_dst depending on where the data resides: 00487 // 00488 DataType const * src_data; 00489 switch (handle_src.get_active_handle_id()) 00490 { 00491 case MAIN_MEMORY: 00492 src_data = reinterpret_cast<DataType const *>(handle_src.ram_handle().get()); 00493 for (vcl_size_t i=0; i<buffer_dst.size(); ++i) 00494 buffer_dst.set(i, src_data[i]); 00495 break; 00496 00497 #ifdef VIENNACL_WITH_OPENCL 00498 case OPENCL_MEMORY: 00499 buffer_src.resize(handle_src, handle_src.raw_size() / element_size_src); 00500 opencl::memory_read(handle_src.opencl_handle(), 0, buffer_src.raw_size(), buffer_src.get()); 00501 for (vcl_size_t i=0; i<buffer_dst.size(); ++i) 00502 buffer_dst.set(i, buffer_src[i]); 00503 break; 00504 #endif 00505 #ifdef VIENNACL_WITH_CUDA 00506 case CUDA_MEMORY: 00507 buffer_src.resize(handle_src, handle_src.raw_size() / element_size_src); 00508 cuda::memory_read(handle_src.cuda_handle(), 0, buffer_src.raw_size(), buffer_src.get()); 00509 for (vcl_size_t i=0; i<buffer_dst.size(); ++i) 00510 buffer_dst.set(i, buffer_src[i]); 00511 break; 00512 #endif 00513 00514 default: 00515 throw "unsupported memory domain"; 00516 } 00517 00518 // 00519 // Step 2: Write to destination 00520 // 00521 if (handle_dst.raw_size() == 
buffer_dst.raw_size()) 00522 viennacl::backend::memory_write(handle_dst, 0, buffer_dst.raw_size(), buffer_dst.get()); 00523 else 00524 viennacl::backend::memory_create(handle_dst, buffer_dst.raw_size(), viennacl::traits::context(handle_dst), buffer_dst.get()); 00525 00526 } 00527 else 00528 { 00529 // No data conversion required. 00530 typesafe_host_array<DataType> buffer(handle_src); 00531 00532 switch (handle_src.get_active_handle_id()) 00533 { 00534 case MAIN_MEMORY: 00535 switch (handle_dst.get_active_handle_id()) 00536 { 00537 case MAIN_MEMORY: 00538 case OPENCL_MEMORY: 00539 case CUDA_MEMORY: 00540 if (handle_dst.raw_size() == handle_src.raw_size()) 00541 viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), handle_src.ram_handle().get()); 00542 else 00543 viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst), handle_src.ram_handle().get()); 00544 break; 00545 00546 default: 00547 throw "unsupported destination memory domain"; 00548 } 00549 break; 00550 00551 case OPENCL_MEMORY: 00552 switch (handle_dst.get_active_handle_id()) 00553 { 00554 case MAIN_MEMORY: 00555 if (handle_dst.raw_size() != handle_src.raw_size()) 00556 viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); 00557 viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), handle_dst.ram_handle().get()); 00558 break; 00559 00560 case OPENCL_MEMORY: 00561 if (handle_dst.raw_size() != handle_src.raw_size()) 00562 viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); 00563 viennacl::backend::memory_copy(handle_src, handle_dst, 0, 0, handle_src.raw_size()); 00564 break; 00565 00566 case CUDA_MEMORY: 00567 if (handle_dst.raw_size() != handle_src.raw_size()) 00568 viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); 00569 buffer.resize(handle_src, 
handle_src.raw_size() / element_size_src); 00570 viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), buffer.get()); 00571 viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), buffer.get()); 00572 break; 00573 00574 default: 00575 throw "unsupported destination memory domain"; 00576 } 00577 break; 00578 00579 case CUDA_MEMORY: 00580 switch (handle_dst.get_active_handle_id()) 00581 { 00582 case MAIN_MEMORY: 00583 if (handle_dst.raw_size() != handle_src.raw_size()) 00584 viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); 00585 viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), handle_dst.ram_handle().get()); 00586 break; 00587 00588 case OPENCL_MEMORY: 00589 if (handle_dst.raw_size() != handle_src.raw_size()) 00590 viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); 00591 buffer.resize(handle_src, handle_src.raw_size() / element_size_src); 00592 viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), buffer.get()); 00593 viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), buffer.get()); 00594 break; 00595 00596 case CUDA_MEMORY: 00597 if (handle_dst.raw_size() != handle_src.raw_size()) 00598 viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); 00599 viennacl::backend::memory_copy(handle_src, handle_dst, 0, 0, handle_src.raw_size()); 00600 break; 00601 00602 default: 00603 throw "unsupported destination memory domain"; 00604 } 00605 break; 00606 00607 default: 00608 throw "unsupported source memory domain"; 00609 } 00610 00611 } 00612 } 00613 00614 00615 } //backend 00616 00617 00618 // 00619 // Convenience layer: 00620 // 00621 00623 template <typename T> 00624 void switch_memory_context(T & obj, viennacl::context new_ctx) 00625 { 00626 obj.switch_memory_context(new_ctx); 00627 } 00628 00629 } //viennacl 00630 #endif