ViennaCL - The Vienna Computing Library  1.5.0
viennacl/backend/memory.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_BACKEND_MEMORY_HPP
00002 #define VIENNACL_BACKEND_MEMORY_HPP
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2013, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00025 #include <vector>
00026 #include <cassert>
00027 #include "viennacl/forwards.h"
00028 #include "viennacl/backend/mem_handle.hpp"
00029 #include "viennacl/context.hpp"
00030 #include "viennacl/traits/handle.hpp"
00031 #include "viennacl/traits/context.hpp"
00032 #include "viennacl/backend/util.hpp"
00033 
00034 #include "viennacl/backend/cpu_ram.hpp"
00035 
00036 #ifdef VIENNACL_WITH_OPENCL
00037   #include "viennacl/backend/opencl.hpp"
00038   #include "viennacl/ocl/backend.hpp"
00039 #endif
00040 
00041 #ifdef VIENNACL_WITH_CUDA
00042   #include "viennacl/backend/cuda.hpp"
00043 #endif
00044 
00045 
00046 namespace viennacl
00047 {
00048   namespace backend
00049   {
00050 
00051 
00052 // if a user compiles with CUDA, it is reasonable to expect that CUDA should be the default
00054     inline void finish()
00055     {
00056 #ifdef VIENNACL_WITH_CUDA
00057       cudaDeviceSynchronize();
00058 #endif
00059 #ifdef VIENNACL_WITH_OPENCL
00060       viennacl::ocl::get_queue().finish();
00061 #endif
00062     }
00063 
00064 
00065 
00066 
00067     // Requirements for backend:
00068 
00069     // ---- Memory ----
00070     //
00071     // * memory_create(size, host_ptr)
00072     // * memory_copy(src, dest, offset_src, offset_dest, size)
00073     // * memory_write(dst, offset, size, ptr)
00074     // * memory_read(src, offset, size, ptr)
00075     //
00076 
00087     inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, viennacl::context const & ctx, const void * host_ptr = NULL)
00088     {
00089       if (size_in_bytes > 0)
00090       {
00091         if (handle.get_active_handle_id() == MEMORY_NOT_INITIALIZED)
00092           handle.switch_active_handle_id(ctx.memory_type());
00093 
00094         switch(handle.get_active_handle_id())
00095         {
00096           case MAIN_MEMORY:
00097             handle.ram_handle() = cpu_ram::memory_create(size_in_bytes, host_ptr);
00098             handle.raw_size(size_in_bytes);
00099             break;
00100 #ifdef VIENNACL_WITH_OPENCL
00101           case OPENCL_MEMORY:
00102             handle.opencl_handle().context(ctx.opencl_context());
00103             handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr);
00104             handle.raw_size(size_in_bytes);
00105             break;
00106 #endif
00107 #ifdef VIENNACL_WITH_CUDA
00108           case CUDA_MEMORY:
00109             handle.cuda_handle() = cuda::memory_create(size_in_bytes, host_ptr);
00110             handle.raw_size(size_in_bytes);
00111             break;
00112 #endif
00113           case MEMORY_NOT_INITIALIZED:
00114             throw memory_exception("not initialised!");
00115           default:
00116             throw memory_exception("unknown memory handle!");
00117         }
00118       }
00119     }
00120 
00121     /*
00122     inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, const void * host_ptr = NULL)
00123     {
00124       viennacl::context  ctx(default_memory_type());
00125       memory_create(handle, size_in_bytes, ctx, host_ptr);
00126     }*/
00127 
00128 
00140     inline void memory_copy(mem_handle const & src_buffer,
00141                             mem_handle & dst_buffer,
00142                             vcl_size_t src_offset,
00143                             vcl_size_t dst_offset,
00144                             vcl_size_t bytes_to_copy)
00145     {
00146       assert( src_buffer.get_active_handle_id() == dst_buffer.get_active_handle_id() && bool("memory_copy() must be called on buffers from the same domain") );
00147 
00148       if (bytes_to_copy > 0)
00149       {
00150         switch(src_buffer.get_active_handle_id())
00151         {
00152           case MAIN_MEMORY:
00153             cpu_ram::memory_copy(src_buffer.ram_handle(), dst_buffer.ram_handle(), src_offset, dst_offset, bytes_to_copy);
00154             break;
00155 #ifdef VIENNACL_WITH_OPENCL
00156           case OPENCL_MEMORY:
00157             opencl::memory_copy(src_buffer.opencl_handle(), dst_buffer.opencl_handle(), src_offset, dst_offset, bytes_to_copy);
00158             break;
00159 #endif
00160 #ifdef VIENNACL_WITH_CUDA
00161           case CUDA_MEMORY:
00162             cuda::memory_copy(src_buffer.cuda_handle(), dst_buffer.cuda_handle(), src_offset, dst_offset, bytes_to_copy);
00163             break;
00164 #endif
00165           case MEMORY_NOT_INITIALIZED:
00166             throw memory_exception("not initialised!");
00167           default:
00168             throw memory_exception("unknown memory handle!");
00169         }
00170       }
00171     }
00172 
00173     // TODO: Refine this concept. Maybe move to constructor?
00177     inline void memory_shallow_copy(mem_handle const & src_buffer,
00178                                     mem_handle & dst_buffer)
00179     {
00180       assert( (dst_buffer.get_active_handle_id() == MEMORY_NOT_INITIALIZED) && bool("Shallow copy on already initialized memory not supported!"));
00181 
00182       switch(src_buffer.get_active_handle_id())
00183       {
00184         case MAIN_MEMORY:
00185           dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id());
00186           dst_buffer.ram_handle() = src_buffer.ram_handle();
00187           dst_buffer.raw_size(src_buffer.raw_size());
00188           break;
00189 #ifdef VIENNACL_WITH_OPENCL
00190         case OPENCL_MEMORY:
00191           dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id());
00192           dst_buffer.opencl_handle() = src_buffer.opencl_handle();
00193           dst_buffer.raw_size(src_buffer.raw_size());
00194           break;
00195 #endif
00196 #ifdef VIENNACL_WITH_CUDA
00197         case CUDA_MEMORY:
00198           dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id());
00199           dst_buffer.cuda_handle() = src_buffer.cuda_handle();
00200           dst_buffer.raw_size(src_buffer.raw_size());
00201           break;
00202 #endif
00203         case MEMORY_NOT_INITIALIZED:
00204           throw memory_exception("not initialised!");
00205         default:
00206           throw memory_exception("unknown memory handle!");
00207       }
00208     }
00209 
00220     inline void memory_write(mem_handle & dst_buffer,
00221                              vcl_size_t dst_offset,
00222                              vcl_size_t bytes_to_write,
00223                              const void * ptr,
00224                              bool async = false)
00225     {
00226       if (bytes_to_write > 0)
00227       {
00228         switch(dst_buffer.get_active_handle_id())
00229         {
00230           case MAIN_MEMORY:
00231             cpu_ram::memory_write(dst_buffer.ram_handle(), dst_offset, bytes_to_write, ptr, async);
00232             break;
00233 #ifdef VIENNACL_WITH_OPENCL
00234           case OPENCL_MEMORY:
00235             opencl::memory_write(dst_buffer.opencl_handle(), dst_offset, bytes_to_write, ptr, async);
00236             break;
00237 #endif
00238 #ifdef VIENNACL_WITH_CUDA
00239           case CUDA_MEMORY:
00240             cuda::memory_write(dst_buffer.cuda_handle(), dst_offset, bytes_to_write, ptr, async);
00241             break;
00242 #endif
00243           case MEMORY_NOT_INITIALIZED:
00244             throw memory_exception("not initialised!");
00245           default:
00246             throw memory_exception("unknown memory handle!");
00247         }
00248       }
00249     }
00250 
00261     inline void memory_read(mem_handle const & src_buffer,
00262                             vcl_size_t src_offset,
00263                             vcl_size_t bytes_to_read,
00264                             void * ptr,
00265                             bool async = false)
00266     {
00267       //finish(); //Fixes some issues with AMD APP SDK. However, might sacrifice a few percents of performance in some cases.
00268 
00269       if (bytes_to_read > 0)
00270       {
00271         switch(src_buffer.get_active_handle_id())
00272         {
00273           case MAIN_MEMORY:
00274             cpu_ram::memory_read(src_buffer.ram_handle(), src_offset, bytes_to_read, ptr, async);
00275             break;
00276 #ifdef VIENNACL_WITH_OPENCL
00277           case OPENCL_MEMORY:
00278             opencl::memory_read(src_buffer.opencl_handle(), src_offset, bytes_to_read, ptr, async);
00279             break;
00280 #endif
00281 #ifdef VIENNACL_WITH_CUDA
00282           case CUDA_MEMORY:
00283             cuda::memory_read(src_buffer.cuda_handle(), src_offset, bytes_to_read, ptr, async);
00284             break;
00285 #endif
00286           case MEMORY_NOT_INITIALIZED:
00287             throw memory_exception("not initialised!");
00288           default:
00289             throw memory_exception("unknown memory handle!");
00290         }
00291       }
00292     }
00293 
00294 
00295 
00296     namespace detail
00297     {
00298       template <typename T>
00299       vcl_size_t element_size(memory_types /* mem_type */)
00300       {
00301         return sizeof(T);
00302       }
00303 
00304 
00305       template <>
00306       inline vcl_size_t element_size<unsigned long>(memory_types
00307 #ifdef VIENNACL_WITH_OPENCL
00308                                                       mem_type  //in order to compile cleanly at -Wextra in GCC
00309 #endif
00310                                                     )
00311       {
00312 #ifdef VIENNACL_WITH_OPENCL
00313         if (mem_type == OPENCL_MEMORY)
00314           return sizeof(cl_ulong);
00315 #endif
00316         return sizeof(unsigned long);
00317       }
00318 
00319       template <>
00320       inline vcl_size_t element_size<long>(memory_types
00321 #ifdef VIENNACL_WITH_OPENCL
00322                                                       mem_type  //in order to compile cleanly at -Wextra in GCC
00323 #endif
00324                                            )
00325       {
00326 #ifdef VIENNACL_WITH_OPENCL
00327         if (mem_type == OPENCL_MEMORY)
00328           return sizeof(cl_long);
00329 #endif
00330         return sizeof(long);
00331       }
00332 
00333 
00334       template <>
00335       inline vcl_size_t element_size<unsigned int>(memory_types
00336 #ifdef VIENNACL_WITH_OPENCL
00337                                                       mem_type  //in order to compile cleanly at -Wextra in GCC
00338 #endif
00339                                                    )
00340       {
00341 #ifdef VIENNACL_WITH_OPENCL
00342         if (mem_type == OPENCL_MEMORY)
00343           return sizeof(cl_uint);
00344 #endif
00345         return sizeof(unsigned int);
00346       }
00347 
00348       template <>
00349       inline vcl_size_t element_size<int>(memory_types
00350 #ifdef VIENNACL_WITH_OPENCL
00351                                            mem_type  //in order to compile cleanly at -Wextra in GCC
00352 #endif
00353                                           )
00354       {
00355 #ifdef VIENNACL_WITH_OPENCL
00356         if (mem_type == OPENCL_MEMORY)
00357           return sizeof(cl_int);
00358 #endif
00359         return sizeof(int);
00360       }
00361 
00362 
00363     }
00364 
00365 
00367     template <typename DataType>
00368     void switch_memory_context(mem_handle & handle, viennacl::context new_ctx)
00369     {
00370       if (handle.get_active_handle_id() == new_ctx.memory_type())
00371         return;
00372 
00373       if (handle.get_active_handle_id() == viennacl::MEMORY_NOT_INITIALIZED || handle.raw_size() == 0)
00374       {
00375         handle.switch_active_handle_id(new_ctx.memory_type());
00376 #ifdef VIENNACL_WITH_OPENCL
00377         if (new_ctx.memory_type() == OPENCL_MEMORY)
00378           handle.opencl_handle().context(new_ctx.opencl_context());
00379 #endif
00380         return;
00381       }
00382 
00383       vcl_size_t size_dst = detail::element_size<DataType>(handle.get_active_handle_id());
00384       vcl_size_t size_src = detail::element_size<DataType>(new_ctx.memory_type());
00385 
00386       if (size_dst != size_src)  // OpenCL data element size not the same as host data element size
00387       {
00388         throw "Heterogeneous data element sizes not yet supported!";
00389       }
00390       else //no data conversion required
00391       {
00392         if (handle.get_active_handle_id() == MAIN_MEMORY) //we can access the existing data directly
00393         {
00394           switch (new_ctx.memory_type())
00395           {
00396 #ifdef VIENNACL_WITH_OPENCL
00397             case OPENCL_MEMORY:
00398               handle.opencl_handle().context(new_ctx.opencl_context());
00399               handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), handle.raw_size(), handle.ram_handle().get());
00400               break;
00401 #endif
00402 #ifdef VIENNACL_WITH_CUDA
00403             case CUDA_MEMORY:
00404               handle.cuda_handle() = cuda::memory_create(handle.raw_size(), handle.ram_handle().get());
00405               break;
00406 #endif
00407             case MAIN_MEMORY:
00408             default:
00409               throw "Invalid destination domain";
00410           }
00411         }
00412 #ifdef VIENNACL_WITH_OPENCL
00413         else if (handle.get_active_handle_id() == OPENCL_MEMORY) // data can be dumped into destination directly
00414         {
00415           std::vector<DataType> buffer;
00416 
00417           switch (new_ctx.memory_type())
00418           {
00419             case MAIN_MEMORY:
00420               handle.ram_handle() = cpu_ram::memory_create(handle.raw_size());
00421               opencl::memory_read(handle.opencl_handle(), 0, handle.raw_size(), handle.ram_handle().get());
00422               break;
00423   #ifdef VIENNACL_WITH_CUDA
00424             case CUDA_MEMORY:
00425               buffer.resize(handle.raw_size() / sizeof(DataType));
00426               opencl::memory_read(handle.opencl_handle(), 0, handle.raw_size(), &(buffer[0]));
00427               cuda::memory_create(handle.cuda_handle(), handle.raw_size(), &(buffer[0]));
00428               break;
00429   #endif
00430             default:
00431               throw "Invalid destination domain";
00432           }
00433         }
00434 #endif
00435 #ifdef VIENNACL_WITH_CUDA
00436         else //CUDA_MEMORY
00437         {
00438           std::vector<DataType> buffer;
00439 
00440           // write
00441           switch (new_ctx.memory_type())
00442           {
00443             case MAIN_MEMORY:
00444               handle.ram_handle() = cpu_ram::memory_create(handle.raw_size());
00445               cuda::memory_read(handle.cuda_handle(), 0, handle.raw_size(), handle.ram_handle().get());
00446               break;
00447   #ifdef VIENNACL_WITH_OPENCL
00448             case OPENCL_MEMORY:
00449               buffer.resize(handle.raw_size() / sizeof(DataType));
00450               cuda::memory_read(handle.cuda_handle(), 0, handle.raw_size(), &(buffer[0]));
00451               handle.opencl_handle() = opencl::memory_create(handle.raw_size(), &(buffer[0]));
00452               break;
00453   #endif
00454             default:
00455               throw "Unsupported source memory domain";
00456           }
00457         }
00458 #endif
00459 
00460         // everything succeeded so far, now switch to new domain:
00461         handle.switch_active_handle_id(new_ctx.memory_type());
00462 
00463       } // no data conversion
00464     }
00465 
00466 
00467 
00469     template <typename DataType>
00470     void typesafe_memory_copy(mem_handle const & handle_src, mem_handle & handle_dst)
00471     {
00472       if (handle_dst.get_active_handle_id() == MEMORY_NOT_INITIALIZED)
00473         handle_dst.switch_active_handle_id(default_memory_type());
00474 
00475       vcl_size_t element_size_src = detail::element_size<DataType>(handle_src.get_active_handle_id());
00476       vcl_size_t element_size_dst = detail::element_size<DataType>(handle_dst.get_active_handle_id());
00477 
00478       if (element_size_src != element_size_dst)
00479       {
00480         // Data needs to be converted.
00481 
00482         typesafe_host_array<DataType> buffer_src(handle_src);
00483         typesafe_host_array<DataType> buffer_dst(handle_dst, handle_src.raw_size() / element_size_src);
00484 
00485         //
00486         // Step 1: Fill buffer_dst depending on where the data resides:
00487         //
00488         DataType const * src_data;
00489         switch (handle_src.get_active_handle_id())
00490         {
00491           case MAIN_MEMORY:
00492             src_data = reinterpret_cast<DataType const *>(handle_src.ram_handle().get());
00493             for (vcl_size_t i=0; i<buffer_dst.size(); ++i)
00494               buffer_dst.set(i, src_data[i]);
00495             break;
00496 
00497 #ifdef VIENNACL_WITH_OPENCL
00498           case OPENCL_MEMORY:
00499             buffer_src.resize(handle_src, handle_src.raw_size() / element_size_src);
00500             opencl::memory_read(handle_src.opencl_handle(), 0, buffer_src.raw_size(), buffer_src.get());
00501             for (vcl_size_t i=0; i<buffer_dst.size(); ++i)
00502               buffer_dst.set(i, buffer_src[i]);
00503             break;
00504 #endif
00505 #ifdef VIENNACL_WITH_CUDA
00506           case CUDA_MEMORY:
00507             buffer_src.resize(handle_src, handle_src.raw_size() / element_size_src);
00508             cuda::memory_read(handle_src.cuda_handle(), 0, buffer_src.raw_size(), buffer_src.get());
00509             for (vcl_size_t i=0; i<buffer_dst.size(); ++i)
00510               buffer_dst.set(i, buffer_src[i]);
00511             break;
00512 #endif
00513 
00514           default:
00515             throw "unsupported memory domain";
00516         }
00517 
00518         //
00519         // Step 2: Write to destination
00520         //
00521         if (handle_dst.raw_size() == buffer_dst.raw_size())
00522           viennacl::backend::memory_write(handle_dst, 0, buffer_dst.raw_size(), buffer_dst.get());
00523         else
00524           viennacl::backend::memory_create(handle_dst, buffer_dst.raw_size(), viennacl::traits::context(handle_dst), buffer_dst.get());
00525 
00526       }
00527       else
00528       {
00529         // No data conversion required.
00530         typesafe_host_array<DataType> buffer(handle_src);
00531 
00532         switch (handle_src.get_active_handle_id())
00533         {
00534           case MAIN_MEMORY:
00535             switch (handle_dst.get_active_handle_id())
00536             {
00537               case MAIN_MEMORY:
00538               case OPENCL_MEMORY:
00539               case CUDA_MEMORY:
00540                 if (handle_dst.raw_size() == handle_src.raw_size())
00541                   viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), handle_src.ram_handle().get());
00542                 else
00543                   viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst), handle_src.ram_handle().get());
00544                 break;
00545 
00546               default:
00547                 throw "unsupported destination memory domain";
00548             }
00549             break;
00550 
00551           case OPENCL_MEMORY:
00552             switch (handle_dst.get_active_handle_id())
00553             {
00554               case MAIN_MEMORY:
00555                 if (handle_dst.raw_size() != handle_src.raw_size())
00556                   viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
00557                 viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), handle_dst.ram_handle().get());
00558                 break;
00559 
00560               case OPENCL_MEMORY:
00561                 if (handle_dst.raw_size() != handle_src.raw_size())
00562                   viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
00563                 viennacl::backend::memory_copy(handle_src, handle_dst, 0, 0, handle_src.raw_size());
00564                 break;
00565 
00566               case CUDA_MEMORY:
00567                 if (handle_dst.raw_size() != handle_src.raw_size())
00568                   viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
00569                 buffer.resize(handle_src, handle_src.raw_size() / element_size_src);
00570                 viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), buffer.get());
00571                 viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), buffer.get());
00572                 break;
00573 
00574               default:
00575                 throw "unsupported destination memory domain";
00576             }
00577             break;
00578 
00579           case CUDA_MEMORY:
00580             switch (handle_dst.get_active_handle_id())
00581             {
00582               case MAIN_MEMORY:
00583                 if (handle_dst.raw_size() != handle_src.raw_size())
00584                   viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
00585                 viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), handle_dst.ram_handle().get());
00586                 break;
00587 
00588               case OPENCL_MEMORY:
00589                 if (handle_dst.raw_size() != handle_src.raw_size())
00590                   viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
00591                 buffer.resize(handle_src, handle_src.raw_size() / element_size_src);
00592                 viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), buffer.get());
00593                 viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), buffer.get());
00594                 break;
00595 
00596               case CUDA_MEMORY:
00597                 if (handle_dst.raw_size() != handle_src.raw_size())
00598                   viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst));
00599                 viennacl::backend::memory_copy(handle_src, handle_dst, 0, 0, handle_src.raw_size());
00600                 break;
00601 
00602               default:
00603                 throw "unsupported destination memory domain";
00604             }
00605             break;
00606 
00607           default:
00608             throw "unsupported source memory domain";
00609         }
00610 
00611       }
00612     }
00613 
00614 
00615   } //backend
00616 
00617 
00618   //
00619   // Convenience layer:
00620   //
00621 
00623   template <typename T>
00624   void switch_memory_context(T & obj, viennacl::context new_ctx)
00625   {
00626     obj.switch_memory_context(new_ctx);
00627   }
00628 
00629 } //viennacl
00630 #endif