ViennaCL - The Vienna Computing Library  1.5.0
viennacl/backend/cuda.hpp
#ifndef VIENNACL_BACKEND_CUDA_HPP_
#define VIENNACL_BACKEND_CUDA_HPP_

/* =========================================================================
   Copyright (c) 2010-2013, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

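/** @file  viennacl/backend/cuda.hpp
    @brief Implementations of the memory management routines for the CUDA backend
*/
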
#include <iostream>
#include <vector>
#include <cassert>
#include "viennacl/tools/shared_ptr.hpp"

// includes CUDA
#include <cuda_runtime.h>

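// Checks the return code of a CUDA runtime call and reports file and line on failure: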
#define VIENNACL_CUDA_ERROR_CHECK(err)  detail::cuda_error_check (err, __FILE__, __LINE__)

namespace viennacl
{
  namespace backend
  {
    namespace cuda
    {
      typedef viennacl::tools::shared_ptr<char>  handle_type;
      // Requirements for backend:

      // * memory_create(size, host_ptr)
      // * memory_copy(src, dest, offset_src, offset_dest, size)
      // * memory_write_from_main_memory(src, offset, size,
      //                                 dest, offset, size)
      // * memory_read_to_main_memory(src, offset, size,
      //                              dest, offset, size)

      namespace detail
      {

        inline void cuda_error_check(cudaError error_code, const char *file, const int line)
        {
          if (cudaSuccess != error_code)
          {
            std::cerr << file << "(" << line << "): CUDA Runtime API error " << error_code << ": " << cudaGetErrorString(error_code) << std::endl;
            throw "CUDA error";
          }
        }

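        /** @brief Functor for releasing CUDA device memory via cudaFree(); used as the deleter of the shared_ptr-based handle_type */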
        template <typename U>
        struct cuda_deleter
        {
          void operator()(U * p) const
          {
            //std::cout << "Freeing handle " << reinterpret_cast<void *>(p) << std::endl;
            cudaFree(p);
          }
        };

      }

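      /** @brief Creates an array of the given byte size on the CUDA device. If host_ptr is provided, the device buffer is initialized from that host data.
       *
       * @param size_in_bytes   Number of bytes to allocate on the device
       * @param host_ptr        Optional pointer to host data used to initialize the new buffer
       */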
      inline handle_type memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL)
      {
        void * dev_ptr = NULL;
        VIENNACL_CUDA_ERROR_CHECK( cudaMalloc(&dev_ptr, size_in_bytes) );
        //std::cout << "Allocated new dev_ptr " << dev_ptr << " of size " << size_in_bytes << std::endl;

        if (!host_ptr)
          return handle_type(reinterpret_cast<char *>(dev_ptr), detail::cuda_deleter<char>());

        handle_type new_handle(reinterpret_cast<char*>(dev_ptr), detail::cuda_deleter<char>());

        // copy data:
        //std::cout << "Filling new handle from host_ptr " << host_ptr << std::endl;
        cudaMemcpy(new_handle.get(), host_ptr, size_in_bytes, cudaMemcpyHostToDevice);

        return new_handle;
      }

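      /** @brief Copies 'bytes_to_copy' bytes from the device buffer 'src_buffer' to the device buffer 'dst_buffer'.
       *
       * @param src_buffer      Handle of the source device buffer
       * @param dst_buffer      Handle of the destination device buffer
       * @param src_offset      Offset (in bytes) of the first byte read from the source
       * @param dst_offset      Offset (in bytes) of the first byte written to the destination
       * @param bytes_to_copy   Number of bytes to be copied
       */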
      inline void memory_copy(handle_type const & src_buffer,
                              handle_type & dst_buffer,
                              vcl_size_t src_offset,
                              vcl_size_t dst_offset,
                              vcl_size_t bytes_to_copy)
      {
        assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
        assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));

        cudaMemcpy(reinterpret_cast<void *>(dst_buffer.get() + dst_offset),
                   reinterpret_cast<void *>(src_buffer.get() + src_offset),
                   bytes_to_copy,
                   cudaMemcpyDeviceToDevice);
      }

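      /** @brief Writes data from main memory (identified by 'ptr') to the device buffer 'dst_buffer'.
       *
       * @param dst_buffer      Handle of the destination device buffer
       * @param dst_offset      Offset (in bytes) of the first byte written to the destination
       * @param bytes_to_copy   Number of bytes to be copied
       * @param ptr             Pointer to the host data
       * @param async           If true, the copy is issued asynchronously via cudaMemcpyAsync()
       */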
      inline void memory_write(handle_type & dst_buffer,
                               vcl_size_t dst_offset,
                               vcl_size_t bytes_to_copy,
                               const void * ptr,
                               bool async = false)
      {
        assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));

        if (async)
          cudaMemcpyAsync(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset,
                          reinterpret_cast<const char *>(ptr),
                          bytes_to_copy,
                          cudaMemcpyHostToDevice);
        else
          cudaMemcpy(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset,
                     reinterpret_cast<const char *>(ptr),
                     bytes_to_copy,
                     cudaMemcpyHostToDevice);
      }

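      /** @brief Reads data from the device buffer 'src_buffer' back to main memory (identified by 'ptr').
       *
       * @param src_buffer      Handle of the source device buffer
       * @param src_offset      Offset (in bytes) of the first byte read from the source
       * @param bytes_to_copy   Number of bytes to be copied
       * @param ptr             Pointer to the host memory receiving the data
       * @param async           If true, the copy is issued asynchronously via cudaMemcpyAsync()
       */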
      inline void memory_read(handle_type const & src_buffer,
                              vcl_size_t src_offset,
                              vcl_size_t bytes_to_copy,
                              void * ptr,
                              bool async = false)
      {
        assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));

        if (async)
          cudaMemcpyAsync(reinterpret_cast<char *>(ptr),
                          reinterpret_cast<char *>(src_buffer.get()) + src_offset,
                          bytes_to_copy,
                          cudaMemcpyDeviceToHost);
        else
          cudaMemcpy(reinterpret_cast<char *>(ptr),
                     reinterpret_cast<char *>(src_buffer.get()) + src_offset,
                     bytes_to_copy,
                     cudaMemcpyDeviceToHost);
      }

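      // Illustrative round trip (a sketch, not part of the original interface; assumes a
      // CUDA-capable device and that vcl_size_t is visible, e.g. as std::size_t):
      //
      //   std::vector<float> host(256, 1.0f);
      //   handle_type dev = memory_create(256 * sizeof(float), &host[0]);  // allocate and copy host -> device
      //   memory_write(dev, 0, 256 * sizeof(float), &host[0]);             // overwrite device data from host
      //   memory_read(dev, 0, 256 * sizeof(float), &host[0]);              // copy device data back to host
      //   // the device allocation is released by detail::cuda_deleter when 'dev' goes out of scope
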
    } //cuda
  } //backend
} //viennacl
#endif