ViennaCL - The Vienna Computing Library  1.5.0
viennacl/linalg/mixed_precision_cg.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
00002 #define VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2013, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00025 #include <vector>
00026 #include <map>
00027 #include <cmath>
00028 #include "viennacl/forwards.h"
00029 #include "viennacl/tools/tools.hpp"
00030 #include "viennacl/linalg/ilu.hpp"
00031 #include "viennacl/linalg/prod.hpp"
00032 #include "viennacl/linalg/inner_prod.hpp"
00033 #include "viennacl/traits/clear.hpp"
00034 #include "viennacl/traits/size.hpp"
00035 #include "viennacl/meta/result_of.hpp"
00036 #include "viennacl/ocl/backend.hpp"
00037 #include "viennacl/ocl/kernel.hpp"
00038 #include "viennacl/backend/memory.hpp"
00039 
00040 #include "viennacl/vector_proxy.hpp"
00041 
00042 namespace viennacl
00043 {
00044   namespace linalg
00045   {
00046 
00049     class mixed_precision_cg_tag
00050     {
00051       public:
00058         mixed_precision_cg_tag(double tol = 1e-8, unsigned int max_iterations = 300, float inner_tol = 1e-2f) : tol_(tol), iterations_(max_iterations), inner_tol_(inner_tol) {}
00059 
00061         double tolerance() const { return tol_; }
00063         float inner_tolerance() const { return inner_tol_; }
00065         unsigned int max_iterations() const { return iterations_; }
00066 
00068         unsigned int iters() const { return iters_taken_; }
00069         void iters(unsigned int i) const { iters_taken_ = i; }
00070 
00072         double error() const { return last_error_; }
00074         void error(double e) const { last_error_ = e; }
00075 
00076 
00077       private:
00078         double tol_;
00079         unsigned int iterations_;
00080         float inner_tol_;
00081 
00082         //return values from solver
00083         mutable unsigned int iters_taken_;
00084         mutable double last_error_;
00085     };
00086 
00087 
00088     const char * double_float_conversion_program =
00089     "#pragma OPENCL EXTENSION cl_khr_fp64 : enable \n"
00090     "__kernel void assign_double_to_float(\n"
00091     "          __global float * vec1,\n"
00092     "          __global const double * vec2, \n"
00093     "          unsigned int size) \n"
00094     "{ \n"
00095     "  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
00096     "    vec1[i] = (float)(vec2[i]);\n"
00097     "};\n\n"
00098     "__kernel void inplace_add_float_to_double(\n"
00099     "          __global double * vec1,\n"
00100     "          __global const float * vec2, \n"
00101     "          unsigned int size) \n"
00102     "{ \n"
00103     "  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
00104     "    vec1[i] += (double)(vec2[i]);\n"
00105     "};\n";
00106 
00107 
00117     template <typename MatrixType, typename VectorType>
00118     VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag)
00119     {
00120       //typedef typename VectorType::value_type      ScalarType;
00121       typedef typename viennacl::result_of::value_type<VectorType>::type        ScalarType;
00122       typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type    CPU_ScalarType;
00123 
00124       //TODO: Assert CPU_ScalarType == double
00125 
00126       //std::cout << "Starting CG" << std::endl;
00127       vcl_size_t problem_size = viennacl::traits::size(rhs);
00128       VectorType result(rhs);
00129       viennacl::traits::clear(result);
00130 
00131       VectorType residual = rhs;
00132 
00133       CPU_ScalarType ip_rr = viennacl::linalg::inner_prod(rhs, rhs);
00134       CPU_ScalarType new_ip_rr = 0;
00135       CPU_ScalarType norm_rhs_squared = ip_rr;
00136 
00137       if (norm_rhs_squared == 0) //solution is zero if RHS norm is zero
00138         return result;
00139 
00140       static bool first = true;
00141 
00142       viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context());
00143       if (first)
00144       {
00145         ctx.add_program(double_float_conversion_program, "double_float_conversion_program");
00146       }
00147 
00148       viennacl::vector<float> residual_low_precision(problem_size, viennacl::traits::context(rhs));
00149       viennacl::vector<float> result_low_precision(problem_size, viennacl::traits::context(rhs));
00150       viennacl::vector<float> p_low_precision(problem_size, viennacl::traits::context(rhs));
00151       viennacl::vector<float> tmp_low_precision(problem_size, viennacl::traits::context(rhs));
00152       float inner_ip_rr = static_cast<float>(ip_rr);
00153       float new_inner_ip_rr = 0;
00154       float initial_inner_rhs_norm_squared = static_cast<float>(ip_rr);
00155       float alpha;
00156       float beta;
00157 
00158       viennacl::ocl::kernel & assign_double_to_float      = ctx.get_kernel("double_float_conversion_program", "assign_double_to_float");
00159       viennacl::ocl::kernel & inplace_add_float_to_double = ctx.get_kernel("double_float_conversion_program", "inplace_add_float_to_double");
00160 
00161       // transfer rhs to single precision:
00162       viennacl::ocl::enqueue( assign_double_to_float(p_low_precision.handle().opencl_handle(),
00163                                                      rhs.handle().opencl_handle(),
00164                                                      cl_uint(rhs.size())
00165                                                     ) );
00166       //std::cout << "copying p_low_precision..." << std::endl;
00167       //assign_double_to_float(p_low_precision.handle(), residual.handle(), residual.size());
00168       residual_low_precision = p_low_precision;
00169 
00170       // transfer matrix to single precision:
00171       viennacl::compressed_matrix<float> matrix_low_precision(matrix.size1(), matrix.size2(), matrix.nnz(), viennacl::traits::context(rhs));
00172       viennacl::backend::memory_copy(matrix.handle1(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle1()), 0, 0, sizeof(cl_uint) * (matrix.size1() + 1) );
00173       viennacl::backend::memory_copy(matrix.handle2(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle2()), 0, 0, sizeof(cl_uint) * (matrix.nnz()) );
00174 
00175       viennacl::ocl::enqueue( assign_double_to_float(matrix_low_precision.handle().opencl_handle(),
00176                                                      matrix.handle().opencl_handle(),
00177                                                      cl_uint(matrix.nnz())
00178                                                     ) );
00179       //std::cout << "copying matrix_low_precision..." << std::endl;
00180       //assign_double_to_float(const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle()), matrix.handle(), matrix.nnz());
00181 
00182       //std::cout << "Starting CG solver iterations... " << std::endl;
00183 
00184 
00185       for (unsigned int i = 0; i < tag.max_iterations(); ++i)
00186       {
00187         tag.iters(i+1);
00188 
00189         // lower precision 'inner iteration'
00190         tmp_low_precision = viennacl::linalg::prod(matrix_low_precision, p_low_precision);
00191 
00192         alpha = inner_ip_rr / viennacl::linalg::inner_prod(tmp_low_precision, p_low_precision);
00193         result_low_precision += alpha * p_low_precision;
00194         residual_low_precision -= alpha * tmp_low_precision;
00195 
00196         new_inner_ip_rr = viennacl::linalg::inner_prod(residual_low_precision, residual_low_precision);
00197 
00198         beta = new_inner_ip_rr / inner_ip_rr;
00199         inner_ip_rr = new_inner_ip_rr;
00200 
00201         p_low_precision = residual_low_precision + beta * p_low_precision;
00202 
00203 
00204 
00205         if (new_inner_ip_rr < tag.inner_tolerance() * initial_inner_rhs_norm_squared || i == tag.max_iterations()-1)
00206         {
00207           //std::cout << "outer correction at i=" << i << std::endl;
00208           //result += result_low_precision;
00209           viennacl::ocl::enqueue( inplace_add_float_to_double(result.handle().opencl_handle(),
00210                                                               result_low_precision.handle().opencl_handle(),
00211                                                               cl_uint(result.size())
00212                                                              ) );
00213 
00214           // residual = b - Ax  (without introducing a temporary)
00215           residual = viennacl::linalg::prod(matrix, result);
00216           residual = rhs - residual;
00217 
00218           new_ip_rr = viennacl::linalg::inner_prod(residual, residual);
00219           if (new_ip_rr / norm_rhs_squared < tag.tolerance() *  tag.tolerance())//squared norms involved here
00220             break;
00221 
00222           // p_low_precision = residual;
00223           viennacl::ocl::enqueue( assign_double_to_float(p_low_precision.handle().opencl_handle(),
00224                                                          residual.handle().opencl_handle(),
00225                                                          cl_uint(residual.size())
00226                                                         ) );
00227           result_low_precision.clear();
00228           residual_low_precision = p_low_precision;
00229           initial_inner_rhs_norm_squared = static_cast<float>(new_ip_rr);
00230           inner_ip_rr = static_cast<float>(new_ip_rr);
00231         }
00232       }
00233 
00234       //store last error estimate:
00235       tag.error(std::sqrt(new_ip_rr / norm_rhs_squared));
00236 
00237       return result;
00238     }
00239 
00240     template <typename MatrixType, typename VectorType>
00241     VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag, viennacl::linalg::no_precond)
00242     {
00243       return solve(matrix, rhs, tag);
00244     }
00245 
00246 
00247   }
00248 }
00249 
00250 #endif