ViennaCL - The Vienna Computing Library
1.5.0
|
00001 #ifndef VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_ 00002 #define VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_ 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2013, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00025 #include <vector> 00026 #include <map> 00027 #include <cmath> 00028 #include "viennacl/forwards.h" 00029 #include "viennacl/tools/tools.hpp" 00030 #include "viennacl/linalg/ilu.hpp" 00031 #include "viennacl/linalg/prod.hpp" 00032 #include "viennacl/linalg/inner_prod.hpp" 00033 #include "viennacl/traits/clear.hpp" 00034 #include "viennacl/traits/size.hpp" 00035 #include "viennacl/meta/result_of.hpp" 00036 #include "viennacl/ocl/backend.hpp" 00037 #include "viennacl/ocl/kernel.hpp" 00038 #include "viennacl/backend/memory.hpp" 00039 00040 #include "viennacl/vector_proxy.hpp" 00041 00042 namespace viennacl 00043 { 00044 namespace linalg 00045 { 00046 00049 class mixed_precision_cg_tag 00050 { 00051 public: 00058 mixed_precision_cg_tag(double tol = 1e-8, unsigned int max_iterations = 300, float inner_tol = 1e-2f) : tol_(tol), iterations_(max_iterations), inner_tol_(inner_tol) {} 00059 00061 double tolerance() const { return tol_; } 00063 float inner_tolerance() const { return inner_tol_; } 00065 unsigned int max_iterations() const { return iterations_; } 00066 00068 unsigned int iters() const { return iters_taken_; } 00069 void iters(unsigned int i) const { iters_taken_ = i; } 00070 00072 double error() const { return last_error_; } 00074 void error(double e) const { last_error_ = e; } 00075 00076 00077 private: 00078 double tol_; 00079 unsigned int iterations_; 00080 float inner_tol_; 00081 00082 //return values from solver 00083 mutable unsigned int iters_taken_; 00084 mutable double last_error_; 00085 }; 00086 00087 00088 const char * double_float_conversion_program = 00089 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable \n" 00090 "__kernel void assign_double_to_float(\n" 00091 " __global float * vec1,\n" 00092 " __global const double * vec2, \n" 00093 " unsigned int size) \n" 00094 "{ \n" 00095 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n" 00096 " vec1[i] = (float)(vec2[i]);\n" 00097 "};\n\n" 00098 "__kernel void inplace_add_float_to_double(\n" 00099 " __global double * vec1,\n" 00100 " __global const float * vec2, \n" 00101 " unsigned int size) \n" 00102 "{ \n" 00103 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n" 00104 " vec1[i] += (double)(vec2[i]);\n" 00105 "};\n"; 00106 00107 00117 template <typename MatrixType, typename VectorType> 00118 VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag) 00119 { 00120 //typedef typename VectorType::value_type ScalarType; 00121 typedef typename viennacl::result_of::value_type<VectorType>::type ScalarType; 00122 typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType; 00123 00124 //TODO: Assert CPU_ScalarType == double 00125 00126 //std::cout << "Starting CG" << std::endl; 00127 vcl_size_t problem_size = viennacl::traits::size(rhs); 00128 VectorType result(rhs); 00129 viennacl::traits::clear(result); 00130 00131 VectorType residual = rhs; 00132 00133 CPU_ScalarType ip_rr = viennacl::linalg::inner_prod(rhs, rhs); 00134 CPU_ScalarType new_ip_rr = 0; 00135 CPU_ScalarType norm_rhs_squared = ip_rr; 00136 00137 if (norm_rhs_squared == 0) //solution is zero if RHS norm is zero 00138 return result; 00139 00140 static bool first = true; 00141 00142 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context()); 00143 if (first) 00144 { 00145 ctx.add_program(double_float_conversion_program, "double_float_conversion_program"); 00146 } 00147 00148 viennacl::vector<float> residual_low_precision(problem_size, viennacl::traits::context(rhs)); 00149 viennacl::vector<float> result_low_precision(problem_size, viennacl::traits::context(rhs)); 00150 viennacl::vector<float> p_low_precision(problem_size, viennacl::traits::context(rhs)); 00151 viennacl::vector<float> tmp_low_precision(problem_size, viennacl::traits::context(rhs)); 00152 float inner_ip_rr = static_cast<float>(ip_rr); 00153 float new_inner_ip_rr = 0; 00154 float initial_inner_rhs_norm_squared = static_cast<float>(ip_rr); 00155 float alpha; 00156 float beta; 00157 00158 viennacl::ocl::kernel & assign_double_to_float = ctx.get_kernel("double_float_conversion_program", "assign_double_to_float"); 00159 viennacl::ocl::kernel & inplace_add_float_to_double = ctx.get_kernel("double_float_conversion_program", "inplace_add_float_to_double"); 00160 00161 // transfer rhs to single precision: 00162 viennacl::ocl::enqueue( assign_double_to_float(p_low_precision.handle().opencl_handle(), 00163 rhs.handle().opencl_handle(), 00164 cl_uint(rhs.size()) 00165 ) ); 00166 //std::cout << "copying p_low_precision..." << std::endl; 00167 //assign_double_to_float(p_low_precision.handle(), residual.handle(), residual.size()); 00168 residual_low_precision = p_low_precision; 00169 00170 // transfer matrix to single precision: 00171 viennacl::compressed_matrix<float> matrix_low_precision(matrix.size1(), matrix.size2(), matrix.nnz(), viennacl::traits::context(rhs)); 00172 viennacl::backend::memory_copy(matrix.handle1(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle1()), 0, 0, sizeof(cl_uint) * (matrix.size1() + 1) ); 00173 viennacl::backend::memory_copy(matrix.handle2(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle2()), 0, 0, sizeof(cl_uint) * (matrix.nnz()) ); 00174 00175 viennacl::ocl::enqueue( assign_double_to_float(matrix_low_precision.handle().opencl_handle(), 00176 matrix.handle().opencl_handle(), 00177 cl_uint(matrix.nnz()) 00178 ) ); 00179 //std::cout << "copying matrix_low_precision..." << std::endl; 00180 //assign_double_to_float(const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle()), matrix.handle(), matrix.nnz()); 00181 00182 //std::cout << "Starting CG solver iterations... " << std::endl; 00183 00184 00185 for (unsigned int i = 0; i < tag.max_iterations(); ++i) 00186 { 00187 tag.iters(i+1); 00188 00189 // lower precision 'inner iteration' 00190 tmp_low_precision = viennacl::linalg::prod(matrix_low_precision, p_low_precision); 00191 00192 alpha = inner_ip_rr / viennacl::linalg::inner_prod(tmp_low_precision, p_low_precision); 00193 result_low_precision += alpha * p_low_precision; 00194 residual_low_precision -= alpha * tmp_low_precision; 00195 00196 new_inner_ip_rr = viennacl::linalg::inner_prod(residual_low_precision, residual_low_precision); 00197 00198 beta = new_inner_ip_rr / inner_ip_rr; 00199 inner_ip_rr = new_inner_ip_rr; 00200 00201 p_low_precision = residual_low_precision + beta * p_low_precision; 00202 00203 00204 00205 if (new_inner_ip_rr < tag.inner_tolerance() * initial_inner_rhs_norm_squared || i == tag.max_iterations()-1) 00206 { 00207 //std::cout << "outer correction at i=" << i << std::endl; 00208 //result += result_low_precision; 00209 viennacl::ocl::enqueue( inplace_add_float_to_double(result.handle().opencl_handle(), 00210 result_low_precision.handle().opencl_handle(), 00211 cl_uint(result.size()) 00212 ) ); 00213 00214 // residual = b - Ax (without introducing a temporary) 00215 residual = viennacl::linalg::prod(matrix, result); 00216 residual = rhs - residual; 00217 00218 new_ip_rr = viennacl::linalg::inner_prod(residual, residual); 00219 if (new_ip_rr / norm_rhs_squared < tag.tolerance() * tag.tolerance())//squared norms involved here 00220 break; 00221 00222 // p_low_precision = residual; 00223 viennacl::ocl::enqueue( assign_double_to_float(p_low_precision.handle().opencl_handle(), 00224 residual.handle().opencl_handle(), 00225 cl_uint(residual.size()) 00226 ) ); 00227 result_low_precision.clear(); 00228 residual_low_precision = p_low_precision; 00229 initial_inner_rhs_norm_squared = static_cast<float>(new_ip_rr); 00230 inner_ip_rr = static_cast<float>(new_ip_rr); 00231 } 00232 } 00233 00234 //store last error estimate: 00235 tag.error(std::sqrt(new_ip_rr / norm_rhs_squared)); 00236 00237 return result; 00238 } 00239 00240 template <typename MatrixType, typename VectorType> 00241 VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag, viennacl::linalg::no_precond) 00242 { 00243 return solve(matrix, rhs, tag); 00244 } 00245 00246 00247 } 00248 } 00249 00250 #endif