ViennaCL - The Vienna Computing Library  1.5.0
viennacl/linalg/opencl/kernels/scalar.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_SCALAR_HPP
00002 #define VIENNACL_LINALG_OPENCL_KERNELS_SCALAR_HPP
00003 
00004 #include "viennacl/tools/tools.hpp"
00005 #include "viennacl/ocl/kernel.hpp"
00006 #include "viennacl/ocl/platform.hpp"
00007 #include "viennacl/ocl/utils.hpp"
00008 
00011 namespace viennacl
00012 {
00013   namespace linalg
00014   {
00015     namespace opencl
00016     {
00017       namespace kernels
00018       {
00019 
00021 
00023         enum asbs_scalar_type
00024         {
00025           VIENNACL_ASBS_NONE = 0, // scalar does not exist/contribute
00026           VIENNACL_ASBS_CPU,
00027           VIENNACL_ASBS_GPU
00028         };
00029 
00031         struct asbs_config
00032         {
00033           asbs_config() : with_stride_and_range(true), a(VIENNACL_ASBS_CPU), b(VIENNACL_ASBS_NONE) {}
00034 
00035           bool with_stride_and_range;
00036           std::string      assign_op;
00037           asbs_scalar_type a;
00038           asbs_scalar_type b;
00039         };
00040 
00041         // just returns the assignment string
00042         template <typename StringType>
00043         void generate_asbs_impl3(StringType & source, char sign_a, char sign_b, asbs_config const & cfg, bool mult_alpha, bool mult_beta)
00044         {
00045           source.append("      *s1 "); source.append(cfg.assign_op); source.append(1, sign_a); source.append(" *s2 ");
00046           if (mult_alpha)
00047             source.append("* alpha ");
00048           else
00049             source.append("/ alpha ");
00050           if (cfg.b != VIENNACL_ASBS_NONE)
00051           {
00052             source.append(1, sign_b); source.append(" *s3 ");
00053             if (mult_beta)
00054               source.append("* beta");
00055             else
00056               source.append("/ beta");
00057           }
00058           source.append("; \n");
00059         }
00060 
00061         template <typename StringType>
00062         void generate_asbs_impl2(StringType & source, char sign_a, char sign_b, asbs_config const & cfg)
00063         {
00064           source.append("    if (options2 & (1 << 1)) { \n");
00065           if (cfg.b != VIENNACL_ASBS_NONE)
00066           {
00067             source.append("     if (options3 & (1 << 1)) \n");
00068             generate_asbs_impl3(source, sign_a, sign_b, cfg, false, false);
00069             source.append("     else \n");
00070             generate_asbs_impl3(source, sign_a, sign_b, cfg, false, true);
00071           }
00072           else
00073             generate_asbs_impl3(source, sign_a, sign_b, cfg, false, true);
00074           source.append("    } else { \n");
00075           if (cfg.b != VIENNACL_ASBS_NONE)
00076           {
00077             source.append("     if (options3 & (1 << 1)) \n");
00078             generate_asbs_impl3(source, sign_a, sign_b, cfg, true, false);
00079             source.append("     else \n");
00080             generate_asbs_impl3(source, sign_a, sign_b, cfg, true, true);
00081           }
00082           else
00083             generate_asbs_impl3(source, sign_a, sign_b, cfg, true, true);
00084           source.append("    } \n");
00085 
00086         }
00087 
00088         template <typename StringType>
00089         void generate_asbs_impl(StringType & source, std::string const & numeric_string, asbs_config const & cfg)
00090         {
00091           source.append("__kernel void as");
00092           if (cfg.b != VIENNACL_ASBS_NONE)
00093             source.append("bs");
00094           if (cfg.assign_op != "=")
00095             source.append("_s");
00096 
00097           if (cfg.a == VIENNACL_ASBS_CPU)
00098             source.append("_cpu");
00099           else if (cfg.a == VIENNACL_ASBS_GPU)
00100             source.append("_gpu");
00101 
00102           if (cfg.b == VIENNACL_ASBS_CPU)
00103             source.append("_cpu");
00104           else if (cfg.b == VIENNACL_ASBS_GPU)
00105             source.append("_gpu");
00106           source.append("( \n");
00107           source.append("  __global "); source.append(numeric_string); source.append(" * s1, \n");
00108           source.append(" \n");
00109           if (cfg.a == VIENNACL_ASBS_CPU)
00110           {
00111             source.append("  "); source.append(numeric_string); source.append(" fac2, \n");
00112           }
00113           else if (cfg.a == VIENNACL_ASBS_GPU)
00114           {
00115             source.append("  __global "); source.append(numeric_string); source.append(" * fac2, \n");
00116           }
00117           source.append("  unsigned int options2, \n");  // 0: no action, 1: flip sign, 2: take inverse, 3: flip sign and take inverse
00118           source.append("  __global const "); source.append(numeric_string); source.append(" * s2");
00119 
00120           if (cfg.b != VIENNACL_ASBS_NONE)
00121           {
00122             source.append(", \n\n");
00123             if (cfg.b == VIENNACL_ASBS_CPU)
00124             {
00125               source.append("  "); source.append(numeric_string); source.append(" fac3, \n");
00126             }
00127             else if (cfg.b == VIENNACL_ASBS_GPU)
00128             {
00129               source.append("  __global "); source.append(numeric_string); source.append(" * fac3, \n");
00130             }
00131             source.append("  unsigned int options3, \n");  // 0: no action, 1: flip sign, 2: take inverse, 3: flip sign and take inverse
00132             source.append("  __global const "); source.append(numeric_string); source.append(" * s3");
00133           }
00134           source.append(") \n{ \n");
00135 
00136           if (cfg.a == VIENNACL_ASBS_CPU)
00137           {
00138             source.append("  "); source.append(numeric_string); source.append(" alpha = fac2; \n");
00139           }
00140           else if (cfg.a == VIENNACL_ASBS_GPU)
00141           {
00142             source.append("  "); source.append(numeric_string); source.append(" alpha = fac2[0]; \n");
00143           }
00144           source.append(" \n");
00145 
00146           if (cfg.b == VIENNACL_ASBS_CPU)
00147           {
00148             source.append("  "); source.append(numeric_string); source.append(" beta = fac3; \n");
00149           }
00150           else if (cfg.b == VIENNACL_ASBS_GPU)
00151           {
00152             source.append("  "); source.append(numeric_string); source.append(" beta = fac3[0]; \n");
00153           }
00154 
00155           source.append("  if (options2 & (1 << 0)) { \n");
00156           if (cfg.b != VIENNACL_ASBS_NONE)
00157           {
00158             source.append("   if (options3 & (1 << 0)) { \n");
00159             generate_asbs_impl2(source, '-', '-', cfg);
00160             source.append("   } else { \n");
00161             generate_asbs_impl2(source, '-', '+', cfg);
00162             source.append("   } \n");
00163           }
00164           else
00165             generate_asbs_impl2(source, '-', '+', cfg);
00166           source.append("  } else { \n");
00167           if (cfg.b != VIENNACL_ASBS_NONE)
00168           {
00169             source.append("   if (options3 & (1 << 0)) { \n");
00170             generate_asbs_impl2(source, '+', '-', cfg);
00171             source.append("   } else { \n");
00172             generate_asbs_impl2(source, '+', '+', cfg);
00173             source.append("   } \n");
00174           }
00175           else
00176             generate_asbs_impl2(source, '+', '+', cfg);
00177 
00178           source.append("  } \n");
00179           source.append("} \n");
00180         }
00181 
00182         template <typename StringType>
00183         void generate_asbs(StringType & source, std::string const & numeric_string)
00184         {
00185           asbs_config cfg;
00186           cfg.assign_op = "=";
00187           cfg.with_stride_and_range = true;
00188 
00189           // as
00190           cfg.b = VIENNACL_ASBS_NONE; cfg.a = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
00191           cfg.b = VIENNACL_ASBS_NONE; cfg.a = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
00192 
00193           // asbs
00194           cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
00195           cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
00196           cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
00197           cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
00198 
00199           // asbs
00200           cfg.assign_op = "+=";
00201 
00202           cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
00203           cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
00204           cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg);
00205           cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg);
00206         }
00207 
00208         template <typename StringType>
00209         void generate_scalar_swap(StringType & source, std::string const & numeric_string)
00210         {
00211           source.append("__kernel void swap( \n");
00212           source.append("          __global "); source.append(numeric_string); source.append(" * s1, \n");
00213           source.append("          __global "); source.append(numeric_string); source.append(" * s2) \n");
00214           source.append("{ \n");
00215           source.append("  "); source.append(numeric_string); source.append(" tmp = *s2; \n");
00216           source.append("  *s2 = *s1; \n");
00217           source.append("  *s1 = tmp; \n");
00218           source.append("} \n");
00219         }
00220 
00222 
00223         // main kernel class
00225         template <class TYPE>
00226         struct scalar
00227         {
00228           static std::string program_name()
00229           {
00230             return viennacl::ocl::type_to_string<TYPE>::apply() + "_scalar";
00231           }
00232 
00233           static void init(viennacl::ocl::context & ctx)
00234           {
00235             viennacl::ocl::DOUBLE_PRECISION_CHECKER<TYPE>::apply(ctx);
00236             std::string numeric_string = viennacl::ocl::type_to_string<TYPE>::apply();
00237 
00238             static std::map<cl_context, bool> init_done;
00239             if (!init_done[ctx.handle().get()])
00240             {
00241               std::string source;
00242               source.reserve(8192);
00243 
00244               viennacl::ocl::append_double_precision_pragma<TYPE>(ctx, source);
00245 
00246               // fully parametrized kernels:
00247               generate_asbs(source, numeric_string);
00248               generate_scalar_swap(source, numeric_string);
00249 
00250 
00251               std::string prog_name = program_name();
00252               #ifdef VIENNACL_BUILD_INFO
00253               std::cout << "Creating program " << prog_name << std::endl;
00254               #endif
00255               ctx.add_program(source, prog_name);
00256               init_done[ctx.handle().get()] = true;
00257             } //if
00258           } //init
00259         };
00260 
00261       }  // namespace kernels
00262     }  // namespace opencl
00263   }  // namespace linalg
00264 }  // namespace viennacl
00265 #endif
00266