ViennaCL - The Vienna Computing Library
1.5.0
|
00001 #ifndef VIENNACL_GENERATOR_PROFILES_HPP 00002 #define VIENNACL_GENERATOR_PROFILES_HPP 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2013, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00021 00027 #include <map> 00028 00029 #include "viennacl/ocl/device.hpp" 00030 00031 #include "viennacl/generator/forwards.h" 00032 00033 #include "viennacl/tools/shared_ptr.hpp" 00034 00035 #include "viennacl/generator/profile_base.hpp" 00036 #include "viennacl/generator/saxpy.hpp" 00037 #include "viennacl/generator/scalar_reduction.hpp" 00038 #include "viennacl/generator/vector_reduction.hpp" 00039 #include "viennacl/generator/matrix_product.hpp" 00040 00041 namespace viennacl{ 00042 00043 namespace generator{ 00044 00045 namespace profiles{ 00046 00047 typedef cl_uint vendor_id_type; 00048 typedef cl_device_type device_type; 00049 00050 typedef std::string device_name_type; 00051 typedef viennacl::tools::shared_ptr<profile_base> profile_base_ptr; 00052 00054 template<class KeyType, class ValueType> 00055 struct map_wrapper{ 00056 typedef std::map<KeyType,ValueType> map_type; 00057 map_type map; 00058 ValueType & operator[](KeyType const & key){ return map[key]; } 00059 }; 00060 00062 struct expression_map : public map_wrapper<expression_key_type, profile_base_ptr>{ }; 00063 00065 struct device_name_map : public map_wrapper<device_name_type, expression_map>{ }; 00066 00068 struct device_architecture_map : public map_wrapper<viennacl::ocl::device_architecture_family, device_name_map>{ }; 00069 00071 struct device_type_map : public map_wrapper<device_type,device_architecture_map>{ }; 00072 00074 struct database_type : public map_wrapper<vendor_id_type, device_type_map>{ }; 00075 00077 inline void set_generation_default_to(database_type & map, vendor_id_type vendor_id, viennacl::ocl::device_architecture_family family, expression_key_type expression, std::string const & device_name){ 00078 map[vendor_id][CL_DEVICE_TYPE_GPU][family][""][expression] = map[vendor_id][CL_DEVICE_TYPE_GPU][family][device_name][expression]; 00079 } 00080 00082 inline void set_all_generation_default_to(database_type & map, vendor_id_type vendor_id, viennacl::ocl::device_architecture_family family, std::string const & device_name){ 00083 set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_SAXPY_TYPE,4),device_name); 00084 set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_SAXPY_TYPE,4),device_name); 00085 set_generation_default_to(map,vendor_id,family,std::make_pair(SCALAR_REDUCE_TYPE,4),device_name); 00086 set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_REDUCE_Nx_TYPE,4),device_name); 00087 set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_REDUCE_Tx_TYPE,4),device_name); 00088 set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_NN_TYPE,4),device_name); 00089 set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_TN_TYPE,4),device_name); 00090 set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_NT_TYPE,4),device_name); 00091 set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_TT_TYPE,4),device_name); 00092 00093 set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_SAXPY_TYPE,8),device_name); 00094 set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_SAXPY_TYPE,8),device_name); 00095 set_generation_default_to(map,vendor_id,family,std::make_pair(SCALAR_REDUCE_TYPE,8),device_name); 00096 set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_REDUCE_Nx_TYPE,8),device_name); 00097 set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_REDUCE_Tx_TYPE,8),device_name); 00098 set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_NN_TYPE,8),device_name); 00099 set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_TN_TYPE,8),device_name); 00100 set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_NT_TYPE,8),device_name); 00101 set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_TT_TYPE,8),device_name); 00102 } 00103 00105 static database_type init_database(){ 00106 database_type map; 00107 00108 /*---------------------------*/ 00109 /* GPU Defaults */ 00110 /*---------------------------*/ 00111 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,128,128,true)); 00112 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); 00113 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(1, 128, 128, true)); 00114 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1, 1, 256, 32)); 00115 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1, 1, 256, 32)); 00116 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0)); 00117 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0)); 00118 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0)); 00119 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0)); 00120 00121 00122 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(1,128,128,true)); 00123 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); 00124 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(1, 128, 128, true)); 00125 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1, 1, 256, 32)); 00126 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1, 1, 256, 32)); 00127 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0)); 00128 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0)); 00129 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0)); 00130 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0)); 00131 00132 /*---------------------------*/ 00133 /* CPU Defaults */ 00134 /*---------------------------*/ 00135 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(8,16,256,true)); 00136 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); 00137 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(8,8,512,true)); 00138 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,2,1,8)); 00139 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,16,8,8)); 00140 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(4,16,64,1,4,4,128,0,0)); 00141 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,16,4,32,0,0)); 00142 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,16,4,32,0,0)); 00143 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,16,4,32,0,0)); 00144 00145 00146 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(8,16,32,true)); 00147 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); 00148 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(8,8,512,true)); 00149 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,1,8)); 00150 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,8,16,16)); 00151 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(2,16,64,1,8,4,64,0,0)); 00152 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,128,2,8,4,16,0,0)); 00153 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,16,4,32,0,0)); 00154 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,16,4,32,0,0)); 00155 00156 00157 /*---------------------------*/ 00158 /* ACCELERATOR Defaults */ 00159 /*---------------------------*/ 00160 //same as CPU for now 00161 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(8,16,256,true)); 00162 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); 00163 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(8,8,512,true)); 00164 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,2,1,8)); 00165 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,16,8,8)); 00166 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(4,16,64,1,4,4,128,0,0)); 00167 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,16,4,32,0,0)); 00168 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,16,4,32,0,0)); 00169 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,16,4,32,0,0)); 00170 00171 00172 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(8,16,32,true)); 00173 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); 00174 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(8,8,512,true)); 00175 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,1,8)); 00176 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,8,16,16)); 00177 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(2,16,64,1,8,4,64,0,0)); 00178 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,128,2,8,4,16,0,0)); 00179 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,16,4,32,0,0)); 00180 map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,16,4,32,0,0)); 00181 00182 00183 00184 /*---------------------------*/ 00185 /* AMD */ 00186 /*---------------------------*/ 00187 00188 //Evergreen 00189 00190 //Cypress 00191 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,4,64,true)); 00192 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); 00193 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(8,128,128,true)); 00194 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,1,256,1024)); 00195 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,32,8,256)); 00196 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(4,4,64,16,4,4,8,1,0)); 00197 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(4,4,64,16,4,4,8,1,0)); 00198 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(4,8,64,16,4,4,8,1,0)); 00199 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(4,8,128,8,8,4,4,0,0)); 00200 00201 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(2,1,64,true)); 00202 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); 00203 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(2,256,64,true)); 00204 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,256,1024)); 00205 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,64,4,256)); 00206 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(2,16,32,16,2,2,8,0,0)); 00207 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(2,4,64,32,4,2,2,0,0)); 00208 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(4,2,64,32,8,8,4,0,0)); 00209 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(2,16,64,8,2,2,4,0,0)); 00210 00211 //Default 00212 set_all_generation_default_to(map,viennacl::ocl::amd_id,viennacl::ocl::Evergreen,"Cypress"); 00213 00214 00215 //Southern Islands 00216 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,4,64,true)); 00217 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); 00218 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(8,128,128,true)); 00219 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,1,256,1024)); 00220 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,32,8,256)); 00221 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(4,8,128,32,4,4,4,1,0)); 00222 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,64,16,4,2,8,1,0)); 00223 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(4,16,64,16,4,4,8,1,0)); 00224 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(4,16,64,16,4,4,8,1,0)); 00225 00226 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(2,1,64,true)); 00227 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); 00228 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(2,256,64,true)); 00229 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,256,1024)); 00230 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,64,4,256)); 00231 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(2,4,128,64,4,2,2,1,0)); 00232 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(2,2,128,32,4,2,2,0,0)); 00233 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(2,8,128,32,2,2,2,1,0)); 00234 map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(2,8,128,32,2,2,2,1,0)); 00235 00236 00237 //Default 00238 set_all_generation_default_to(map,viennacl::ocl::amd_id,viennacl::ocl::SouthernIslands,"Tahiti"); 00239 00240 00241 /*---------------------------*/ 00242 /* NVidia */ 00243 /*---------------------------*/ 00244 00245 //-----Fermi 00246 00247 //Geforce GTX 470 00248 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,1,256,true)); 00249 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true)); 00250 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(4,64,512,true)); 00251 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,1,256,1024)); 00252 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,64,4,64)); 00253 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(1,2,64,64,8,4,2,1,0)); 00254 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,16,4,4,8,0,0)); 00255 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,4,128,32,4,8,4,1,0)); 00256 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,4,32,16,8,4,8,0,0)); 00257 00258 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(2,1,64,true)); 00259 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(2,16,16,16,16,true)); 00260 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(2,64,512,true)); 00261 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,128,1024)); 00262 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,16,32,1024)); 00263 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,64,32,2,2,8,1,0)); 00264 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,64,128,4,2,2,8,0,1)); 00265 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(1,4,128,32,4,8,4,1,0)); 00266 map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,4,32,16,8,4,8,0,0)); 00267 00268 //default 00269 set_all_generation_default_to(map,viennacl::ocl::nvidia_id,viennacl::ocl::Fermi,"GeForce GTX 470"); 00270 00271 00272 00273 return map; 00274 } 00275 static database_type database = init_database(); 00276 00278 static profile_base * handle_failure(viennacl::ocl::device const & device, expression_descriptor const & descriptor, tools::shared_ptr<profile_base> const & profile){ 00279 //Returns default if the profile is invalid 00280 if(profile->is_invalid(device, descriptor.scalartype_size)) 00281 return at(at(at(at(at(database.map, viennacl::ocl::unknown_id).map, device.type()).map, viennacl::ocl::UNKNOWN).map, std::string("")).map, descriptor.make_key()).get(); 00282 return profile.get(); 00283 } 00284 00286 static profile_base * get(viennacl::ocl::device const & device, expression_descriptor const & descriptor){ 00287 device_type dev_type = device.type(); 00288 vendor_id_type vendor_id = device.vendor_id(); 00289 viennacl::ocl::device_architecture_family device_architecture = device.architecture_family(); 00290 std::string const & device_name = device.name(); 00291 expression_key_type expression_key = descriptor.make_key(); 00292 00293 //std::cout << "Looking up vendor ID..." << std::endl; 00294 /*-Vendor ID-*/ 00295 database_type::map_type::iterator vendor_it = database.map.find(vendor_id); 00296 //Vendor not recognized => global default: 00297 if(vendor_it==database.map.end()) 00298 return handle_failure(device, descriptor, at(at(at(at(at(database.map, viennacl::ocl::unknown_id).map, dev_type).map, viennacl::ocl::UNKNOWN).map, std::string("")).map, expression_key)); 00299 00300 /*-Device Type-*/ 00301 //std::cout << "Looking up device type..." << std::endl; 00302 device_type_map::map_type::iterator device_type_it = vendor_it->second.map.find(dev_type); 00303 //Device type not recognized for this vendor => global default 00304 if(device_type_it==vendor_it->second.map.end()) 00305 return handle_failure(device, descriptor, at(at(at(at(at(database.map, viennacl::ocl::unknown_id).map, dev_type).map, viennacl::ocl::UNKNOWN).map, std::string("")).map, expression_key)); 00306 00307 /*-Device Architecture-*/ 00308 //std::cout << "Looking up device architecture..." << std::endl; 00309 device_architecture_map::map_type::iterator architecture_it = device_type_it->second.map.find(device_architecture); 00310 if(architecture_it==device_type_it->second.map.end()) 00311 return handle_failure(device, descriptor, at(at(at(at(at(database.map, viennacl::ocl::unknown_id).map, dev_type).map, viennacl::ocl::UNKNOWN).map, std::string("")).map, expression_key)); 00312 00313 /*-Device Name-*/ 00314 //std::cout << "Looking up device name..." << std::endl; 00315 device_name_map::map_type::iterator device_name_it = architecture_it->second.map.find(device_name); 00316 //Name not found => Vendor default 00317 if(device_name_it==architecture_it->second.map.end()) 00318 return handle_failure(device, descriptor, at(at(at(at(at(database.map, vendor_id).map, dev_type).map, device_architecture).map, std::string("")).map, expression_key)); 00319 00320 //std::cout << "Looking up expression name.." << std::endl; 00321 /*-Expression-*/ 00322 expression_map::map_type::iterator expression_it = device_name_it->second.map.find(expression_key); 00323 //Expression not found => Vendor default 00324 if(expression_it==device_name_it->second.map.end()) 00325 return handle_failure(device, descriptor, at(at(at(at(at(database.map, vendor_id).map, dev_type).map, device_architecture).map, std::string("")).map, expression_key)); 00326 00327 //std::cout << "Device found in the database! Getting profile..." << std::endl; 00328 //Everything okay. Return specific profile// 00329 return handle_failure(device, descriptor, at(at(at(at(at(database.map, vendor_id).map, dev_type).map, device_architecture).map, std::string("")).map, expression_key)); 00330 } 00331 00332 } 00333 00334 } 00335 00336 } 00337 00338 00339 #endif 00340