ViennaCL - The Vienna Computing Library
1.5.0
|
00001 #ifndef VIENNACL_GENERATOR_GENERATE_SAXPY_HPP 00002 #define VIENNACL_GENERATOR_GENERATE_SAXPY_HPP 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2013, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00021 00027 #include <vector> 00028 00029 #include "viennacl/scheduler/forwards.h" 00030 00031 #include "viennacl/generator/mapped_objects.hpp" 00032 #include "viennacl/generator/helpers.hpp" 00033 #include "viennacl/generator/utils.hpp" 00034 00035 #include "viennacl/generator/profile_base.hpp" 00036 00037 #include "viennacl/tools/tools.hpp" 00038 00039 namespace viennacl{ 00040 00041 namespace generator{ 00042 00044 class vector_saxpy : public profile_base{ 00045 public: 00046 static std::string csv_format() { 00047 return "Vec,LSize1,NumGroups1,GlobalDecomposition"; 00048 } 00049 00050 std::string csv_representation() const{ 00051 std::ostringstream oss; 00052 oss << vector_size_ 00053 << "," << local_size_1_ 00054 << "," << num_groups_ 00055 << "," << decomposition_; 00056 return oss.str(); 00057 } 00058 00059 vector_saxpy(unsigned int v, vcl_size_t gs, vcl_size_t ng, unsigned int d) : profile_base(v, gs, 1, 1), num_groups_(ng), decomposition_(d){ } 00060 00061 void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg) const{ 00062 configure_local_sizes(k, kernel_id); 00063 00064 k.global_work_size(0,local_size_1_*num_groups_); 00065 k.global_work_size(1,1); 00066 00067 scheduler::statement_node const & first_node = statements.front().second; 00068 viennacl::vcl_size_t N = utils::call_on_vector(first_node.lhs, utils::internal_size_fun()); 00069 k.arg(n_arg++, cl_uint(N/vector_size_)); 00070 } 00071 void kernel_arguments(statements_type const & /*statements*/, std::string & arguments_string) const{ 00072 arguments_string += detail::generate_value_kernel_argument("unsigned int", "N"); 00073 } 00074 00075 private: 00076 00077 void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector<detail::mapping_type> const & mapping) const { 00078 stream << "for(unsigned int i = get_global_id(0) ; i < N ; i += get_global_size(0))" << std::endl; 00079 stream << "{" << std::endl; 00080 stream.inc_tab(); 00081 00082 //Fetches entries to registers 00083 std::set<std::string> fetched; 00084 for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it) 00085 for(detail::mapping_type::const_reverse_iterator iit = it->rbegin() ; iit != it->rend() ; ++iit) 00086 //Useless to fetch cpu scalars into registers 00087 if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(iit->second.get())) 00088 p->fetch( std::make_pair("i","0"), vector_size_, fetched, stream); 00089 00090 //Generates all the expression, in order 00091 vcl_size_t i = 0; 00092 for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ 00093 std::string str; 00094 detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("i","0"), -1, str, mapping[i++])); 00095 stream << str << ";" << std::endl; 00096 } 00097 00098 //Writes back 00099 for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it) 00100 //Gets the mapped object at the LHS of each expression 00101 if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(at(mapping.at(std::distance(statements.begin(),it)), std::make_pair(&it->second, detail::LHS_NODE_TYPE)).get())) 00102 p->write_back( std::make_pair("i", "0"), fetched, stream); 00103 00104 stream.dec_tab(); 00105 stream << "}" << std::endl; 00106 } 00107 00108 private: 00109 vcl_size_t num_groups_; 00110 unsigned int decomposition_; 00111 00112 }; 00113 00114 00115 00117 class matrix_saxpy : public profile_base{ 00118 00119 bool invalid_impl(viennacl::ocl::device const & /*dev*/, vcl_size_t /*scalartype_size*/) const{ return false; } 00120 bool is_slow_impl(viennacl::ocl::device const &) const { return false; } 00121 00122 public: 00123 matrix_saxpy(unsigned int v, vcl_size_t gs1, vcl_size_t gs2, vcl_size_t ng1, vcl_size_t ng2, unsigned int d) : profile_base(v, gs1, gs2, 1), num_groups_row_(ng1), num_groups_col_(ng2), decomposition_(d){ } 00124 00125 static std::string csv_format() { 00126 return "Vec,LSize1,LSize2,NumGroups1,NumGroups2,GlobalDecomposition"; 00127 } 00128 00129 std::string csv_representation() const{ 00130 std::ostringstream oss; 00131 oss << vector_size_ 00132 << "," << local_size_1_ 00133 << "," << local_size_2_ 00134 << "," << num_groups_row_ 00135 << "," << num_groups_col_ 00136 << "," << decomposition_; 00137 return oss.str(); 00138 } 00139 00140 void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const & statements, viennacl::ocl::kernel & k, unsigned int & n_arg) const{ 00141 configure_local_sizes(k, kernel_id); 00142 00143 k.global_work_size(0,local_size_1_*num_groups_row_); 00144 k.global_work_size(1,local_size_2_*num_groups_col_); 00145 00146 scheduler::statement_node const & first_node = statements.front().second; 00147 k.arg(n_arg++, cl_uint(utils::call_on_matrix(first_node.lhs, utils::internal_size1_fun()))); 00148 k.arg(n_arg++, cl_uint(utils::call_on_matrix(first_node.lhs, utils::internal_size2_fun()))); 00149 } 00150 00151 void kernel_arguments(statements_type const & /*statements*/, std::string & arguments_string) const{ 00152 arguments_string += detail::generate_value_kernel_argument("unsigned int", "M"); 00153 arguments_string += detail::generate_value_kernel_argument("unsigned int", "N"); 00154 } 00155 00156 private: 00157 void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector<detail::mapping_type> const & mapping) const { 00158 00159 for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it){ 00160 for(detail::mapping_type::const_iterator iit = it->begin() ; iit != it->end() ; ++iit){ 00161 if(detail::mapped_matrix * p = dynamic_cast<detail::mapped_matrix*>(iit->second.get())) 00162 p->bind_sizes("M","N"); 00163 } 00164 } 00165 00166 stream << "for(unsigned int i = get_global_id(0) ; i < M ; i += get_global_size(0))" << std::endl; 00167 stream << "{" << std::endl; 00168 stream.inc_tab(); 00169 stream << "for(unsigned int j = get_global_id(1) ; j < N ; j += get_global_size(1))" << std::endl; 00170 stream << "{" << std::endl; 00171 stream.inc_tab(); 00172 00173 //Fetches entries to registers 00174 std::set<std::string> fetched; 00175 for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it) 00176 for(detail::mapping_type::const_reverse_iterator it2 = it->rbegin() ; it2 != it->rend() ; ++it2) 00177 if(detail::mapped_matrix * p = dynamic_cast<detail::mapped_matrix *>(it2->second.get())) 00178 p->fetch(std::make_pair("i", "j"), vector_size_, fetched, stream); 00179 00180 00181 vcl_size_t i = 0; 00182 for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ 00183 std::string str; 00184 detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("i", "j"), -1, str, mapping[i++])); 00185 stream << str << ";" << std::endl; 00186 } 00187 00188 //Writes back 00189 for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ 00190 if(detail::mapped_handle * p = dynamic_cast<detail::mapped_handle *>(at(mapping.at(std::distance(statements.begin(),it)), std::make_pair(&it->second,detail::LHS_NODE_TYPE)).get())) 00191 p->write_back(std::make_pair("i", "j"), fetched, stream); 00192 } 00193 00194 stream.dec_tab(); 00195 stream << "}" << std::endl; 00196 stream.dec_tab(); 00197 stream << "}" << std::endl; 00198 } 00199 00200 private: 00201 vcl_size_t num_groups_row_; 00202 vcl_size_t num_groups_col_; 00203 00204 unsigned int decomposition_; 00205 }; 00206 } 00207 00208 } 00209 00210 #endif