ViennaCL - The Vienna Computing Library
1.5.0
|
00001 #ifndef VIENNACL_GENERATOR_GENERATE_VECTOR_REDUCTION_HPP 00002 #define VIENNACL_GENERATOR_GENERATE_VECTOR_REDUCTION_HPP 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2013, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00021 00027 #include <vector> 00028 00029 #include "viennacl/scheduler/forwards.h" 00030 00031 #include "viennacl/generator/mapped_objects.hpp" 00032 #include "viennacl/generator/helpers.hpp" 00033 #include "viennacl/generator/utils.hpp" 00034 00035 #include "viennacl/generator/profile_base.hpp" 00036 00037 #include "viennacl/tools/tools.hpp" 00038 00039 namespace viennacl{ 00040 00041 namespace generator{ 00042 00044 class vector_reduction : public profile_base{ 00045 00046 vcl_size_t lmem_used(vcl_size_t scalartype_size) const { 00047 return m_*(k_+1)*scalartype_size; 00048 } 00049 00050 public: 00052 vector_reduction(unsigned int vectorization, unsigned int m, unsigned int k, unsigned int num_groups) : profile_base(vectorization, m, k, 1), m_(m), k_(k), num_groups_(num_groups){ } 00053 00054 00055 static std::string csv_format() { 00056 return "Vec,M,K,NumGroups"; 00057 } 00058 00059 std::string csv_representation() const{ 00060 std::ostringstream oss; 00061 oss << vector_size_ 00062 << "," << m_ 00063 << "," << k_ 00064 << "," << num_groups_; 00065 return oss.str(); 00066 } 00067 00068 unsigned int m() const { return m_; } 00069 00070 unsigned int k() const { return k_; } 00071 00072 unsigned int num_groups() const { return num_groups_; } 00073 00074 void configure_range_enqueue_arguments(vcl_size_t kernel_id, statements_type const & statements, viennacl::ocl::kernel & kernel, unsigned int & n_arg) const{ 00075 00076 configure_local_sizes(kernel, kernel_id); 00077 kernel.global_work_size(0,m_*num_groups_); 00078 kernel.global_work_size(1,k_); 00079 00080 00081 for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ 00082 scheduler::statement::container_type exprs = it->first.array(); 00083 for(scheduler::statement::container_type::iterator iit = exprs.begin() ; iit != exprs.end() ; ++iit){ 00084 if(iit->op.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE){ 00085 scheduler::statement_node const * current_node = &(*iit); 00086 //The LHS of the prod is a matrix 00087 if(current_node->lhs.type_family==scheduler::MATRIX_TYPE_FAMILY) 00088 { 00089 kernel.arg(n_arg++, cl_uint(utils::call_on_matrix(current_node->lhs, utils::internal_size1_fun()))); 00090 kernel.arg(n_arg++, cl_uint(utils::call_on_matrix(current_node->lhs, utils::internal_size2_fun()))); 00091 return; 00092 } 00093 else{ 00094 //The LHS of the prod is a matrix expression 00095 current_node = &exprs[current_node->lhs.node_index]; 00096 if(current_node->lhs.type_family==scheduler::MATRIX_TYPE_FAMILY) 00097 { 00098 kernel.arg(n_arg++, cl_uint(utils::call_on_matrix(current_node->lhs, utils::internal_size1_fun()))); 00099 kernel.arg(n_arg++, cl_uint(utils::call_on_matrix(current_node->lhs, utils::internal_size2_fun()))); 00100 return; 00101 } 00102 else if(current_node->rhs.type_family==scheduler::MATRIX_TYPE_FAMILY) 00103 { 00104 kernel.arg(n_arg++, cl_uint(utils::call_on_matrix(current_node->lhs, utils::internal_size1_fun()))); 00105 kernel.arg(n_arg++, cl_uint(utils::call_on_matrix(current_node->lhs, utils::internal_size2_fun()))); 00106 return; 00107 } 00108 else{ 00109 assert(false && bool("unexpected expression tree")); 00110 } 00111 } 00112 return; 00113 } 00114 } 00115 } 00116 } 00117 00118 void kernel_arguments(statements_type const & /*statements*/, std::string & arguments_string) const{ 00119 arguments_string += detail::generate_value_kernel_argument("unsigned int", "M"); 00120 arguments_string += detail::generate_value_kernel_argument("unsigned int", "N"); 00121 } 00122 00123 private: 00124 void core(vcl_size_t /*kernel_id*/, utils::kernel_generation_stream& stream, statements_type const & statements, std::vector<detail::mapping_type> const & mapping) const { 00125 00126 std::vector<detail::mapped_vector_reduction*> exprs; 00127 for(std::vector<detail::mapping_type>::const_iterator it = mapping.begin() ; it != mapping.end() ; ++it){ 00128 for(detail::mapping_type::const_iterator iit = it->begin() ; iit != it->end() ; ++iit){ 00129 if(detail::mapped_vector_reduction * p = dynamic_cast<detail::mapped_vector_reduction*>(iit->second.get())) 00130 exprs.push_back(p); 00131 if(detail::mapped_matrix * p = dynamic_cast<detail::mapped_matrix*>(iit->second.get())) 00132 p->bind_sizes("M","N"); 00133 } 00134 } 00135 00136 vcl_size_t lsize1 = m_; 00137 vcl_size_t lsize2 = k_+1; 00138 std::string scalartype = "float"; 00139 bool is_lhs_transposed = false; 00140 if(exprs.front()->root_node().lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) 00141 if(exprs.front()->statement().array()[exprs.front()->root_node().lhs.node_index].op.type==scheduler::OPERATION_UNARY_TRANS_TYPE) 00142 is_lhs_transposed = true; 00143 00144 std::string size1 = "M", size2 = "N"; 00145 if(is_lhs_transposed) 00146 std::swap(size1, size2); 00147 00148 for(std::vector<detail::mapped_vector_reduction*>::iterator it = exprs.begin() ; it != exprs.end() ; ++it){ 00149 stream << "__local " << (*it)->scalartype() << " buf" << std::distance(exprs.begin(), it) << '[' << lsize1*lsize2 << "];" << std::endl; 00150 } 00151 00152 stream << "unsigned int lid0 = get_local_id(0);" << std::endl; 00153 stream << "unsigned int lid1 = get_local_id(1);" << std::endl; 00154 00155 00156 stream << "for(unsigned int r = get_global_id(0) ; r < " << size1 << " ; r += get_global_size(0)){" << std::endl; 00157 stream.inc_tab(); 00158 00159 for(vcl_size_t k = 0 ; k < exprs.size() ; ++k) 00160 stream << scalartype << " sum" << k << " = 0;" << std::endl; 00161 00162 stream << "for(unsigned int c = get_local_id(1) ; c < " << size2 << " ; c += get_local_size(1)){" << std::endl; 00163 stream.inc_tab(); 00164 00165 std::set<std::string> fetched; 00166 00167 for(std::vector<detail::mapped_vector_reduction*>::iterator it = exprs.begin() ; it != exprs.end() ; ++it){ 00168 viennacl::scheduler::statement const & statement = (*it)->statement(); 00169 viennacl::scheduler::statement_node const & root_node = (*it)->root_node(); 00170 if(is_lhs_transposed) 00171 detail::fetch_all_lhs(fetched,statement,root_node, std::make_pair("c", "r"),vector_size_,stream,(*it)->mapping()); 00172 else 00173 detail::fetch_all_lhs(fetched,statement,root_node, std::make_pair("r", "c"),vector_size_,stream,(*it)->mapping()); 00174 00175 detail::fetch_all_rhs(fetched,statement,root_node, std::make_pair("c", "0"),vector_size_,stream,(*it)->mapping()); 00176 } 00177 00178 00179 //Update sums; 00180 for(std::vector<detail::mapped_vector_reduction*>::iterator it = exprs.begin() ; it != exprs.end() ; ++it){ 00181 viennacl::scheduler::statement const & statement = (*it)->statement(); 00182 viennacl::scheduler::statement_node const & root_node = (*it)->root_node(); 00183 std::string str; 00184 detail::generate_all_lhs(statement,root_node,std::make_pair("i","0"),-1,str,(*it)->mapping()); 00185 str += "*"; 00186 detail::generate_all_rhs(statement,root_node,std::make_pair("i","0"),-1,str,(*it)->mapping()); 00187 stream << " sum" << std::distance(exprs.begin(),it) << " += " << str << ";" << std::endl; 00188 } 00189 00190 00191 stream.dec_tab(); 00192 stream << "}" << std::endl; 00193 00194 for(vcl_size_t k = 0 ; k < exprs.size() ; ++k){ 00195 stream << "buf" << k << "[lid0*" << lsize2 << "+ lid1] = sum" << k << ";" << std::endl; 00196 } 00197 00198 for(unsigned int stride = k_/2 ; stride>1 ; stride /=2){ 00199 stream << "barrier(CLK_LOCAL_MEM_FENCE); " << std::endl; 00200 stream << "if(lid1 < " << stride << ")" ; 00201 stream << "{" << std::endl; 00202 stream.inc_tab(); 00203 00204 for(vcl_size_t i = 0 ; i < exprs.size() ; ++i) 00205 stream << "buf" << i << "[lid0*" << lsize2 << "+ lid1] += buf" << i << "[lid0*" << lsize2 << "+ lid1 + " << stride << "];" << std::endl; 00206 00207 stream.dec_tab(); 00208 stream << "}" << std::endl; 00209 } 00210 00211 00212 stream << "barrier(CLK_LOCAL_MEM_FENCE); " << std::endl; 00213 stream << "if(lid1 == 0)" ; 00214 stream << "{" << std::endl; 00215 stream.inc_tab(); 00216 for(vcl_size_t i = 0 ; i < exprs.size() ; ++i){ 00217 stream << "buf" << i << "[lid0*" << lsize2 << "] += buf" << i << "[lid0*" << lsize2 << "+ 1];" << std::endl; 00218 exprs[i]->access_name("buf"+utils::to_string(i)+"[lid0*"+utils::to_string(lsize2)+"]"); 00219 } 00220 vcl_size_t i = 0; 00221 for(statements_type::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ 00222 std::string str; 00223 detail::traverse(it->first, it->second, detail::expression_generation_traversal(std::make_pair("r","0"), -1, str, mapping[i++]), false); 00224 stream << str << ";" << std::endl; 00225 } 00226 stream.dec_tab(); 00227 stream << "}" << std::endl; 00228 00229 00230 stream.dec_tab(); 00231 stream << "}" << std::endl; 00232 00233 } 00234 00235 private: 00236 unsigned int m_; 00237 unsigned int k_; 00238 unsigned int num_groups_; 00239 }; 00240 } 00241 } 00242 00243 #endif