ViennaCL - The Vienna Computing Library  1.5.0
viennacl/linalg/detail/spai/qr.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_DETAIL_SPAI_QR_HPP
00002 #define VIENNACL_LINALG_DETAIL_SPAI_QR_HPP
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2013, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00027 #include <utility>
00028 #include <iostream>
00029 #include <fstream>
00030 #include <string>
00031 #include <algorithm>
00032 #include <vector>
00033 #include <math.h>
00034 #include <cmath>
00035 #include <sstream>
00036 #include "viennacl/ocl/backend.hpp"
00037 #include "boost/numeric/ublas/vector.hpp"
00038 #include "boost/numeric/ublas/matrix.hpp"
00039 #include "boost/numeric/ublas/matrix_proxy.hpp"
00040 #include "boost/numeric/ublas/storage.hpp"
00041 #include "boost/numeric/ublas/io.hpp"
00042 #include "boost/numeric/ublas/matrix_expression.hpp"
00043 #include "boost/numeric/ublas/detail/matrix_assign.hpp"
00044 
00045 #include "viennacl/vector.hpp"
00046 #include "viennacl/matrix.hpp"
00047 
00048 #include "viennacl/linalg/detail/spai/block_matrix.hpp"
00049 #include "viennacl/linalg/detail/spai/block_vector.hpp"
00050 #include "viennacl/linalg/opencl/kernels/spai.hpp"
00051 
00052 namespace viennacl
00053 {
00054     namespace linalg
00055     {
00056       namespace detail
00057       {
00058         namespace spai
00059         {
00060 
00061           //********** DEBUG FUNCTIONS *****************//
00062           template< typename T, typename InputIterator>
00063           void Print(std::ostream& ostr, InputIterator it_begin, InputIterator it_end){
00064               //std::ostream_iterator<int> it_os(ostr, delimiter);
00065               std::string delimiters = " ";
00066               std::copy(it_begin, it_end, std::ostream_iterator<T>(ostr, delimiters.c_str()));
00067               ostr<<std::endl;
00068           }
00069 
00070           template<typename VectorType, typename MatrixType>
00071           void write_to_block(VectorType& con_A_I_J, unsigned int start_ind,  const std::vector<unsigned int>& I, const std::vector<unsigned int>& J, MatrixType& m){
00072               m.resize(I.size(), J.size(), false);
00073               for(vcl_size_t i = 0; i < J.size(); ++i){
00074                   for(vcl_size_t j = 0; j < I.size(); ++j){
00075                       m(j,i) = con_A_I_J[start_ind + i*I.size() + j];
00076                   }
00077               }
00078           }
00079 
00080           template<typename VectorType>
00081           void print_continious_matrix(VectorType& con_A_I_J, std::vector<cl_uint>& blocks_ind,
00082                                       const std::vector<std::vector<unsigned int> >& g_I, const std::vector<std::vector<unsigned int> >& g_J){
00083               typedef typename VectorType::value_type ScalarType;
00084               std::vector<boost::numeric::ublas::matrix<ScalarType> > com_A_I_J(g_I.size());
00085               for(vcl_size_t i = 0; i < g_I.size(); ++i){
00086                   write_to_block( con_A_I_J, blocks_ind[i], g_I[i], g_J[i], com_A_I_J[i]);
00087                   std::cout<<com_A_I_J[i]<<std::endl;
00088               }
00089           }
00090           template<typename VectorType>
00091           void print_continious_vector(VectorType& con_v, std::vector<cl_uint>& block_ind, const std::vector<std::vector<unsigned int> >& g_J){
00092               typedef typename VectorType::value_type ScalarType;
00093               std::vector<boost::numeric::ublas::vector<ScalarType> > com_v(g_J.size());
00094               //Print<ScalarType>(std::cout, con_v.begin(), con_v.end());
00095               for(vcl_size_t i = 0; i < g_J.size(); ++i){
00096                   com_v[i].resize(g_J[i].size());
00097                   for(vcl_size_t j = 0; j < g_J[i].size(); ++j){
00098                       com_v[i](j) = con_v[block_ind[i] + j];
00099                   }
00100                   std::cout<<com_v[i]<<std::endl;
00101               }
00102           }
00103 
00105 
00112           inline void compute_blocks_size(const std::vector<std::vector<unsigned int> >& g_I, const std::vector<std::vector<unsigned int> >& g_J,
00113                                           unsigned int& sz, std::vector<cl_uint>& blocks_ind, std::vector<cl_uint>& matrix_dims)
00114           {
00115               sz = 0;
00116               for(vcl_size_t i = 0; i < g_I.size(); ++i){
00117                   sz += static_cast<unsigned int>(g_I[i].size()*g_J[i].size());
00118                   matrix_dims[2*i] = static_cast<cl_uint>(g_I[i].size());
00119                   matrix_dims[2*i + 1] = static_cast<cl_uint>(g_J[i].size());
00120                   blocks_ind[i+1] = blocks_ind[i] + static_cast<cl_uint>(g_I[i].size()*g_J[i].size());
00121 
00122               }
00123           }
00128           template <typename SizeType>
00129           void get_size(const std::vector<std::vector<SizeType> >& inds, SizeType & size){
00130               size = 0;
00131               for (vcl_size_t i = 0; i < inds.size(); ++i) {
00132                   size += static_cast<unsigned int>(inds[i].size());
00133               }
00134           }
00135 
00140           template <typename SizeType>
00141           void init_start_inds(const std::vector<std::vector<SizeType> >& inds, std::vector<cl_uint>& start_inds){
00142               for(vcl_size_t i = 0; i < inds.size(); ++i){
00143                   start_inds[i+1] = start_inds[i] + static_cast<cl_uint>(inds[i].size());
00144               }
00145           }
00146 
00147           //*************************************  QR FUNCTIONS  ***************************************//
00153           template<typename MatrixType, typename ScalarType>
00154           void dot_prod(const MatrixType& A,  unsigned int beg_ind, ScalarType& res){
00155               res = static_cast<ScalarType>(0);
00156               for(vcl_size_t i = beg_ind; i < A.size1(); ++i){
00157                   res += A(i, beg_ind-1)*A(i, beg_ind-1);
00158               }
00159           }
00167           template<typename MatrixType, typename VectorType, typename ScalarType>
00168           void custom_inner_prod(const MatrixType& A, const VectorType& v, unsigned int col_ind, unsigned int start_ind, ScalarType& res){
00169               res = static_cast<ScalarType>(0);
00170               for(unsigned int i = start_ind; i < static_cast<unsigned int>(A.size1()); ++i){
00171                   res += A(i, col_ind)*v(i);
00172               }
00173           }
00174 
00180           template<typename MatrixType, typename VectorType>
00181           void copy_vector(const MatrixType & A, VectorType & v, const unsigned int beg_ind){
00182               for(unsigned int i = beg_ind; i < static_cast<unsigned int>(A.size1()); ++i){
00183                   v(i) = A( i, beg_ind-1);
00184               }
00185           }
00186 
00187           //householder reflection c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.210
00194           template<typename MatrixType, typename VectorType, typename ScalarType>
00195           void householder_vector(const MatrixType& A, unsigned int j, VectorType& v, ScalarType& b)
00196           {
00197               ScalarType sg;
00198               //
00199               dot_prod(A, j+1, sg);
00200               copy_vector(A, v, j+1);
00201               ScalarType mu;
00202               v(j) = static_cast<ScalarType>(1.0);
00203               if(sg == 0){
00204                   b = 0;
00205               }
00206               else{
00207                   mu = std::sqrt(A(j,j)*A(j, j) + sg);
00208                   if(A(j, j) <= 0){
00209                       v(j) = A(j, j) - mu;
00210                   }else{
00211                       v(j) = -sg/(A(j, j) + mu);
00212                   }
00213                   b = 2*(v(j)*v(j))/(sg + v(j)*v(j));
00214                   v = v/v(j);
00215               }
00216           }
00223           template<typename MatrixType, typename VectorType, typename ScalarType>
00224           void apply_householder_reflection(MatrixType& A, unsigned int iter_cnt, VectorType& v, ScalarType b)
00225           {
00226               //update every column of matrix A
00227               ScalarType in_prod_res;
00228               for(unsigned int i = iter_cnt; i < static_cast<unsigned int>(A.size2()); ++i){
00229                   //update each column in a fashion: ai = ai - b*v*(v'*ai)
00230                   custom_inner_prod(A, v, i, iter_cnt, in_prod_res);
00231                   for(unsigned int j = iter_cnt; j < static_cast<unsigned int>(A.size1()); ++j){
00232                       A(j, i) -= b*in_prod_res*v(j);
00233                   }
00234               }
00235           }
00236 
00242           template<typename MatrixType, typename VectorType>
00243           void store_householder_vector(MatrixType& A, unsigned int ind, VectorType& v)
00244           {
00245               for(unsigned int i = ind; i < static_cast<unsigned int>(A.size1()); ++i){
00246                   A(i, ind-1) = v(i);
00247               }
00248           }
00249 
00250 
00251           //QR algorithm
00256           template<typename MatrixType, typename VectorType>
00257           void single_qr(MatrixType& R, VectorType& b_v)
00258           {
00259               typedef typename MatrixType::value_type ScalarType;
00260               if((R.size1() > 0) && (R.size2() > 0)){
00261                   VectorType v = (VectorType)boost::numeric::ublas::zero_vector<ScalarType>(R.size1());
00262                   b_v = (VectorType)boost::numeric::ublas::zero_vector<ScalarType>(R.size2());
00263                   for(unsigned int i = 0; i < static_cast<unsigned int>(R.size2()); ++i){
00264                       householder_vector(R, i, v, b_v[i]);
00265                       apply_householder_reflection(R, i, v, b_v[i]);
00266                       if(i < R.size1()) store_householder_vector(R, i+1, v);
00267                   }
00268               }
00269           }
00270 
00271           //********************** HELP FUNCTIONS FOR GPU-based QR factorization *************************//
00272           /* * @brief Reading from text file into string
00273           * @param file_name file name
00274           * @param kernel_source string that contains file
00275 
00276           void read_kernel_from_file(std::string& file_name, std::string& kernel_source){
00277               std::ifstream ifs(file_name.c_str(), std::ifstream::in);
00278 
00279               if (!ifs)
00280                 std::cerr << "WARNING: Cannot open file " << file_name << std::endl;
00281 
00282               std::string line;
00283               std::ostringstream ost;
00284               while (std::getline(ifs, line)) {
00285                   ost<<line<<std::endl;
00286               }
00287               kernel_source = ost.str();
00288           }*/
00289 
00294           template <typename SizeType>
00295           void get_max_block_size(const std::vector<std::vector<SizeType> >& inds, SizeType & max_size)
00296           {
00297               max_size = 0;
00298               for(vcl_size_t i = 0; i < inds.size(); ++i){
00299                   if(inds[i].size() > max_size){
00300                       max_size = static_cast<SizeType>(inds[i].size());
00301                   }
00302               }
00303           }
00304 
00311           template<typename MatrixType, typename VectorType, typename ScalarType>
00312           void custom_dot_prod(const MatrixType& A, const VectorType& v, unsigned int ind, ScalarType& res)
00313           {
00314               res = static_cast<ScalarType>(0);
00315               for(unsigned int j = ind; j < A.size1(); ++j){
00316                   if(j == ind){
00317                       res += v(j);
00318                   }else{
00319                       res += A(j, ind)*v(j);
00320                   }
00321               }
00322           }
00323 
00329           template<typename MatrixType, typename VectorType>
00330           void apply_q_trans_vec(const MatrixType& R, const VectorType& b_v, VectorType& y)
00331           {
00332               typedef typename MatrixType::value_type ScalarType;
00333               ScalarType inn_prod = static_cast<ScalarType>(0);
00334               for(vcl_size_t i = 0; i < R.size2(); ++i){
00335                   custom_dot_prod(R, y, static_cast<unsigned int>(i), inn_prod);
00336                   for(vcl_size_t j = i; j < R.size1(); ++j){
00337                       if(i == j){
00338                           y(j) -= b_v(i)*inn_prod;
00339                       }
00340                       else{
00341                           y(j) -= b_v(i)*inn_prod*R(j,i);
00342                       }
00343                   }
00344               }
00345           }
00346 
00352           template<typename MatrixType, typename VectorType>
00353           void apply_q_trans_mat(const MatrixType& R, const VectorType& b_v, MatrixType& A)
00354           {
00355               VectorType tmp_v;
00356               for(vcl_size_t i = 0; i < A.size2(); ++i){
00357                   tmp_v = (VectorType)column(A,i);
00358                   apply_q_trans_vec(R, b_v, tmp_v);
00359                   column(A,i) = tmp_v;
00360               }
00361           }
00362 
00363           //parallel QR for GPU
00373           template<typename ScalarType>
00374           void block_qr(std::vector<std::vector<unsigned int> >& g_I,
00375                         std::vector<std::vector<unsigned int> >& g_J,
00376                         block_matrix& g_A_I_J_vcl,
00377                         block_vector& g_bv_vcl,
00378                         std::vector<cl_uint>& g_is_update,
00379                         viennacl::context ctx)
00380           {
00381             viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context &>(ctx.opencl_context());
00382 
00383             //typedef typename MatrixType::value_type ScalarType;
00384             unsigned int bv_size = 0;
00385             unsigned int v_size = 0;
00386             //set up arguments for GPU
00387             //find maximum size of rows/columns
00388             unsigned int local_r_n = 0;
00389             unsigned int local_c_n = 0;
00390             //find max size for blocks
00391             get_max_block_size(g_I, local_r_n);
00392             get_max_block_size(g_J, local_c_n);
00393             //get size
00394             get_size(g_J, bv_size);
00395             get_size(g_I, v_size);
00396             //get start indices
00397             std::vector<cl_uint> start_bv_inds(g_I.size() + 1, 0);
00398             std::vector<cl_uint> start_v_inds(g_I.size() + 1, 0);
00399             init_start_inds(g_J, start_bv_inds);
00400             init_start_inds(g_I, start_v_inds);
00401             //init arrays
00402             std::vector<ScalarType> b_v(bv_size, static_cast<ScalarType>(0));
00403             std::vector<ScalarType> v(v_size, static_cast<ScalarType>(0));
00404             //call qr program
00405             block_vector v_vcl;
00406 
00407             g_bv_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
00408                                                          static_cast<unsigned int>(sizeof(ScalarType)*bv_size),
00409                                                          &(b_v[0]));
00410 
00411             v_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
00412                                                       static_cast<unsigned int>(sizeof(ScalarType)*v_size),
00413                                                       &(v[0]));
00414             //the same as j_start_inds
00415             g_bv_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
00416                                                           static_cast<unsigned int>(sizeof(cl_uint)*g_I.size()),
00417                                                           &(start_bv_inds[0]));
00418 
00419             v_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
00420                                                        static_cast<unsigned int>(sizeof(cl_uint)*g_I.size()),
00421                                                        &(start_v_inds[0]));
00422             viennacl::ocl::handle<cl_mem> g_is_update_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
00423                                                                                      static_cast<unsigned int>(sizeof(cl_uint)*g_is_update.size()),
00424                                                                                      &(g_is_update[0]));
00425             //local memory
00426             //viennacl::ocl::enqueue(k(vcl_vec, size, viennacl::ocl::local_mem(sizeof(SCALARTYPE) * k.local_work_size()), temp));
00427             viennacl::linalg::opencl::kernels::spai<ScalarType>::init(opencl_ctx);
00428             viennacl::ocl::kernel& qr_kernel = opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<ScalarType>::program_name(), "block_qr");
00429             qr_kernel.local_work_size(0, local_c_n);
00430             qr_kernel.global_work_size(0, local_c_n*256);
00431             viennacl::ocl::enqueue(qr_kernel(g_A_I_J_vcl.handle(), g_A_I_J_vcl.handle1(), g_bv_vcl.handle(),
00432                                             v_vcl.handle(), g_A_I_J_vcl.handle2(),
00433                                             g_bv_vcl.handle1(), v_vcl.handle1(), g_is_update_vcl,
00434                                             viennacl::ocl::local_mem(static_cast<unsigned int>(sizeof(ScalarType)*(local_r_n*local_c_n))),
00435                                             static_cast<cl_uint>(g_I.size())));
00436 
00437           }
00438         }
00439       }
00440     }
00441 }
00442 #endif