ViennaCL - The Vienna Computing Library  1.5.0
viennacl/linalg/detail/spai/fspai.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP
00002 #define VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2013, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00021 #include <utility>
00022 #include <iostream>
00023 #include <fstream>
00024 #include <string>
00025 #include <algorithm>
00026 #include <vector>
00027 #include <math.h>
00028 #include <map>
00029 
00030 //boost includes
00031 #include "boost/numeric/ublas/vector.hpp"
00032 #include "boost/numeric/ublas/matrix.hpp"
00033 #include "boost/numeric/ublas/matrix_proxy.hpp"
00034 #include "boost/numeric/ublas/vector_proxy.hpp"
00035 #include "boost/numeric/ublas/storage.hpp"
00036 #include "boost/numeric/ublas/io.hpp"
00037 #include "boost/numeric/ublas/lu.hpp"
00038 #include "boost/numeric/ublas/triangular.hpp"
00039 #include "boost/numeric/ublas/matrix_expression.hpp"
00040 
00041 // ViennaCL includes
00042 #include "viennacl/linalg/prod.hpp"
00043 #include "viennacl/matrix.hpp"
00044 #include "viennacl/compressed_matrix.hpp"
00045 #include "viennacl/linalg/sparse_matrix_operations.hpp"
00046 #include "viennacl/linalg/matrix_operations.hpp"
00047 #include "viennacl/scalar.hpp"
00048 #include "viennacl/linalg/cg.hpp"
00049 #include "viennacl/linalg/inner_prod.hpp"
00050 #include "viennacl/linalg/ilu.hpp"
00051 //#include <omp.h>
00052 
00057 namespace viennacl
00058 {
00059     namespace linalg
00060     {
00061       namespace detail
00062       {
00063         namespace spai
00064         {
00065 
00070           class fspai_tag{
00077           public:
00078               fspai_tag(
00079                       double residual_norm_threshold = 1e-3,
00080                       unsigned int iteration_limit = 5,
00081                       bool is_static = false,
00082                       bool is_right = false) :
00083               residual_norm_threshold_(residual_norm_threshold),
00084               iteration_limit_(iteration_limit),
00085               is_static_(is_static),
00086               is_right_(is_right) {}
00087 
00088               inline double getResidualNormThreshold() const
00089               { return residual_norm_threshold_; }
00090               inline unsigned long getIterationLimit () const
00091               { return iteration_limit_; }
00092               inline bool getIsStatic() const
00093               { return is_static_; }
00094               inline bool getIsRight() const
00095               { return is_right_; }
00096               inline void setResidualNormThreshold(double residual_norm_threshold){
00097                   if(residual_norm_threshold > 0)
00098                       residual_norm_threshold_ = residual_norm_threshold;
00099               }
00100               inline void setIterationLimit(unsigned long iteration_limit){
00101                   if(iteration_limit > 0)
00102                       iteration_limit_ = iteration_limit;
00103               }
00104               inline void setIsRight(bool is_right){
00105                   is_right_ = is_right;
00106               }
00107               inline void setIsStatic(bool is_static){
00108                   is_static_ = is_static;
00109               }
00110 
00111           private:
00112               double residual_norm_threshold_;
00113               unsigned long iteration_limit_;
00114               bool is_static_;
00115               bool is_right_;
00116           };
00117 
00118 
00119           //
00120           // Helper: Store A in an STL container of type, exploiting symmetry
00121           // Reason: ublas interface does not allow to iterate over nonzeros of a particular row without starting an iterator1 from the very beginning of the matrix...
00122           //
00123           template <typename MatrixType, typename ScalarType>
00124           void sym_sparse_matrix_to_stl(MatrixType const & A, std::vector<std::map<unsigned int, ScalarType> > & STL_A)
00125           {
00126             STL_A.resize(A.size1());
00127             for (typename MatrixType::const_iterator1 row_it  = A.begin1();
00128                                                       row_it != A.end1();
00129                                                     ++row_it)
00130             {
00131               for (typename MatrixType::const_iterator2 col_it  = row_it.begin();
00132                                                         col_it != row_it.end();
00133                                                       ++col_it)
00134               {
00135                 if (col_it.index1() >= col_it.index2())
00136                   STL_A[col_it.index1()][static_cast<unsigned int>(col_it.index2())] = *col_it;
00137                 else
00138                   break; //go to next row
00139               }
00140             }
00141           }
00142 
00143 
00144           //
00145           // Generate index sets J_k, k=0,...,N-1
00146           //
00147           template <typename MatrixType>
00148           void generateJ(MatrixType const & A, std::vector<std::vector<vcl_size_t> > & J)
00149           {
00150             for (typename MatrixType::const_iterator1 row_it  = A.begin1();
00151                                                       row_it != A.end1();
00152                                                     ++row_it)
00153             {
00154               for (typename MatrixType::const_iterator2 col_it  = row_it.begin();
00155                                                         col_it != row_it.end();
00156                                                       ++col_it)
00157               {
00158                 if (col_it.index1() > col_it.index2()) //Matrix is symmetric, thus only work on lower triangular part
00159                 {
00160                   J[col_it.index2()].push_back(col_it.index1());
00161                   J[col_it.index1()].push_back(col_it.index2());
00162                 }
00163                 else
00164                   break; //go to next row
00165               }
00166             }
00167           }
00168 
00169 
00170           //
00171           // Extracts the blocks A(\tilde{J}_k, \tilde{J}_k) from A
00172           // Sets up y_k = A(\tilde{J}_k, k) for the inplace-solution after Cholesky-factoriation
00173           //
00174           template <typename ScalarType, typename MatrixType, typename VectorType>
00175           void fill_blocks(std::vector< std::map<unsigned int, ScalarType> > & A,
00176                           std::vector<MatrixType> & blocks,
00177                           std::vector<std::vector<vcl_size_t> > const & J,
00178                           std::vector<VectorType> & Y)
00179           {
00180             for (vcl_size_t k=0; k<A.size(); ++k)
00181             {
00182               std::vector<vcl_size_t> const & Jk = J[k];
00183               VectorType & yk = Y[k];
00184               MatrixType & block_k = blocks[k];
00185 
00186               yk.resize(Jk.size());
00187               block_k.resize(Jk.size(), Jk.size());
00188               block_k.clear();
00189 
00190               for (vcl_size_t i=0; i<Jk.size(); ++i)
00191               {
00192                 vcl_size_t row_index = Jk[i];
00193                 std::map<unsigned int, ScalarType> & A_row = A[row_index];
00194 
00195                 //fill y_k:
00196                 yk[i] = A_row[static_cast<unsigned int>(k)];
00197 
00198                 for (vcl_size_t j=0; j<Jk.size(); ++j)
00199                 {
00200                   vcl_size_t col_index = Jk[j];
00201                   if (col_index <= row_index && A_row.find(static_cast<unsigned int>(col_index)) != A_row.end()) //block is symmetric, thus store only lower triangular part
00202                     block_k(i, j) = A_row[static_cast<unsigned int>(col_index)];
00203                 }
00204               }
00205             }
00206           }
00207 
00208 
00209           //
00210           // Perform Cholesky factorization of A inplace. Cf. Schwarz: Numerische Mathematik, vol 5, p. 58
00211           //
00212           template <typename MatrixType>
00213           void cholesky_decompose(MatrixType & A)
00214           {
00215             for (vcl_size_t k=0; k<A.size2(); ++k)
00216             {
00217               if (A(k,k) <= 0)
00218               {
00219                 std::cout << "k: " << k << std::endl;
00220                 std::cout << "A(k,k): " << A(k,k) << std::endl;
00221               }
00222 
00223               assert(A(k,k) > 0);
00224 
00225               A(k,k) = std::sqrt(A(k,k));
00226 
00227               for (vcl_size_t i=k+1; i<A.size1(); ++i)
00228               {
00229                 A(i,k) /= A(k,k);
00230                 for (vcl_size_t j=k+1; j<=i; ++j)
00231                   A(i,j) -= A(i,k) * A(j,k);
00232               }
00233             }
00234           }
00235 
00236 
00237           //
00238           // Compute x in Ax = b, where A is already Cholesky factored (A = L L^T)
00239           //
00240           template <typename MatrixType, typename VectorType>
00241           void cholesky_solve(MatrixType const & L, VectorType & b)
00242           {
00243             // inplace forward solve L x = b
00244             for (vcl_size_t i=0; i<L.size1(); ++i)
00245             {
00246               for (vcl_size_t j=0; j<i; ++j)
00247                 b[i] -= L(i,j) * b[j];
00248               b[i] /= L(i,i);
00249             }
00250 
00251             // inplace backward solve L^T x = b:
00252             for (vcl_size_t i=L.size1()-1; ; --i)
00253             {
00254               for (vcl_size_t k=i+1; k<L.size1(); ++k)
00255                 b[i] -= L(k,i) * b[k];
00256               b[i] /= L(i,i);
00257 
00258               if (i==0) //vcl_size_t might be unsigned, therefore manual check for equality with zero here
00259                 break;
00260             }
00261           }
00262 
00263 
00264 
00265           //
00266           // Compute the Cholesky factor L from the sparse vectors y_k
00267           //
00268           template <typename MatrixType, typename VectorType1>
00269           void computeL(MatrixType const & A,
00270                         MatrixType & L,
00271                         MatrixType & L_trans,
00272                         std::vector<VectorType1> & Y,
00273                         std::vector<std::vector<vcl_size_t> > & J)
00274           {
00275             typedef typename VectorType1::value_type    ScalarType;
00276             typedef std::vector<std::map<unsigned int, ScalarType> >     STLSparseMatrixType;
00277 
00278             STLSparseMatrixType L_temp(A.size1());
00279 
00280             for (vcl_size_t k=0; k<A.size1(); ++k)
00281             {
00282               std::vector<vcl_size_t> const & Jk = J[k];
00283               VectorType1 const & yk = Y[k];
00284 
00285               //compute L(k,k):
00286               ScalarType Lkk = A(k,k);
00287               for (vcl_size_t i=0; i<Jk.size(); ++i)
00288                 Lkk -= A(Jk[i],k) * yk[i];
00289 
00290               Lkk = ScalarType(1) / std::sqrt(Lkk);
00291               L_temp[k][static_cast<unsigned int>(k)] = Lkk;
00292               L_trans(k,k) = Lkk;
00293 
00294               //write lower diagonal entries:
00295               for (vcl_size_t i=0; i<Jk.size(); ++i)
00296               {
00297                 L_temp[Jk[i]][static_cast<unsigned int>(k)] = -Lkk * yk[i];
00298                 L_trans(k, Jk[i]) = -Lkk * yk[i];
00299               }
00300             } //for k
00301 
00302 
00303             //build L from L_temp
00304             for (vcl_size_t i=0; i<L_temp.size(); ++i)
00305               for (typename std::map<unsigned int, ScalarType>::const_iterator it = L_temp[i].begin();
00306                   it != L_temp[i].end();
00307                 ++it)
00308                   L(i, it->first) = it->second;
00309           }
00310 
00311 
00312           //
00313           // Top level FSPAI function
00314           //
00315           template <typename MatrixType>
00316           void computeFSPAI(MatrixType const & A,
00317                             MatrixType const & PatternA,
00318                             MatrixType & L,
00319                             MatrixType & L_trans,
00320                             fspai_tag)
00321           {
00322             typedef typename MatrixType::value_type              ScalarType;
00323             typedef boost::numeric::ublas::matrix<ScalarType>    DenseMatrixType;
00324             typedef std::vector<std::map<unsigned int, ScalarType> >     SparseMatrixType;
00325 
00326             //
00327             // preprocessing: Store A in a STL container:
00328             //
00329             //std::cout << "Transferring to STL container:" << std::endl;
00330             std::vector<std::vector<ScalarType> >    y_k(A.size1());
00331             SparseMatrixType   STL_A(A.size1());
00332             sym_sparse_matrix_to_stl(A, STL_A);
00333 
00334 
00335             //
00336             // Step 1: Generate pattern indices
00337             //
00338             //std::cout << "computeFSPAI(): Generating pattern..." << std::endl;
00339             std::vector<std::vector<vcl_size_t> > J(A.size1());
00340             generateJ(PatternA, J);
00341 
00342             //
00343             // Step 2: Set up matrix blocks
00344             //
00345             //std::cout << "computeFSPAI(): Setting up matrix blocks..." << std::endl;
00346             std::vector<DenseMatrixType>  subblocks_A(A.size1());
00347             fill_blocks(STL_A, subblocks_A, J, y_k);
00348             STL_A.clear(); //not needed anymore
00349 
00350             //
00351             // Step 3: Cholesky-factor blocks
00352             //
00353             //std::cout << "computeFSPAI(): Cholesky-factorization..." << std::endl;
00354             for (vcl_size_t i=0; i<subblocks_A.size(); ++i)
00355             {
00356               //std::cout << "Block before: " << subblocks_A[i] << std::endl;
00357               cholesky_decompose(subblocks_A[i]);
00358               //std::cout << "Block after: " << subblocks_A[i] << std::endl;
00359             }
00360 
00361 
00362             /*vcl_size_t num_bytes = 0;
00363             for (vcl_size_t i=0; i<subblocks_A.size(); ++i)
00364               num_bytes += 8*subblocks_A[i].size1()*subblocks_A[i].size2();*/
00365             //std::cout << "Memory for FSPAI matrix: " << num_bytes / (1024.0 * 1024.0) << " MB" << std::endl;
00366 
00367             //
00368             // Step 4: Solve for y_k
00369             //
00370             //std::cout << "computeFSPAI(): Cholesky-solve..." << std::endl;
00371             for (vcl_size_t i=0; i<y_k.size(); ++i)
00372             {
00373               if (subblocks_A[i].size1() > 0) //block might be empty...
00374               {
00375                 //y_k[i].resize(subblocks_A[i].size1());
00376                 //std::cout << "y_k[" << i << "]: ";
00377                 //for (vcl_size_t j=0; j<y_k[i].size(); ++j)
00378                 //  std::cout << y_k[i][j] << " ";
00379                 //std::cout << std::endl;
00380                 cholesky_solve(subblocks_A[i], y_k[i]);
00381               }
00382             }
00383 
00384 
00385             //
00386             // Step 5: Set up Cholesky factors L and L_trans
00387             //
00388             //std::cout << "computeFSPAI(): Computing L..." << std::endl;
00389             L.resize(A.size1(), A.size2(), false);
00390             L.reserve(A.nnz(), false);
00391             L_trans.resize(A.size1(), A.size2(), false);
00392             L_trans.reserve(A.nnz(), false);
00393             computeL(A, L, L_trans, y_k, J);
00394 
00395             //std::cout << "L: " << L << std::endl;
00396           }
00397 
00398 
00399 
00400         }
00401       }
00402     }
00403 }
00404 
00405 #endif