ViennaCL - The Vienna Computing Library
1.5.0
|
00001 #ifndef VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_ 00002 #define VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_ 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2013, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00025 #include <vector> 00026 #include <cmath> 00027 #include "viennacl/forwards.h" 00028 #include "viennacl/tools/tools.hpp" 00029 #include "viennacl/linalg/detail/ilu/common.hpp" 00030 #include "viennacl/linalg/detail/ilu/ilu0.hpp" 00031 #include "viennacl/linalg/detail/ilu/ilut.hpp" 00032 00033 #include <map> 00034 00035 namespace viennacl 00036 { 00037 namespace linalg 00038 { 00039 namespace detail 00040 { 00042 template <typename VectorType, typename ValueType, typename SizeType = vcl_size_t> 00043 class ilu_vector_range 00044 { 00045 public: 00046 //typedef typename VectorType::value_type value_type; 00047 //typedef typename VectorType::size_type size_type; 00048 00049 ilu_vector_range(VectorType & v, 00050 SizeType start_index, 00051 SizeType vec_size 00052 ) : vec_(v), start_(start_index), size_(vec_size) {} 00053 00054 ValueType & operator()(SizeType index) 00055 { 00056 assert(index < size_ && bool("Index out of bounds!")); 00057 return vec_[start_ + index]; 00058 } 00059 00060 ValueType & operator[](SizeType index) 00061 { 00062 assert(index < size_ && bool("Index out of bounds!")); 00063 return vec_[start_ + index]; 00064 } 00065 00066 SizeType size() const { return size_; } 00067 00068 private: 00069 VectorType & vec_; 00070 SizeType start_; 00071 SizeType size_; 00072 }; 00073 00081 template <typename ScalarType> 00082 void extract_block_matrix(viennacl::compressed_matrix<ScalarType> const & A, 00083 viennacl::compressed_matrix<ScalarType> & diagonal_block_A, 00084 vcl_size_t start_index, 00085 vcl_size_t stop_index 00086 ) 00087 { 00088 00089 assert( (A.handle1().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") ); 00090 assert( (A.handle2().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") ); 00091 assert( (A.handle().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") ); 00092 00093 ScalarType const * A_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(A.handle()); 00094 unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1()); 00095 unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2()); 00096 00097 ScalarType * output_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(diagonal_block_A.handle()); 00098 unsigned int * output_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle1()); 00099 unsigned int * output_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle2()); 00100 00101 vcl_size_t output_counter = 0; 00102 for (vcl_size_t row = start_index; row < stop_index; ++row) 00103 { 00104 unsigned int buffer_col_start = A_row_buffer[row]; 00105 unsigned int buffer_col_end = A_row_buffer[row+1]; 00106 00107 output_row_buffer[row - start_index] = static_cast<unsigned int>(output_counter); 00108 00109 for (unsigned int buf_index = buffer_col_start; buf_index < buffer_col_end; ++buf_index) 00110 { 00111 unsigned int col = A_col_buffer[buf_index]; 00112 if (col < start_index) 00113 continue; 00114 00115 if (col >= static_cast<unsigned int>(stop_index)) 00116 continue; 00117 00118 output_col_buffer[output_counter] = static_cast<unsigned int>(col - start_index); 00119 output_elements[output_counter] = A_elements[buf_index]; 00120 ++output_counter; 00121 } 00122 output_row_buffer[row - start_index + 1] = static_cast<unsigned int>(output_counter); 00123 } 00124 } 00125 00126 00127 } 00128 00134 template <typename MatrixType, typename ILUTag> 00135 class block_ilu_precond 00136 { 00137 typedef typename MatrixType::value_type ScalarType; 00138 00139 public: 00140 typedef std::vector<std::pair<vcl_size_t, vcl_size_t> > index_vector_type; //the pair refers to index range [a, b) of each block 00141 00142 00143 block_ilu_precond(MatrixType const & mat, 00144 ILUTag const & tag, 00145 vcl_size_t num_blocks = 8 00146 ) : tag_(tag), LU_blocks(num_blocks) 00147 { 00148 00149 // Set up vector of block indices: 00150 block_indices_.resize(num_blocks); 00151 for (vcl_size_t i=0; i<num_blocks; ++i) 00152 { 00153 vcl_size_t start_index = ( i * mat.size1()) / num_blocks; 00154 vcl_size_t stop_index = ((i+1) * mat.size1()) / num_blocks; 00155 00156 block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index); 00157 } 00158 00159 //initialize preconditioner: 00160 //std::cout << "Start CPU precond" << std::endl; 00161 init(mat); 00162 //std::cout << "End CPU precond" << std::endl; 00163 } 00164 00165 block_ilu_precond(MatrixType const & mat, 00166 ILUTag const & tag, 00167 index_vector_type const & block_boundaries 00168 ) : tag_(tag), block_indices_(block_boundaries), LU_blocks(block_boundaries.size()) 00169 { 00170 //initialize preconditioner: 00171 //std::cout << "Start CPU precond" << std::endl; 00172 init(mat); 00173 //std::cout << "End CPU precond" << std::endl; 00174 } 00175 00176 00177 template <typename VectorType> 00178 void apply(VectorType & vec) const 00179 { 00180 for (vcl_size_t i=0; i<block_indices_.size(); ++i) 00181 { 00182 detail::ilu_vector_range<VectorType, ScalarType> vec_range(vec, block_indices_[i].first, LU_blocks[i].size2()); 00183 00184 unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle1()); 00185 unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle2()); 00186 ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(LU_blocks[i].handle()); 00187 00188 viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), unit_lower_tag()); 00189 viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), upper_tag()); 00190 00191 } 00192 } 00193 00194 private: 00195 void init(MatrixType const & A) 00196 { 00197 viennacl::context host_context(viennacl::MAIN_MEMORY); 00198 viennacl::compressed_matrix<ScalarType> mat(host_context); 00199 00200 viennacl::copy(A, mat); 00201 00202 unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1()); 00203 00204 #ifdef VIENNACL_WITH_OPENMP 00205 #pragma omp parallel for 00206 #endif 00207 for (long i=0; i<static_cast<long>(block_indices_.size()); ++i) 00208 { 00209 // Step 1: Extract blocks 00210 vcl_size_t block_size = block_indices_[i].second - block_indices_[i].first; 00211 vcl_size_t block_nnz = row_buffer[block_indices_[i].second] - row_buffer[block_indices_[i].first]; 00212 viennacl::compressed_matrix<ScalarType> mat_block(block_size, block_size, block_nnz, host_context); 00213 00214 detail::extract_block_matrix(mat, mat_block, block_indices_[i].first, block_indices_[i].second); 00215 00216 // Step 2: Precondition blocks: 00217 viennacl::switch_memory_context(LU_blocks[i], host_context); 00218 preconditioner_dispatch(mat_block, LU_blocks[i], tag_); 00219 } 00220 00221 } 00222 00223 void preconditioner_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block, 00224 viennacl::compressed_matrix<ScalarType> & LU, 00225 viennacl::linalg::ilu0_tag) 00226 { 00227 LU = mat_block; 00228 viennacl::linalg::precondition(LU, tag_); 00229 } 00230 00231 void preconditioner_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block, 00232 viennacl::compressed_matrix<ScalarType> & LU, 00233 viennacl::linalg::ilut_tag) 00234 { 00235 std::vector< std::map<unsigned int, ScalarType> > temp(mat_block.size1()); 00236 00237 viennacl::linalg::precondition(mat_block, temp, tag_); 00238 00239 viennacl::copy(temp, LU); 00240 } 00241 00242 ILUTag const & tag_; 00243 index_vector_type block_indices_; 00244 std::vector< viennacl::compressed_matrix<ScalarType> > LU_blocks; 00245 }; 00246 00247 00248 00249 00250 00255 template <typename ScalarType, unsigned int MAT_ALIGNMENT, typename ILUTag> 00256 class block_ilu_precond< compressed_matrix<ScalarType, MAT_ALIGNMENT>, ILUTag > 00257 { 00258 typedef compressed_matrix<ScalarType, MAT_ALIGNMENT> MatrixType; 00259 //typedef std::vector<ScalarType> STLVectorType; 00260 00261 public: 00262 typedef std::vector<std::pair<vcl_size_t, vcl_size_t> > index_vector_type; //the pair refers to index range [a, b) of each block 00263 00264 00265 block_ilu_precond(MatrixType const & mat, 00266 ILUTag const & tag, 00267 vcl_size_t num_blocks = 8 00268 ) : tag_(tag), 00269 block_indices_(num_blocks), 00270 gpu_block_indices(), 00271 gpu_L_trans(0,0, viennacl::traits::context(mat)), 00272 gpu_U_trans(0,0, viennacl::traits::context(mat)), 00273 gpu_D(mat.size1(), viennacl::traits::context(mat)), 00274 LU_blocks(num_blocks) 00275 { 00276 // Set up vector of block indices: 00277 block_indices_.resize(num_blocks); 00278 for (vcl_size_t i=0; i<num_blocks; ++i) 00279 { 00280 vcl_size_t start_index = ( i * mat.size1()) / num_blocks; 00281 vcl_size_t stop_index = ((i+1) * mat.size1()) / num_blocks; 00282 00283 block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index); 00284 } 00285 00286 //initialize preconditioner: 00287 //std::cout << "Start CPU precond" << std::endl; 00288 init(mat); 00289 //std::cout << "End CPU precond" << std::endl; 00290 } 00291 00292 block_ilu_precond(MatrixType const & mat, 00293 ILUTag const & tag, 00294 index_vector_type const & block_boundaries 00295 ) : tag_(tag), 00296 block_indices_(block_boundaries), 00297 gpu_block_indices(viennacl::traits::context(mat)), 00298 gpu_L_trans(0,0,viennacl::traits::context(mat)), 00299 gpu_U_trans(0,0,viennacl::traits::context(mat)), 00300 gpu_D(0,viennacl::traits::context(mat)), 00301 LU_blocks(block_boundaries.size()) 00302 { 00303 //initialize preconditioner: 00304 //std::cout << "Start CPU precond" << std::endl; 00305 init(mat); 00306 //std::cout << "End CPU precond" << std::endl; 00307 } 00308 00309 00310 void apply(vector<ScalarType> & vec) const 00311 { 00312 viennacl::linalg::detail::block_inplace_solve(trans(gpu_L_trans), gpu_block_indices, block_indices_.size(), gpu_D, 00313 vec, 00314 viennacl::linalg::unit_lower_tag()); 00315 00316 viennacl::linalg::detail::block_inplace_solve(trans(gpu_U_trans), gpu_block_indices, block_indices_.size(), gpu_D, 00317 vec, 00318 viennacl::linalg::upper_tag()); 00319 00320 //apply_cpu(vec); 00321 } 00322 00323 00324 private: 00325 00326 void init(MatrixType const & A) 00327 { 00328 viennacl::context host_context(viennacl::MAIN_MEMORY); 00329 viennacl::compressed_matrix<ScalarType> mat(host_context); 00330 00331 mat = A; 00332 00333 unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1()); 00334 00335 #ifdef VIENNACL_WITH_OPENMP 00336 #pragma omp parallel for 00337 #endif 00338 for (long i=0; i<static_cast<long>(block_indices_.size()); ++i) 00339 { 00340 // Step 1: Extract blocks 00341 vcl_size_t block_size = block_indices_[i].second - block_indices_[i].first; 00342 vcl_size_t block_nnz = row_buffer[block_indices_[i].second] - row_buffer[block_indices_[i].first]; 00343 viennacl::compressed_matrix<ScalarType> mat_block(block_size, block_size, block_nnz, host_context); 00344 00345 detail::extract_block_matrix(mat, mat_block, block_indices_[i].first, block_indices_[i].second); 00346 00347 // Step 2: Precondition blocks: 00348 viennacl::switch_memory_context(LU_blocks[i], host_context); 00349 preconditioner_dispatch(mat_block, LU_blocks[i], tag_); 00350 } 00351 00352 /* 00353 * copy resulting preconditioner back to GPU: 00354 */ 00355 00356 viennacl::switch_memory_context(gpu_L_trans, viennacl::traits::context(A)); 00357 viennacl::switch_memory_context(gpu_U_trans, viennacl::traits::context(A)); 00358 viennacl::switch_memory_context(gpu_D, viennacl::traits::context(A)); 00359 00360 viennacl::backend::typesafe_host_array<unsigned int> block_indices_uint(gpu_block_indices, 2 * block_indices_.size()); 00361 for (vcl_size_t i=0; i<block_indices_.size(); ++i) 00362 { 00363 block_indices_uint.set(2*i, block_indices_[i].first); 00364 block_indices_uint.set(2*i + 1, block_indices_[i].second); 00365 } 00366 00367 viennacl::backend::memory_create(gpu_block_indices, block_indices_uint.raw_size(), viennacl::traits::context(A), block_indices_uint.get()); 00368 00369 blocks_to_device(mat.size1()); 00370 00371 } 00372 00373 // Copy computed preconditioned blocks to OpenCL device 00374 void blocks_to_device(vcl_size_t matrix_size) 00375 { 00376 std::vector< std::map<unsigned int, ScalarType> > L_transposed(matrix_size); 00377 std::vector< std::map<unsigned int, ScalarType> > U_transposed(matrix_size); 00378 std::vector<ScalarType> entries_D(matrix_size); 00379 00380 // 00381 // Transpose individual blocks into a single large matrix: 00382 // 00383 for (vcl_size_t block_index = 0; block_index < LU_blocks.size(); ++block_index) 00384 { 00385 MatrixType const & current_block = LU_blocks[block_index]; 00386 00387 unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(current_block.handle1()); 00388 unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(current_block.handle2()); 00389 ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(current_block.handle()); 00390 00391 vcl_size_t block_start = block_indices_[block_index].first; 00392 00393 //transpose L and U: 00394 for (vcl_size_t row = 0; row < current_block.size1(); ++row) 00395 { 00396 unsigned int buffer_col_start = row_buffer[row]; 00397 unsigned int buffer_col_end = row_buffer[row+1]; 00398 00399 for (unsigned int buf_index = buffer_col_start; buf_index < buffer_col_end; ++buf_index) 00400 { 00401 unsigned int col = col_buffer[buf_index]; 00402 00403 if (row > col) //entry for L 00404 L_transposed[col + block_start][static_cast<unsigned int>(row + block_start)] = elements[buf_index]; 00405 else if (row == col) 00406 entries_D[row + block_start] = elements[buf_index]; 00407 else //entry for U 00408 U_transposed[col + block_start][static_cast<unsigned int>(row + block_start)] = elements[buf_index]; 00409 } 00410 } 00411 } 00412 00413 // 00414 // Move data to GPU: 00415 // 00416 tools::const_sparse_matrix_adapter<ScalarType, unsigned int> adapted_L_transposed(L_transposed, matrix_size, matrix_size); 00417 tools::const_sparse_matrix_adapter<ScalarType, unsigned int> adapted_U_transposed(U_transposed, matrix_size, matrix_size); 00418 viennacl::copy(adapted_L_transposed, gpu_L_trans); 00419 viennacl::copy(adapted_U_transposed, gpu_U_trans); 00420 viennacl::copy(entries_D, gpu_D); 00421 } 00422 00423 void preconditioner_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block, 00424 viennacl::compressed_matrix<ScalarType> & LU, 00425 viennacl::linalg::ilu0_tag) 00426 { 00427 LU = mat_block; 00428 viennacl::linalg::precondition(LU, tag_); 00429 } 00430 00431 void preconditioner_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block, 00432 viennacl::compressed_matrix<ScalarType> & LU, 00433 viennacl::linalg::ilut_tag) 00434 { 00435 std::vector< std::map<unsigned int, ScalarType> > temp(mat_block.size1()); 00436 00437 viennacl::linalg::precondition(mat_block, temp, tag_); 00438 00439 viennacl::copy(temp, LU); 00440 } 00441 00442 00443 ILUTag const & tag_; 00444 index_vector_type block_indices_; 00445 viennacl::backend::mem_handle gpu_block_indices; 00446 viennacl::compressed_matrix<ScalarType> gpu_L_trans; 00447 viennacl::compressed_matrix<ScalarType> gpu_U_trans; 00448 viennacl::vector<ScalarType> gpu_D; 00449 00450 std::vector< MatrixType > LU_blocks; 00451 }; 00452 00453 00454 } 00455 } 00456 00457 00458 00459 00460 #endif 00461 00462 00463