// ViennaCL - The Vienna Computing Library, version 1.5.0
00001 #ifndef VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_ 00002 #define VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_ 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2013, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00025 #include <cmath> 00026 #include <algorithm> //for std::max and std::min 00027 00028 #include "viennacl/forwards.h" 00029 #include "viennacl/scalar.hpp" 00030 #include "viennacl/tools/tools.hpp" 00031 #include "viennacl/meta/predicate.hpp" 00032 #include "viennacl/meta/enable_if.hpp" 00033 #include "viennacl/traits/size.hpp" 00034 #include "viennacl/traits/start.hpp" 00035 #include "viennacl/linalg/host_based/common.hpp" 00036 #include "viennacl/linalg/detail/op_applier.hpp" 00037 #include "viennacl/traits/stride.hpp" 00038 00039 00040 // Minimum vector size for using OpenMP on vector operations: 00041 #ifndef VIENNACL_OPENMP_VECTOR_MIN_SIZE 00042 #define VIENNACL_OPENMP_VECTOR_MIN_SIZE 5000 00043 #endif 00044 00045 namespace viennacl 00046 { 00047 namespace linalg 00048 { 00049 namespace host_based 00050 { 00051 namespace detail 00052 { 00053 template <typename NumericT> 00054 NumericT flip_sign(NumericT val) { return -val; } 00055 inline unsigned long flip_sign(unsigned long val) { return val; } 00056 inline unsigned int flip_sign(unsigned int val) { return val; } 00057 inline unsigned short flip_sign(unsigned short val) { return val; } 00058 inline 
unsigned char flip_sign(unsigned char val) { return val; } 00059 } 00060 00061 // 00062 // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here! 00063 // 00064 00065 template <typename T, typename ScalarType1> 00066 void av(vector_base<T> & vec1, 00067 vector_base<T> const & vec2, ScalarType1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha) 00068 { 00069 typedef T value_type; 00070 00071 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1); 00072 value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2); 00073 00074 value_type data_alpha = alpha; 00075 if (flip_sign_alpha) 00076 data_alpha = detail::flip_sign(data_alpha); 00077 00078 vcl_size_t start1 = viennacl::traits::start(vec1); 00079 vcl_size_t inc1 = viennacl::traits::stride(vec1); 00080 vcl_size_t size1 = viennacl::traits::size(vec1); 00081 00082 vcl_size_t start2 = viennacl::traits::start(vec2); 00083 vcl_size_t inc2 = viennacl::traits::stride(vec2); 00084 00085 if (reciprocal_alpha) 00086 { 00087 #ifdef VIENNACL_WITH_OPENMP 00088 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00089 #endif 00090 for (long i = 0; i < static_cast<long>(size1); ++i) 00091 data_vec1[i*inc1+start1] = data_vec2[i*inc2+start2] / data_alpha; 00092 } 00093 else 00094 { 00095 #ifdef VIENNACL_WITH_OPENMP 00096 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00097 #endif 00098 for (long i = 0; i < static_cast<long>(size1); ++i) 00099 data_vec1[i*inc1+start1] = data_vec2[i*inc2+start2] * data_alpha; 00100 } 00101 } 00102 00103 00104 template <typename T, typename ScalarType1, typename ScalarType2> 00105 void avbv(vector_base<T> & vec1, 00106 vector_base<T> const & vec2, ScalarType1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha, 00107 vector_base<T> const & vec3, ScalarType2 const & beta, vcl_size_t 
/* len_beta */, bool reciprocal_beta, bool flip_sign_beta) 00108 { 00109 typedef T value_type; 00110 00111 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1); 00112 value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2); 00113 value_type const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3); 00114 00115 value_type data_alpha = alpha; 00116 if (flip_sign_alpha) 00117 data_alpha = detail::flip_sign(data_alpha); 00118 00119 value_type data_beta = beta; 00120 if (flip_sign_beta) 00121 data_beta = detail::flip_sign(data_beta); 00122 00123 vcl_size_t start1 = viennacl::traits::start(vec1); 00124 vcl_size_t inc1 = viennacl::traits::stride(vec1); 00125 vcl_size_t size1 = viennacl::traits::size(vec1); 00126 00127 vcl_size_t start2 = viennacl::traits::start(vec2); 00128 vcl_size_t inc2 = viennacl::traits::stride(vec2); 00129 00130 vcl_size_t start3 = viennacl::traits::start(vec3); 00131 vcl_size_t inc3 = viennacl::traits::stride(vec3); 00132 00133 if (reciprocal_alpha) 00134 { 00135 if (reciprocal_beta) 00136 { 00137 #ifdef VIENNACL_WITH_OPENMP 00138 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00139 #endif 00140 for (long i = 0; i < static_cast<long>(size1); ++i) 00141 data_vec1[i*inc1+start1] = data_vec2[i*inc2+start2] / data_alpha + data_vec3[i*inc3+start3] / data_beta; 00142 } 00143 else 00144 { 00145 #ifdef VIENNACL_WITH_OPENMP 00146 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00147 #endif 00148 for (long i = 0; i < static_cast<long>(size1); ++i) 00149 data_vec1[i*inc1+start1] = data_vec2[i*inc2+start2] / data_alpha + data_vec3[i*inc3+start3] * data_beta; 00150 } 00151 } 00152 else 00153 { 00154 if (reciprocal_beta) 00155 { 00156 #ifdef VIENNACL_WITH_OPENMP 00157 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00158 #endif 00159 for (long i = 0; i < static_cast<long>(size1); ++i) 00160 data_vec1[i*inc1+start1] = data_vec2[i*inc2+start2] * 
data_alpha + data_vec3[i*inc3+start3] / data_beta; 00161 } 00162 else 00163 { 00164 #ifdef VIENNACL_WITH_OPENMP 00165 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00166 #endif 00167 for (long i = 0; i < static_cast<long>(size1); ++i) 00168 data_vec1[i*inc1+start1] = data_vec2[i*inc2+start2] * data_alpha + data_vec3[i*inc3+start3] * data_beta; 00169 } 00170 } 00171 } 00172 00173 00174 template <typename T, typename ScalarType1, typename ScalarType2> 00175 void avbv_v(vector_base<T> & vec1, 00176 vector_base<T> const & vec2, ScalarType1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha, 00177 vector_base<T> const & vec3, ScalarType2 const & beta, vcl_size_t /*len_beta*/, bool reciprocal_beta, bool flip_sign_beta) 00178 { 00179 typedef T value_type; 00180 00181 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1); 00182 value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2); 00183 value_type const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3); 00184 00185 value_type data_alpha = alpha; 00186 if (flip_sign_alpha) 00187 data_alpha = detail::flip_sign(data_alpha); 00188 00189 value_type data_beta = beta; 00190 if (flip_sign_beta) 00191 data_beta = detail::flip_sign(data_beta); 00192 00193 vcl_size_t start1 = viennacl::traits::start(vec1); 00194 vcl_size_t inc1 = viennacl::traits::stride(vec1); 00195 vcl_size_t size1 = viennacl::traits::size(vec1); 00196 00197 vcl_size_t start2 = viennacl::traits::start(vec2); 00198 vcl_size_t inc2 = viennacl::traits::stride(vec2); 00199 00200 vcl_size_t start3 = viennacl::traits::start(vec3); 00201 vcl_size_t inc3 = viennacl::traits::stride(vec3); 00202 00203 if (reciprocal_alpha) 00204 { 00205 if (reciprocal_beta) 00206 { 00207 #ifdef VIENNACL_WITH_OPENMP 00208 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00209 #endif 00210 for (long i = 0; i < static_cast<long>(size1); ++i) 00211 
data_vec1[i*inc1+start1] += data_vec2[i*inc2+start2] / data_alpha + data_vec3[i*inc3+start3] / data_beta; 00212 } 00213 else 00214 { 00215 #ifdef VIENNACL_WITH_OPENMP 00216 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00217 #endif 00218 for (long i = 0; i < static_cast<long>(size1); ++i) 00219 data_vec1[i*inc1+start1] += data_vec2[i*inc2+start2] / data_alpha + data_vec3[i*inc3+start3] * data_beta; 00220 } 00221 } 00222 else 00223 { 00224 if (reciprocal_beta) 00225 { 00226 #ifdef VIENNACL_WITH_OPENMP 00227 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00228 #endif 00229 for (long i = 0; i < static_cast<long>(size1); ++i) 00230 data_vec1[i*inc1+start1] += data_vec2[i*inc2+start2] * data_alpha + data_vec3[i*inc3+start3] / data_beta; 00231 } 00232 else 00233 { 00234 #ifdef VIENNACL_WITH_OPENMP 00235 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00236 #endif 00237 for (long i = 0; i < static_cast<long>(size1); ++i) 00238 data_vec1[i*inc1+start1] += data_vec2[i*inc2+start2] * data_alpha + data_vec3[i*inc3+start3] * data_beta; 00239 } 00240 } 00241 } 00242 00243 00244 00245 00252 template <typename T> 00253 void vector_assign(vector_base<T> & vec1, const T & alpha, bool up_to_internal_size = false) 00254 { 00255 typedef T value_type; 00256 00257 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1); 00258 00259 vcl_size_t start1 = viennacl::traits::start(vec1); 00260 vcl_size_t inc1 = viennacl::traits::stride(vec1); 00261 vcl_size_t size1 = viennacl::traits::size(vec1); 00262 vcl_size_t loop_bound = up_to_internal_size ? vec1.internal_size() : size1; //Note: Do NOT use traits::internal_size() here, because vector proxies don't require padding. 
00263 00264 value_type data_alpha = static_cast<value_type>(alpha); 00265 00266 #ifdef VIENNACL_WITH_OPENMP 00267 #pragma omp parallel for if (loop_bound > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00268 #endif 00269 for (long i = 0; i < static_cast<long>(loop_bound); ++i) 00270 data_vec1[i*inc1+start1] = data_alpha; 00271 } 00272 00273 00279 template <typename T> 00280 void vector_swap(vector_base<T> & vec1, vector_base<T> & vec2) 00281 { 00282 typedef T value_type; 00283 00284 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1); 00285 value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2); 00286 00287 vcl_size_t start1 = viennacl::traits::start(vec1); 00288 vcl_size_t inc1 = viennacl::traits::stride(vec1); 00289 vcl_size_t size1 = viennacl::traits::size(vec1); 00290 00291 vcl_size_t start2 = viennacl::traits::start(vec2); 00292 vcl_size_t inc2 = viennacl::traits::stride(vec2); 00293 00294 #ifdef VIENNACL_WITH_OPENMP 00295 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00296 #endif 00297 for (long i = 0; i < static_cast<long>(size1); ++i) 00298 { 00299 value_type temp = data_vec2[i*inc2+start2]; 00300 data_vec2[i*inc2+start2] = data_vec1[i*inc1+start1]; 00301 data_vec1[i*inc1+start1] = temp; 00302 } 00303 } 00304 00305 00307 00313 template <typename T, typename OP> 00314 void element_op(vector_base<T> & vec1, 00315 vector_expression<const vector_base<T>, const vector_base<T>, op_element_binary<OP> > const & proxy) 00316 { 00317 typedef T value_type; 00318 typedef viennacl::linalg::detail::op_applier<op_element_binary<OP> > OpFunctor; 00319 00320 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1); 00321 value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs()); 00322 value_type const * data_vec3 = detail::extract_raw_pointer<value_type>(proxy.rhs()); 00323 00324 vcl_size_t start1 = viennacl::traits::start(vec1); 00325 vcl_size_t inc1 = viennacl::traits::stride(vec1); 
00326 vcl_size_t size1 = viennacl::traits::size(vec1); 00327 00328 vcl_size_t start2 = viennacl::traits::start(proxy.lhs()); 00329 vcl_size_t inc2 = viennacl::traits::stride(proxy.lhs()); 00330 00331 vcl_size_t start3 = viennacl::traits::start(proxy.rhs()); 00332 vcl_size_t inc3 = viennacl::traits::stride(proxy.rhs()); 00333 00334 #ifdef VIENNACL_WITH_OPENMP 00335 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00336 #endif 00337 for (long i = 0; i < static_cast<long>(size1); ++i) 00338 OpFunctor::apply(data_vec1[i*inc1+start1], data_vec2[i*inc2+start2], data_vec3[i*inc3+start3]); 00339 } 00340 00346 template <typename T, typename OP> 00347 void element_op(vector_base<T> & vec1, 00348 vector_expression<const vector_base<T>, const vector_base<T>, op_element_unary<OP> > const & proxy) 00349 { 00350 typedef T value_type; 00351 typedef viennacl::linalg::detail::op_applier<op_element_unary<OP> > OpFunctor; 00352 00353 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1); 00354 value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs()); 00355 00356 vcl_size_t start1 = viennacl::traits::start(vec1); 00357 vcl_size_t inc1 = viennacl::traits::stride(vec1); 00358 vcl_size_t size1 = viennacl::traits::size(vec1); 00359 00360 vcl_size_t start2 = viennacl::traits::start(proxy.lhs()); 00361 vcl_size_t inc2 = viennacl::traits::stride(proxy.lhs()); 00362 00363 #ifdef VIENNACL_WITH_OPENMP 00364 #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00365 #endif 00366 for (long i = 0; i < static_cast<long>(size1); ++i) 00367 OpFunctor::apply(data_vec1[i*inc1+start1], data_vec2[i*inc2+start2]); 00368 } 00369 00370 00372 00373 00374 //implementation of inner product: 00375 //namespace { 00382 template <typename T, typename S3> 00383 void inner_prod_impl(vector_base<T> const & vec1, 00384 vector_base<T> const & vec2, 00385 S3 & result) 00386 { 00387 typedef T value_type; 00388 00389 value_type const * 
data_vec1 = detail::extract_raw_pointer<value_type>(vec1); 00390 value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2); 00391 00392 vcl_size_t start1 = viennacl::traits::start(vec1); 00393 vcl_size_t inc1 = viennacl::traits::stride(vec1); 00394 vcl_size_t size1 = viennacl::traits::size(vec1); 00395 00396 vcl_size_t start2 = viennacl::traits::start(vec2); 00397 vcl_size_t inc2 = viennacl::traits::stride(vec2); 00398 00399 value_type temp = 0; 00400 00401 #ifdef VIENNACL_WITH_OPENMP 00402 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00403 #endif 00404 for (long i = 0; i < static_cast<long>(size1); ++i) 00405 temp += data_vec1[i*inc1+start1] * data_vec2[i*inc2+start2]; 00406 00407 result = temp; //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation 00408 } 00409 00410 template <typename T> 00411 void inner_prod_impl(vector_base<T> const & x, 00412 vector_tuple<T> const & vec_tuple, 00413 vector_base<T> & result) 00414 { 00415 typedef T value_type; 00416 00417 value_type const * data_x = detail::extract_raw_pointer<value_type>(x); 00418 00419 vcl_size_t start_x = viennacl::traits::start(x); 00420 vcl_size_t inc_x = viennacl::traits::stride(x); 00421 vcl_size_t size_x = viennacl::traits::size(x); 00422 00423 std::vector<value_type> temp(vec_tuple.const_size()); 00424 std::vector<value_type const *> data_y(vec_tuple.const_size()); 00425 std::vector<vcl_size_t> start_y(vec_tuple.const_size()); 00426 std::vector<vcl_size_t> stride_y(vec_tuple.const_size()); 00427 00428 for (vcl_size_t j=0; j<vec_tuple.const_size(); ++j) 00429 { 00430 data_y[j] = detail::extract_raw_pointer<value_type>(vec_tuple.const_at(j)); 00431 start_y[j] = viennacl::traits::start(vec_tuple.const_at(j)); 00432 stride_y[j] = viennacl::traits::stride(vec_tuple.const_at(j)); 00433 } 00434 00435 // Note: No OpenMP here because it cannot perform a reduction on temp-array. 
Savings in memory bandwidth are expected to still justify this approach... 00436 for (vcl_size_t i = 0; i < size_x; ++i) 00437 { 00438 value_type entry_x = data_x[i*inc_x+start_x]; 00439 for (vcl_size_t j=0; j < vec_tuple.const_size(); ++j) 00440 temp[j] += entry_x * data_y[j][i*stride_y[j]+start_y[j]]; 00441 } 00442 00443 for (vcl_size_t j=0; j < vec_tuple.const_size(); ++j) 00444 result[j] = temp[j]; //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation 00445 } 00446 00447 00453 template <typename T, typename S2> 00454 void norm_1_impl(vector_base<T> const & vec1, 00455 S2 & result) 00456 { 00457 typedef T value_type; 00458 00459 value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1); 00460 00461 vcl_size_t start1 = viennacl::traits::start(vec1); 00462 vcl_size_t inc1 = viennacl::traits::stride(vec1); 00463 vcl_size_t size1 = viennacl::traits::size(vec1); 00464 00465 value_type temp = 0; 00466 00467 #ifdef VIENNACL_WITH_OPENMP 00468 #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00469 #endif 00470 for (long i = 0; i < static_cast<long>(size1); ++i) 00471 temp += static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1]))); //casting to double in order to avoid problems if T is an integer type 00472 00473 result = temp; //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation 00474 } 00475 00481 template <typename T, typename S2> 00482 void norm_2_impl(vector_base<T> const & vec1, 00483 S2 & result) 00484 { 00485 typedef T value_type; 00486 00487 value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1); 00488 00489 vcl_size_t start1 = viennacl::traits::start(vec1); 00490 vcl_size_t inc1 = viennacl::traits::stride(vec1); 00491 vcl_size_t size1 = viennacl::traits::size(vec1); 00492 00493 value_type temp = 0; 00494 value_type data = 0; 00495 00496 #ifdef VIENNACL_WITH_OPENMP 00497 #pragma omp parallel 
for reduction(+: temp) private(data) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00498 #endif 00499 for (long i = 0; i < static_cast<long>(size1); ++i) 00500 { 00501 data = data_vec1[i*inc1+start1]; 00502 temp += data * data; 00503 } 00504 00505 result = std::sqrt(temp); //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation 00506 } 00507 00513 template <typename T, typename S2> 00514 void norm_inf_impl(vector_base<T> const & vec1, 00515 S2 & result) 00516 { 00517 typedef T value_type; 00518 00519 value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1); 00520 00521 vcl_size_t start1 = viennacl::traits::start(vec1); 00522 vcl_size_t inc1 = viennacl::traits::stride(vec1); 00523 vcl_size_t size1 = viennacl::traits::size(vec1); 00524 00525 value_type temp = 0; 00526 00527 // Note: No max() reduction in OpenMP yet 00528 for (vcl_size_t i = 0; i < size1; ++i) 00529 temp = std::max<value_type>(temp, static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1])))); //casting to double in order to avoid problems if T is an integer type 00530 00531 result = temp; //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation 00532 } 00533 00534 //This function should return a CPU scalar, otherwise statements like 00535 // vcl_rhs[index_norm_inf(vcl_rhs)] 00536 // are ambiguous 00542 template <typename T> 00543 vcl_size_t index_norm_inf(vector_base<T> const & vec1) 00544 { 00545 typedef T value_type; 00546 00547 value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1); 00548 00549 vcl_size_t start1 = viennacl::traits::start(vec1); 00550 vcl_size_t inc1 = viennacl::traits::stride(vec1); 00551 vcl_size_t size1 = viennacl::traits::size(vec1); 00552 00553 value_type temp = 0; 00554 value_type data; 00555 vcl_size_t index = start1; 00556 00557 // Note: No suitable reduction in OpenMP yet 00558 for (vcl_size_t i = 0; i < size1; ++i) 00559 { 00560 data = 
static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1]))); //casting to double in order to avoid problems if T is an integer type 00561 if (data > temp) 00562 { 00563 index = i; 00564 temp = data; 00565 } 00566 } 00567 00568 return index; 00569 } 00570 00571 00581 template <typename T> 00582 void plane_rotation(vector_base<T> & vec1, 00583 vector_base<T> & vec2, 00584 T alpha, T beta) 00585 { 00586 typedef T value_type; 00587 00588 value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1); 00589 value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2); 00590 00591 vcl_size_t start1 = viennacl::traits::start(vec1); 00592 vcl_size_t inc1 = viennacl::traits::stride(vec1); 00593 vcl_size_t size1 = viennacl::traits::size(vec1); 00594 00595 vcl_size_t start2 = viennacl::traits::start(vec2); 00596 vcl_size_t inc2 = viennacl::traits::stride(vec2); 00597 00598 value_type temp1 = 0; 00599 value_type temp2 = 0; 00600 value_type data_alpha = alpha; 00601 value_type data_beta = beta; 00602 00603 #ifdef VIENNACL_WITH_OPENMP 00604 #pragma omp parallel for private(temp1, temp2) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE) 00605 #endif 00606 for (long i = 0; i < static_cast<long>(size1); ++i) 00607 { 00608 temp1 = data_vec1[i*inc1+start1]; 00609 temp2 = data_vec2[i*inc2+start2]; 00610 00611 data_vec1[i*inc1+start1] = data_alpha * temp1 + data_beta * temp2; 00612 data_vec2[i*inc2+start2] = data_alpha * temp2 - data_beta * temp1; 00613 } 00614 } 00615 00616 } //namespace host_based 00617 } //namespace linalg 00618 } //namespace viennacl 00619 00620 00621 #endif