00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef _LA_VEC_H_
00021 #define _LA_VEC_H_
00022
00023 #include "la_traits.h"
00024
00025 namespace LA {
00026
00027
// forward declaration: formatted output of an r x c matrix stored in a (row-major),
// with printf-style format form0; nodim/modulo/issym control header, wrapping and symmetry
template <typename T> void lawritemat(FILE *file, const T *a, int r, int c,
		const char *form0, int nodim, int modulo, int issym);
00032
00033
// scalar constants passed by address to (cu)BLAS complex routines (zaxpy etc.)
const static complex<double> CONE = 1.0, CMONE = -1.0, CZERO = 0.0;
#ifdef CUDALA
// cuBLAS counterparts of the constants above
const static cuDoubleComplex CUONE = {1.,0.}, CUMONE = {-1.,0.}, CUZERO = {0.,0.};
#endif
00040
00041
// generates the binary scalar operators X (e.g. +, -, *) for class NR##E
// (NRVec/NRMat) in terms of the corresponding compound operator X= :
// both "vector X scalar" (member) and "scalar X vector" (free) forms
#define NRVECMAT_OPER(E,X) \
template<class T> \
inline const NR##E<T> NR##E<T>::operator X(const T &a) const \
{ return NR##E(*this) X##= a; } \
 \
template<class T> \
inline const NR##E<T> operator X(const T &a, const NR##E<T> &rhs) \
{ return NR##E<T>(rhs) X##= a; }

// same idea for the element-wise operator X between two NR##E objects
#define NRVECMAT_OPER2(E,X) \
template<class T> \
inline const NR##E<T> NR##E<T>::operator X(const NR##E<T> &a) const \
{ return NR##E(*this) X##= a; }
00058
00059
00060
/**
 * Reference-counted numeric vector template.
 * Copy construction and assignment are shallow (data shared, refcount bumped);
 * a deep copy is made lazily by copyonwrite(). When compiled with CUDALA the
 * data may reside on a GPU, selected by the `location` member.
 */
template <typename T>
class NRVec {
protected:
	int nn;		// number of elements
	T *v;		// element storage (CPU heap, or GPU memory when CUDALA)
	int *count;	// shared reference count; NULL for an empty/unallocated vector
#ifdef CUDALA
	GPUID location;	// where v lives (cpu or a GPU)
#endif
public:
	friend class NRSMat<T>;
	friend class NRMat<T>;
	template <typename U> friend NRVec<complex<U> > complexify(const NRVec<U>&);

	typedef T ROWTYPE;

	//! destructor: releases the storage when the last reference disappears
	~NRVec();

	//! inline constructor creating a zero-size vector
	inline NRVec(): nn(0), v(0), count(0) {
#ifdef CUDALA
		location = DEFAULT_LOC;
#endif
	};

	//! inline constructor creating a vector of n elements (uninitialized);
	//! loc selects the memory location when compiled with CUDALA
	explicit inline NRVec(const int n, const GPUID loc = undefined): nn(n), count(new int(1)) {
#ifdef CUDALA
		location = (loc == undefined)?DEFAULT_LOC:loc;
		if(location == cpu){
#endif
			v = new T[n];
#ifdef CUDALA
		}else{
			v = (T*) gpualloc(n*sizeof(T));
		}
#endif
	};

	//! inline constructor creating a vector of n elements, all equal to a
	inline NRVec(const T &a, const int n);

	//! inline constructor copying n elements from plain array a
	inline NRVec(const T *a, const int n);

	//! inline constructor; with skeleton=true it wraps the caller-owned array a
	//! without taking ownership (the destructor will then never free it)
	inline NRVec(T *a, const int n, bool skeleton);

	//! inline copy constructor (shallow, reference counted)
	inline NRVec(const NRVec &rhs);

	//! constructor from the real or imaginary part of a non-complex vector
	NRVec(const typename LA_traits_complex<T>::NRVec_Noncomplex_type &rhs, bool imagpart=false);

	//! explicit constructor viewing the packed storage of a symmetric matrix
	inline explicit NRVec(const NRSMat<T> & S);

	//! explicit constructor flattening a dense matrix row-wise
#ifdef MATPTR
	explicit NRVec(const NRMat<T> &rhs): NRVec(&rhs[0][0], rhs.nrows()*rhs.ncols()) {};
#else
	explicit NRVec(const NRMat<T> &rhs);
#endif

	//! memory location query/transfer (no-ops without CUDALA)
#ifdef CUDALA
	inline GPUID getlocation() const { return location; }
	void moveto(const GPUID dest);
#else
	inline GPUID getlocation() const { return cpu; }
	void moveto(const GPUID dest) {};
#endif

	//! ensure this object holds a private (unshared) copy of the data
	void copyonwrite();

	//! set all elements to zero
	void clear() { copyonwrite(); LA_traits<T>::clear(v, nn); };

	//! assignment operators (shallow, reference counted / fill with scalar)
	NRVec& operator=(const NRVec &rhs);
	NRVec& operator=(const T &a);

	//! fill with random values of magnitude up to x
	void randomize(const typename LA_traits<T>::normtype &x);

	//! assignment forcing a private deep copy of rhs
	NRVec& operator|=(const NRVec &rhs);

	//! comparison operators (element-wise via LA_traits)
	const bool operator!=(const NRVec &rhs) const {if(nn!=rhs.nn) return 1; return LA_traits<T>::gencmp(v,rhs.v,nn);}
	const bool operator==(const NRVec &rhs) const {return !(*this != rhs);};
	const bool operator>(const NRVec &rhs) const;
	const bool operator<(const NRVec &rhs) const;
	const bool operator>=(const NRVec &rhs) const {return !(*this < rhs);};
	const bool operator<=(const NRVec &rhs) const {return !(*this > rhs);};

	//! unary minus
	const NRVec operator-() const;

	//! element-wise compound operators with another vector
	inline NRVec& operator+=(const NRVec &rhs);
	inline NRVec& operator-=(const NRVec &rhs);
	inline NRVec& operator*=(const NRVec &rhs);
	inline NRVec& operator/=(const NRVec &rhs);

	//! element-wise binary operators with another vector
	inline const NRVec operator+(const NRVec &rhs) const;
	inline const NRVec operator-(const NRVec &rhs) const;

	//! compound operators with a scalar
	inline NRVec& operator+=(const T &a);
	inline NRVec& operator-=(const T &a);
	inline NRVec& operator*=(const T &a);

	//! binary operators with a scalar
	inline const NRVec operator+(const T &a) const;
	inline const NRVec operator-(const T &a) const;
	inline const NRVec operator*(const T &a) const;

	//! current reference count (0 for an unallocated vector)
	inline int getcount() const {return count?*count:0;}

	//! inner (dot) product with another vector / with a strided plain array
	inline const T operator*(const NRVec &rhs) const;
	inline const T dot(const NRVec &rhs) const {return *this * rhs;};
	inline const T dot(const T *a, const int stride = 1) const;

	//! this = beta*this + alpha*op(a)*x for the various matrix types
	void gemv(const T beta, const NRMat<T> &a, const char trans, const T alpha, const NRVec &x);
	void gemv(const T beta, const NRSMat<T> &a, const char trans , const T alpha, const NRVec &x);
	void gemv(const T beta, const SparseMat<T> &a, const char trans, const T alpha, const NRVec &x,const bool treat_as_symmetric = false);

	//! gemv with a real matrix applied to a complex vector (component-wise)
	void gemv( const typename LA_traits_complex<T>::Component_type beta,
			const typename LA_traits_complex<T>::NRMat_Noncomplex_type &a,
			const char trans,
			const typename LA_traits_complex<T>::Component_type alpha,
			const NRVec &x);

	void gemv( const typename LA_traits_complex<T>::Component_type beta,
			const typename LA_traits_complex<T>::NRSMat_Noncomplex_type &a,
			const char trans,
			const typename LA_traits_complex<T>::Component_type alpha, const NRVec &x);

	//! row-vector times dense matrix (implemented as gemv with 't')
	const NRVec operator*(const NRMat<T> &mat) const {
		SAME_LOC(*this, mat);

		NRVec<T> result(mat.ncols(), mat.getlocation());
		result.gemv((T)0, mat, 't', (T)1, *this);
		return result;
	};

	//! row-vector times symmetric matrix (packed storage)
	const NRVec operator*(const NRSMat<T> &mat) const {
		SAME_LOC(*this, mat);

		NRVec<T> result(mat.ncols(), mat.getlocation());
		result.gemv((T)0, mat, 't', (T)1, *this);
		return result;
	};

	//! row-vector times sparse matrix (CPU only)
	const NRVec operator*(const SparseMat<T> &mat) const {
		NOT_GPU(*this);

		NRVec<T> result(mat.ncols());
		result.gemv((T)0, mat, 't', (T)1, *this);
		return result;
	};

	//! outer product scale*(this)(rhs)^T, optionally conjugating rhs
	const NRMat<T> otimes(const NRVec<T> &rhs, const bool conjugate = false, const T &scale = 1) const;
	inline const NRMat<T> operator|(const NRVec<T> &rhs) const { return otimes(rhs,true); };

	//! sum of all elements
	inline const T sum() const {
		T sum(0);
		for(register int i=0; i<nn; i++){ sum += v[i];}
		return sum;
	};

	//! sum of absolute values (BLAS asum semantics)
	inline const typename LA_traits<T>::normtype asum() const;

	//! element access (mutable version checks the refcount in DEBUG builds)
	inline T & operator[](const int i);
	inline const T & operator[](const int i) const;

	//! no-op; kept for interface compatibility with matrix classes
	inline void setcoldim(int i) {};

	//! conversion to a plain pointer to the underlying storage
	inline operator T*();
	inline operator const T*() const;

	//! this += alpha*x (vector / strided plain-array forms)
	void axpy(const T alpha, const NRVec &x);
	void axpy(const T alpha, const T *x, const int stride=1);

	//! number of elements
	inline int size() const;

	//! change the size (contents are NOT preserved)
	void resize(const int n);

	//! Euclidean norm
	inline const typename LA_traits<T>::normtype norm() const;

	//! scale to unit norm in place; optionally returns the original norm
	NRVec& normalize(typename LA_traits<T>::normtype* norm = 0);

	//! normalized copy of this vector
	inline const NRVec unitvector() const;

	//! element of maximal / minimal absolute value
	inline const T amax() const;
	inline const T amin() const;

	//! formatted / binary output
	void fprintf(FILE *f, const char *format, const int modulo) const;
	void put(int fd, bool dimensions=1, bool transp=0) const;

	//! formatted / binary input
	void fscanf(FILE *f, const char *format);
	void get(int fd, bool dimensions=1, bool transp=0);

	//! constructor from a sparse matrix (dense expansion)
	explicit NRVec(const SparseMat<T> &rhs);

	//! no-op; kept for interface compatibility with sparse classes
	inline void simplify() {};

	//! element comparison helpers used by sort()
	bool bigger(int i, int j) const {
		NOT_GPU(*this);
		return LA_traits<T>::bigger(v[i], v[j]);
	};

	bool smaller(int i, int j) const {
		NOT_GPU(*this);
		return LA_traits<T>::smaller(v[i], v[j]);
	};

	//! swap two elements (used by sort(); no copyonwrite — caller's duty)
	void swap(int i, int j) {
		const T tmp(v[i]);
		v[i] = v[j];
		v[j] = tmp;
	};

	//! in-place sort over [from,to]; optionally records the permutation in perm
	int sort(int direction = 0, int from = 0, int to = -1, int *perm = NULL);

	//! apply a scalar function to every element in place
	NRVec& call_on_me(T (*_F)(const T &) ){
		NOT_GPU(*this);

		copyonwrite();
		for(int i=0; i<nn; ++i) v[i] = _F(v[i]);
		return *this;
	};
};
00351
00352 }
00353
00354
00355 #include "mat.h"
00356 #include "smat.h"
00357 #include "sparsemat.h"
00358 #include "sparsesmat.h"
00359
00360
00361 namespace LA {
00362
00363
/**
 * Output operator: prints the element count on its own line, then the
 * space-separated elements terminated by a newline.
 * A GPU-resident vector (CUDALA) is copied to the CPU first and the
 * operator recurses on the copy.
 */
template <typename T>
std::ostream & operator<<(std::ostream &s, const NRVec<T> &x) {
#ifdef CUDALA
	if(x.getlocation() == cpu){
#endif
		const int n = x.size();
		s << n << std::endl;
		for(register int i = 0; i<n; i++){
			// elements are converted through LA_traits_io for printing
			s << (typename LA_traits_io<T>::IOtype)x[i] << (i == n-1 ? '\n' : ' ');
		}
		return s;
#ifdef CUDALA
	}else{
		NRVec<T> tmp(x);
		tmp.moveto(cpu);
		return s << tmp;
	}
#endif
}
00388
00389
/**
 * Input operator: reads the element count, resizes x, then reads the
 * elements (format produced by operator<<). For a GPU-resident x (CUDALA)
 * the data is read into a CPU temporary and then moved to x's location.
 */
template <typename T>
std::istream & operator>>(std::istream &s, NRVec<T> &x) {
#ifdef CUDALA
	if(x.getlocation() == cpu){
#endif
		int i,n;
		s >> n;
		x.resize(n);
		typename LA_traits_io<T>::IOtype tmp;
		for(i=0; i<n; i++){
			s >> tmp;
			x[i] = tmp;
		}
		return s;
#ifdef CUDALA
	}else{
		NRVec<T> tmp;
		tmp.moveto(cpu);
		s >> tmp;
		tmp.moveto(x.getlocation());
		x = tmp;
		return s;
	}
#endif
}
00420
00421
00422
00427 template <typename T>
00428 inline NRVec<T>::NRVec(const T& a, const int n): nn(n), count(new int) {
00429 *count = 1;
00430 #ifdef CUDALA
00431 location = DEFAULT_LOC;
00432 if(location == cpu){
00433 #endif
00434 v = new T[n];
00435 if(a != (T)0){
00436 for(register int i=0; i<n; i++) v[i] = a;
00437 }else{
00438 memset(v, 0, nn*sizeof(T));
00439 }
00440 #ifdef CUDALA
00441 }else{
00442 v = (T*) gpualloc(n*sizeof(T));
00443 smart_gpu_set(n, a, v);
00444 }
00445 #endif
00446 }
00447
00448
00449
00454 template <typename T>
00455 inline NRVec<T>::NRVec(const T *a, const int n): nn(n), count(new int) {
00456 #ifdef CUDALA
00457 location = DEFAULT_LOC;
00458 if(location == cpu) {
00459 #endif
00460 v = new T[n];
00461 *count = 1;
00462 memcpy(v, a, n*sizeof(T));
00463 #ifdef CUDALA
00464 }else{
00465 v = (T*) gpualloc(n*sizeof(T));
00466 cublasSetVector(n, sizeof(T), a, 1, v, 1);
00467 TEST_CUBLAS("cublasSetVector");
00468 }
00469 #endif
00470
00471 }
00472
00473
00480 template <typename T>
00481 inline NRVec<T>::NRVec(T *a, const int n, bool skeleton) : nn(n), count(new int) {
00482 if(!skeleton){
00483 #ifdef CUDALA
00484 location = DEFAULT_LOC;
00485 if(location == cpu){
00486 #endif
00487 v = new T[n];
00488 *count = 1;
00489 memcpy(v, a, n*sizeof(T));
00490 #ifdef CUDALA
00491 }else{
00492 v= (T*) gpualloc(n*sizeof(T));
00493 cublasSetVector(n, sizeof(T), a, 1, v, 1);
00494 TEST_CUBLAS("cublasSetVector");
00495 }
00496 #endif
00497 }else{
00498 #ifdef CUDALA
00499 if(location != cpu) laerror("NRVec() with skeleton option cannot be on GPU");
00500 #endif
00501 *count = 2;
00502 v = a;
00503 }
00504 }
00505
00506
00510 template <typename T>
00511 inline NRVec<T>::NRVec(const NRVec<T> &rhs) {
00512 #ifdef CUDALA
00513 location = rhs.location;
00514 #endif
00515 v = rhs.v;
00516 nn = rhs.nn;
00517 count = rhs.count;
00518 if(count) (*count)++;
00519 }
00520
00521
00527 template <typename T>
00528 inline NRVec<T>::NRVec(const NRSMat<T> &rhs) {
00529 #ifdef CUDALA
00530 location = rhs.location;
00531 #endif
00532 nn = rhs.nn;
00534 nn = NN2;
00535 v = rhs.v;
00536 count = rhs.count;
00537 (*count)++;
00538 }
00539
00540
00545 template <typename T>
00546 inline NRVec<T> & NRVec<T>::operator+=(const T &a) {
00547 NOT_GPU(*this);
00548
00549 copyonwrite();
00550
00551 if(a != (T)0){ for(register int i=0; i<nn; ++i) v[i] += a; }
00552 return *this;
00553 }
00554
00555
00556
00561 template <typename T>
00562 inline NRVec<T>& NRVec<T>::operator-=(const T &a) {
00563 NOT_GPU(*this);
00564
00565 copyonwrite();
00566
00567 if(a != (T)0){ for(register int i=0; i<nn; ++i) v[i] -= a; }
00568 return *this;
00569 }
00570
00571
00572
00578 template <typename T>
00579 inline NRVec<T>& NRVec<T>::operator+=(const NRVec<T> &rhs) {
00580 #ifdef DEBUG
00581 if (nn != rhs.nn) laerror("incompatible dimensions");
00582 #endif
00583 NOT_GPU(*this);
00584 NOT_GPU(rhs);
00585
00586 copyonwrite();
00587
00588 for(register int i=0; i<nn; ++i) v[i] += rhs.v[i];
00589 return *this;
00590 }
00591
00592
00598 template <typename T>
00599 inline NRVec<T>& NRVec<T>::operator*=(const NRVec<T>& rhs) {
00600 #ifdef DEBUG
00601 if (nn != rhs.nn) laerror("incompatible dimensions");
00602 #endif
00603 NOT_GPU(*this);
00604 NOT_GPU(rhs);
00605
00606 copyonwrite();
00607
00608 for(register int i=0; i<nn; ++i) v[i] *= rhs.v[i];
00609 return *this;
00610 }
00611
00612
00618 template <typename T>
00619 inline NRVec<T> & NRVec<T>::operator/=(const NRVec<T> &rhs) {
00620 #ifdef DEBUG
00621 if (nn != rhs.nn) laerror("incompatible dimensions");
00622 #endif
00623 NOT_GPU(*this);
00624 NOT_GPU(rhs);
00625
00626 copyonwrite();
00627
00628 for(register int i=0; i<nn; ++i) v[i] /= rhs.v[i];
00629 return *this;
00630 }
00631
00632
00633
00639 template <typename T>
00640 inline NRVec<T> & NRVec<T>::operator-=(const NRVec<T> &rhs) {
00641 #ifdef DEBUG
00642 if (nn != rhs.nn) laerror("incompatible dimensions");
00643 #endif
00644 NOT_GPU(*this);
00645 NOT_GPU(rhs);
00646
00647 copyonwrite();
00648
00649 for(register int i=0; i<nn; ++i) v[i] -= rhs.v[i];
00650 return *this;
00651 }
00652
00653
00654
00660 template <typename T>
00661 inline NRVec<T> & NRVec<T>::operator*=(const T &a) {
00662 NOT_GPU(*this);
00663 copyonwrite();
00664
00665 for(register int i=0; i<nn; ++i) v[i] *= a;
00666 return *this;
00667 }
00668
00669
00676 template<typename T>
00677 inline const T NRVec<T>::operator*(const NRVec<T> &rhs) const {
00678 #ifdef DEBUG
00679 if (nn != rhs.nn) laerror("incompatible dimensions");
00680 #endif
00681 NOT_GPU(*this);
00682 NOT_GPU(rhs);
00683
00684 T dot(0);
00685 for(register int i=0; i<nn; ++i) dot += v[i]*rhs.v[i];
00686 return dot;
00687 }
00688
00689
00695 template <typename T>
00696 inline T& NRVec<T>::operator[](const int i) {
00697 #ifdef DEBUG
00698 if(_LA_count_check && *count != 1) laerror("possible use of NRVec[] with count>1 as l-value");
00699 if(i < 0 || i >= nn) laerror("out of range");
00700 if(!v) laerror("unallocated NRVec");
00701 #endif
00702 NOT_GPU(*this);
00703
00704 return v[i];
00705 }
00706
00707
00713 template <typename T>
00714 inline const T& NRVec<T>::operator[](const int i) const {
00715 #ifdef DEBUG
00716 if(i < 0 || i >= nn) laerror("out of range");
00717 if(!v) laerror("unallocated NRVec");
00718 #endif
00719 NOT_GPU(*this);
00720
00721 return v[i];
00722 }
00723
00724
00728 template <typename T>
00729 inline int NRVec<T>::size() const {
00730 return nn;
00731 }
00732
00733
00737 template <typename T>
00738 inline NRVec<T>::operator T*() {
00739 #ifdef DEBUG
00740 if(!v) laerror("unallocated NRVec");
00741 #endif
00742 return v;
00743 }
00744
00745
00749 template <typename T>
00750 inline NRVec<T>::operator const T*() const {
00751 #ifdef DEBUG
00752 if(!v) laerror("unallocated NRVec");
00753 #endif
00754 return v;
00755 }
00756
00757
00758
00763 template <typename T>
00764 inline const NRVec<T> NRVec<T>::unitvector() const {
00765 return NRVec<T>(*this).normalize();
00766 }
00767
00768
// instantiate the scalar operators +, -, * for NRVec
// (both "vector op scalar" and "scalar op vector" forms)
NRVECMAT_OPER(Vec,+)
NRVECMAT_OPER(Vec,-)
NRVECMAT_OPER(Vec,*)


// instantiate the element-wise vector-vector operators + and - for NRVec
NRVECMAT_OPER2(Vec,+)
NRVECMAT_OPER2(Vec,-)
00780
00781
00785 template <typename T>
00786 NRVec<T>::~NRVec() {
00787 if(!count) return;
00788 if(--(*count) <= 0) {
00789 if(v){
00790 #ifdef CUDALA
00791 if(location == cpu){
00792 #endif
00793 delete[] v;
00794 #ifdef CUDALA
00795 }else{ gpufree(v); }
00796 #endif
00797 }
00798 delete count;
00799 }
00800 }
00801
00802
00805 template <typename T>
00806 void NRVec<T>::copyonwrite() {
00807 if(!count) laerror("copyonwrite of an undefined vector");
00808 if(*count > 1) {
00809 (*count)--;
00810 count = new int;
00811 *count = 1;
00812 T *newv;
00813 #ifdef CUDALA
00814 if(location == cpu){
00815 #endif
00816 newv = new T[nn];
00817 memcpy(newv, v, nn*sizeof(T));
00818 #ifdef CUDALA
00819 }else{
00820 newv = (T *) gpualloc(nn*sizeof(T));
00821 if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRVec<T>::copyonwrite()");
00822 cublasScopy(nn*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
00823 TEST_CUBLAS("cublasScopy");
00824 }
00825 #endif
00826 v = newv;
00827 }
00828 }
00829
00830
00831
00838 template <typename T>
00839 NRVec<T> & NRVec<T>::operator=(const NRVec<T> &rhs) {
00840
00841 if(this != &rhs){
00842 if(count){
00843 if(--(*count) == 0){
00844 #ifdef CUDALA
00845 if(location == cpu){
00846 #endif
00847 delete[] v;
00848 #ifdef CUDALA
00849 }else{
00850 gpufree(v);
00851 }
00852 #endif
00853 delete count;
00854 }
00855 }
00856 v = rhs.v;
00857 nn = rhs.nn;
00858 count = rhs.count;
00859 #ifdef CUDALA
00860 location = rhs.location;
00861 #endif
00862 if(count){ (*count)++; }
00863 }
00864 return *this;
00865 }
00866
00867
00868
00872 template <typename T>
00873 void NRVec<T>::resize(const int n) {
00874 #ifdef DEBUG
00875 if(n < 0) laerror("illegal dimension");
00876 #endif
00877 if(count){
00878 if(n == 0){
00879 if(--(*count) <= 0){
00880 if(v){
00881 #ifdef CUDALA
00882 if(location == cpu){
00883 #endif
00884 delete[] (v);
00885 #ifdef CUDALA
00886 }else{
00887 gpufree(v);
00888 }
00889 #endif
00890 }
00891 delete count;
00892 }
00893 count = 0;
00894 nn = 0;
00895 v = 0;
00896 return;
00897 }
00898 if(*count > 1) {
00899 (*count)--;
00900 count = 0;
00901 v = 0;
00902 nn = 0;
00903 }
00904 }
00905 if(!count){
00906 count = new int;
00907 *count = 1;
00908 nn = n;
00909 #ifdef CUDALA
00910 if(location == cpu)
00911 #endif
00912 v = new T[nn];
00913 #ifdef CUDALA
00914 else
00915 v = (T*) gpualloc(nn*sizeof(T));
00916 #endif
00917 return;
00918 }
00919
00920 if (n != nn) {
00921 nn = n;
00922 #ifdef CUDALA
00923 if(location == cpu){
00924 #endif
00925
00926 delete[] v;
00927 v = new T[nn];
00928 #ifdef CUDALA
00929 }else{
00930
00931 gpufree(v);
00932 v = (T*) gpualloc(nn*sizeof(T));
00933 }
00934 #endif
00935 }
00936 }
00937
00938
00939
00944 template <typename T>
00945 NRVec<T> & NRVec<T>::operator|=(const NRVec<T> &rhs) {
00946 #ifdef DEBUG
00947 if(!rhs.v) laerror("unallocated vector");
00948 #endif
00949 if(this == &rhs) return *this;
00950 *this = rhs;
00951 this->copyonwrite();
00952 return *this;
00953 }
00954
00955
00956
00962 template<typename T>
00963 NRVec<complex<T> > complexify(const NRVec<T> &rhs) {
00964 NOT_GPU(rhs);
00965
00966 NRVec<complex<T> > r(rhs.size(), rhs.getlocation());
00967 for(register int i=0; i<rhs.size(); ++i) r[i] = rhs[i];
00968 return r;
00969 }
00970 template<> NRVec<complex<double> > complexify<double>(const NRVec<double> &rhs);
00971
00972
#ifdef CUDALA
/**
 * Moves the vector's data between CPU and GPU memory. If the buffer is
 * shared, this object detaches (the other holders keep the data at the
 * original location); the old buffer is freed only for a sole owner.
 * @param[in] dest target location
 */
template<typename T>
void NRVec<T>::moveto(const GPUID dest) {
	if(location == dest) return;

	CPU_GPU(location, dest);
	location = dest;

	// consistency checks on the refcount/pointer invariants
	if(v && !count) laerror("internal");
	if (!count) return;

	if(v && *count == 0) laerror("internal");
	if(!v) return;

	T *vold = v;

	if(dest == cpu){
		// GPU -> CPU transfer
		v = new T[nn];
		gpuget(nn,sizeof(T),vold,v);
		if(*count == 1) gpufree(vold);
		else {--(*count); count = new int(1);}

	}else{
		// CPU -> GPU transfer
		v = (T *) gpualloc(nn*sizeof(T));
		gpuput(nn,sizeof(T),vold,v);
		if(*count == 1) delete[] vold;
		else {--(*count); count = new int(1);}
	}
}
#endif
01007
01008
01014 template<>
01015 inline NRVec<double>& NRVec<double>::operator+=(const double &a) {
01016 copyonwrite();
01017 #ifdef CUDALA
01018 if(location == cpu){
01019 #endif
01020 cblas_daxpy(nn, 1.0, &a, 0, v, 1);
01021 #ifdef CUDALA
01022 }else{
01023 double *d = gpuputdouble(a);
01024 cublasDaxpy(nn, 1.0, d, 0, v, 1);
01025 TEST_CUBLAS("cublasDaxpy");
01026 gpufree(d);
01027 }
01028 #endif
01029 return *this;
01030 }
01031
01032
01038 template<>
01039 inline NRVec<complex<double> >& NRVec<complex<double> >::operator+=(const complex<double> &a) {
01040 copyonwrite();
01041 #ifdef CUDALA
01042 if(location == cpu){
01043 #endif
01044 cblas_zaxpy(nn, &CONE, &a, 0, v, 1);
01045 #ifdef CUDALA
01046 }else{
01047 complex<double> *d = gpuputcomplex(a);
01048 cublasZaxpy(nn, CUONE, (cuDoubleComplex *)d, 0, (cuDoubleComplex *)v, 1);
01049 TEST_CUBLAS("cublasZaxpy");
01050 gpufree(d);
01051 }
01052 #endif
01053 return *this;
01054 }
01055
01056
01062 template<>
01063 inline NRVec<double>& NRVec<double>::operator-=(const double &a) {
01064 copyonwrite();
01065 #ifdef CUDALA
01066 if(location == cpu){
01067 #endif
01068 cblas_daxpy(nn, -1.0, &a, 0, v, 1);
01069 #ifdef CUDALA
01070 }else{
01071 double *d = gpuputdouble(a);
01072 cublasDaxpy(nn, -1.0, d, 0, v, 1);
01073 TEST_CUBLAS("cublasDaxpy");
01074 gpufree(d);
01075 }
01076 #endif
01077 return *this;
01078 }
01079
01080
01086 template<>
01087 inline NRVec<complex<double> >& NRVec<complex<double> >::operator-=(const complex<double> &a) {
01088 copyonwrite();
01089 #ifdef CUDALA
01090 if(location == cpu){
01091 #endif
01092 cblas_zaxpy(nn, &CMONE, &a, 0, v, 1);
01093 #ifdef CUDALA
01094 }else{
01095 complex<double> *d = gpuputcomplex(a);
01096 cublasZaxpy(nn, CUMONE, (cuDoubleComplex *)d, 0, (cuDoubleComplex *)v, 1);
01097 TEST_CUBLAS("cublasZaxpy");
01098 gpufree(d);
01099 }
01100 #endif
01101 return *this;
01102 }
01103
01104
01110 template<>
01111 inline NRVec<double>& NRVec<double>::operator+=(const NRVec<double> &rhs) {
01112 #ifdef DEBUG
01113 if (nn != rhs.nn) laerror("incompatible dimensions");
01114 #endif
01115 SAME_LOC(*this, rhs);
01116 copyonwrite();
01117 #ifdef CUDALA
01118 if(location == cpu){
01119 #endif
01120 cblas_daxpy(nn, 1.0, rhs.v, 1, v, 1);
01121 #ifdef CUDALA
01122 }else{
01123 cublasDaxpy(nn, 1.0, rhs.v, 1, v, 1);
01124 TEST_CUBLAS("cubasDaxpy");
01125 }
01126 #endif
01127 return *this;
01128 }
01129
01130
01136 template<>
01137 inline NRVec<complex<double> >& NRVec<complex<double> >::operator+=(const NRVec<complex<double> > &rhs) {
01138 #ifdef DEBUG
01139 if (nn != rhs.nn) laerror("incompatible dimensions");
01140 #endif
01141 SAME_LOC(*this, rhs);
01142 copyonwrite();
01143 #ifdef CUDALA
01144 if(location == cpu){
01145 #endif
01146 cblas_zaxpy(nn, &CONE, rhs.v, 1, v, 1);
01147 #ifdef CUDALA
01148 }else{
01149 cublasZaxpy(nn, CUONE, (cuDoubleComplex*)rhs.v, 1, (cuDoubleComplex*)v, 1);
01150 TEST_CUBLAS("cublasZaxpy");
01151 }
01152 #endif
01153 return *this;
01154 }
01155
01156
01162 template<>
01163 inline NRVec<double> & NRVec<double>::operator-=(const NRVec<double> &rhs) {
01164 #ifdef DEBUG
01165 if (nn != rhs.nn) laerror("incompatible dimensions");
01166 #endif
01167 SAME_LOC(*this,rhs);
01168 copyonwrite();
01169 #ifdef CUDALA
01170 if(location == cpu){
01171 #endif
01172 cblas_daxpy(nn, -1.0, rhs.v, 1, v, 1);
01173 #ifdef CUDALA
01174 }else{
01175 cublasDaxpy(nn, -1.0, rhs.v, 1, v, 1);
01176 TEST_CUBLAS("cubasDaxpy");
01177 }
01178 #endif
01179 return *this;
01180 }
01181
01182
01188 template<>
01189 inline NRVec<complex<double> >& NRVec<complex<double> >::operator-=(const NRVec<complex<double> > &rhs) {
01190 #ifdef DEBUG
01191 if (nn != rhs.nn) laerror("incompatible dimensions");
01192 #endif
01193 SAME_LOC(*this, rhs);
01194 copyonwrite();
01195 #ifdef CUDALA
01196 if(location == cpu){
01197 #endif
01198 cblas_zaxpy(nn, &CMONE, rhs.v, 1, v, 1);
01199 #ifdef CUDALA
01200 }else{
01201 cublasZaxpy(nn, CUMONE, (cuDoubleComplex*)rhs.v, 1, (cuDoubleComplex*)v, 1);
01202 TEST_CUBLAS("cublasZaxpy");
01203 }
01204 #endif
01205 return *this;
01206 }
01207
01208
01214 template<>
01215 inline NRVec<double>& NRVec<double>::operator*=(const double &a) {
01216 copyonwrite();
01217 #ifdef CUDALA
01218 if(location == cpu){
01219 #endif
01220 cblas_dscal(nn, a, v, 1);
01221 #ifdef CUDALA
01222 }else{
01223 cublasDscal(nn, a, v, 1);
01224 TEST_CUBLAS("cublasDscal");
01225 }
01226 #endif
01227 return *this;
01228 }
01229
01230
01236 template<>
01237 inline NRVec<complex<double> >& NRVec<complex<double> >::operator*=(const complex<double> &a) {
01238 copyonwrite();
01239 #ifdef CUDALA
01240 if(location == cpu){
01241 #endif
01242 cblas_zscal(nn, &a, v, 1);
01243 #ifdef CUDALA
01244 }else{
01245 const cuDoubleComplex alpha = make_cuDoubleComplex(a.real(), a.imag());
01246 cublasZscal(nn, alpha, (cuDoubleComplex*)v, 1);
01247 TEST_CUBLAS("cublasZscal");
01248 }
01249 #endif
01250 return *this;
01251 }
01252
01253
01258 template<>
01259 inline const double NRVec<double>::operator*(const NRVec<double> &rhs) const {
01260 double ret(0.0);
01261 #ifdef DEBUG
01262 if(nn != rhs.nn) laerror("incompatible dimensions");
01263 #endif
01264 SAME_LOC(*this, rhs);
01265 #ifdef CUDALA
01266 if(location == cpu){
01267 #endif
01268 ret = cblas_ddot(nn, v, 1, rhs.v, 1);
01269 #ifdef CUDALA
01270 }else{
01271 ret = cublasDdot(nn, v, 1, rhs.v, 1);
01272 TEST_CUBLAS("cublasDdot");
01273 }
01274 #endif
01275 return ret;
01276 }
01277
01278
01284 template<>
01285 inline const complex<double> NRVec<complex<double> >::operator*(const NRVec< complex<double> > &rhs) const {
01286 #ifdef DEBUG
01287 if(nn != rhs.nn) laerror("incompatible dimensions");
01288 #endif
01289 complex<double> dot;
01290 SAME_LOC(*this, rhs);
01291 #ifdef CUDALA
01292 if(location == cpu){
01293 #endif
01294 cblas_zdotc_sub(nn, v, 1, rhs.v, 1, &dot);
01295 #ifdef CUDALA
01296 }else{
01297 const cuDoubleComplex val = cublasZdotc(nn, (cuDoubleComplex*)v, 1, (cuDoubleComplex*)rhs.v, 1);
01298 TEST_CUBLAS("cublasZdotc");
01299 dot = complex<double>(cuCreal(val), cuCimag(val));
01300 }
01301 #endif
01302 return dot;
01303 }
01304
01305
01311 template<>
01312 inline const double NRVec<double>::dot(const double *y, const int stride) const {
01313 NOT_GPU(*this);
01314 return cblas_ddot(nn, y, stride, v, 1);
01315 }
01316
01317
01323 template<>
01324 inline const complex<double> NRVec<complex<double> >::dot(const complex<double> *y, const int stride) const {
01325 complex<double> dot;
01326 NOT_GPU(*this);
01327 cblas_zdotc_sub(nn, y, stride, v, 1, &dot);
01328 return dot;
01329 }
01330
01331
01335 template<>
01336 inline const double NRVec<double>::asum() const {
01337 double ret(0.0);
01338 #ifdef CUDALA
01339 if(location == cpu){
01340 #endif
01341 ret = cblas_dasum(nn, v, 1);
01342 #ifdef CUDALA
01343 }else{
01344 ret = cublasDasum(nn, v, 1);
01345 TEST_CUBLAS("cublasDasum");
01346 }
01347 #endif
01348 return ret;
01349 }
01350
01351
01352
01357 template<>
01358 inline const double NRVec<complex<double> >::asum() const {
01359 double ret(0.0);
01360 #ifdef CUDALA
01361 if(location == cpu){
01362 #endif
01363 ret = cblas_dzasum(nn, v, 1);
01364 #ifdef CUDALA
01365 }else{
01366 ret = cublasDzasum(nn, (cuDoubleComplex*)v, 1);
01367 TEST_CUBLAS("cublasDzasum");
01368 }
01369 #endif
01370 return ret;
01371 }
01372
01373
01377 template<>
01378 inline const double NRVec<double>::norm() const {
01379 double ret(0.);
01380 #ifdef CUDALA
01381 if(location == cpu){
01382 #endif
01383 ret = cblas_dnrm2(nn, v, 1);
01384 #ifdef CUDALA
01385 }else{
01386 ret = cublasDnrm2(nn, v, 1);
01387 TEST_CUBLAS("cublasDnrm2");
01388 }
01389 #endif
01390 return ret;
01391 }
01392
01393
01397 template<>
01398 inline const double NRVec< complex<double> >::norm() const {
01399 double ret(0.);
01400 #ifdef CUDALA
01401 if(location == cpu){
01402 #endif
01403 ret = cblas_dznrm2(nn, v, 1);
01404 #ifdef CUDALA
01405 }else{
01406 ret = cublasDznrm2(nn, (cuDoubleComplex*)v, 1);
01407 TEST_CUBLAS("cublasDzrm2");
01408 }
01409 #endif
01410 return ret;
01411 }
01412
01413
01417 template<>
01418 inline const double NRVec<double>::amax() const {
01419 double ret(0.0);
01420 #ifdef CUDALA
01421 if(location == cpu){
01422 #endif
01423 ret = v[cblas_idamax(nn, v, 1) - 1];
01424 #ifdef CUDALA
01425 }else{
01426 const int pozice = cublasIdamax(nn, v, 1) - 1;
01427 TEST_CUBLAS("cublasIdamax");
01428
01429 gpuget(1, sizeof(double), v + pozice, &ret);
01430 }
01431 #endif
01432 return ret;
01433 }
01434
01435
01439 template<>
01440 inline const double NRVec<double>::amin() const {
01441 double ret(std::numeric_limits<double>::max());
01442 #ifdef CUDALA
01443 if(location == cpu){
01444 #endif
01445
01446 double val(0.0);
01447 int index(-1);
01448 for(register int i = 0; i < nn; i++){
01449 val = std::abs(v[i]);
01450 if(val < ret){ index = i; ret = val; }
01451 }
01452 ret = v[index];
01453 #ifdef CUDALA
01454 }else{
01455 const int pozice = cublasIdamin(nn, v, 1) - 1;
01456 TEST_CUBLAS("cublasIdamin");
01457 gpuget(1, sizeof(double), v + pozice, &ret);
01458 }
01459 #endif
01460 return ret;
01461 }
01462
01463
01468 template<>
01469 inline const complex<double> NRVec<complex<double> >::amax() const {
01470 complex<double> ret(0., 0.);
01471 #ifdef CUDALA
01472 if(location == cpu){
01473 #endif
01474 ret = v[cblas_izamax(nn, v, 1) - 1];
01475 #ifdef CUDALA
01476 }else{
01477 const int pozice = cublasIzamax(nn, (cuDoubleComplex*)v, 1) - 1;
01478 TEST_CUBLAS("cublasIzamax");
01479 gpuget(1, sizeof(complex<double>), v + pozice, &ret);
01480 }
01481 #endif
01482 return ret;
01483 }
01484
01485
01490 template<>
01491 inline const complex<double> NRVec<complex<double> >::amin() const {
01492 complex<double> ret(0., 0.);
01493 #ifdef CUDALA
01494 if(location == cpu){
01495 #endif
01496
01497 int index(0);
01498 double val(0.0), min_val(std::numeric_limits<double>::max());
01499 complex<double> z_val(0.0, 0.0);
01500
01501 for(register int i=0; i < nn; i++){
01502 z_val = v[i];
01503 val = std::abs(z_val.real()) + std::abs(z_val.imag());
01504 if(val < min_val){ index = i; min_val = val; }
01505 }
01506 ret = v[index];
01507 #ifdef CUDALA
01508 }else{
01509 const int pozice = cublasIzamin(nn, (cuDoubleComplex*)v, 1) - 1;
01510 TEST_CUBLAS("cublasIzamin");
01511 gpuget(1, sizeof(complex<double>), v + pozice, &ret);
01512 }
01513 #endif
01514 return ret;
01515 }
01516 }
01517
01518 #endif