00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef _LA_VEC_H_
00021 #define _LA_VEC_H_
00022
00023 #include "la_traits.h"
00024
00025 namespace LA {
00026
00027
// forward declaration: formatted output of an r x c matrix stored in a (row-major),
// with printf-style format form0; nodim/modulo/issym control header, wrapping and symmetry
template <typename T> void lawritemat(FILE *file, const T *a, int r, int c,
		const char *form0, int nodim, int modulo, int issym);
00032
00033
// scalar constants passed by address to (cu)BLAS complex routines (zaxpy etc.)
const static complex<double> CONE = 1.0, CMONE = -1.0, CZERO = 0.0;
#ifdef CUDALA
// cuBLAS counterparts of the constants above
const static cuDoubleComplex CUONE = {1.,0.}, CUMONE = {-1.,0.}, CUZERO = {0.,0.};
#endif
00040
00041
// generates the binary scalar operators X (e.g. +, -, *) for class NR##E
// (NRVec/NRMat) in terms of the corresponding compound operator X= :
// both "vector X scalar" (member) and "scalar X vector" (free) forms
#define NRVECMAT_OPER(E,X) \
template<class T> \
inline const NR##E<T> NR##E<T>::operator X(const T &a) const \
{ return NR##E(*this) X##= a; } \
 \
template<class T> \
inline const NR##E<T> operator X(const T &a, const NR##E<T> &rhs) \
{ return NR##E<T>(rhs) X##= a; }

// same idea for the element-wise operator X between two NR##E objects
#define NRVECMAT_OPER2(E,X) \
template<class T> \
inline const NR##E<T> NR##E<T>::operator X(const NR##E<T> &a) const \
{ return NR##E(*this) X##= a; }
00058
00059
00060
/**
 * Reference-counted numeric vector template.
 * Copy construction and assignment are shallow (data shared, refcount bumped);
 * a deep copy is made lazily by copyonwrite(). When compiled with CUDALA the
 * data may reside on a GPU, selected by the `location` member.
 */
template <typename T>
class NRVec {
protected:
	int nn;		// number of elements
	T *v;		// element storage (CPU heap, or GPU memory when CUDALA)
	int *count;	// shared reference count; NULL for an empty/unallocated vector
#ifdef CUDALA
	GPUID location;	// where v lives (cpu or a GPU)
#endif
public:
	friend class NRSMat<T>;
	friend class NRMat<T>;
	template <typename U> friend NRVec<complex<U> > complexify(const NRVec<U>&);

	typedef T ROWTYPE;

	//! destructor: releases the storage when the last reference disappears
	~NRVec();

	//! inline constructor creating a zero-size vector
	inline NRVec(): nn(0), v(0), count(0) {
#ifdef CUDALA
		location = DEFAULT_LOC;
#endif
	};

	//! inline constructor creating a vector of n elements (uninitialized);
	//! loc selects the memory location when compiled with CUDALA
	explicit inline NRVec(const int n, const GPUID loc = undefined): nn(n), count(new int(1)) {
#ifdef CUDALA
		location = (loc == undefined)?DEFAULT_LOC:loc;
		if(location == cpu){
#endif
			v = new T[n];
#ifdef CUDALA
		}else{
			v = (T*) gpualloc(n*sizeof(T));
		}
#endif
	};

	//! inline constructor creating a vector of n elements, all equal to a
	inline NRVec(const T &a, const int n);

	//! inline constructor copying n elements from plain array a
	inline NRVec(const T *a, const int n);

	//! inline constructor; with skeleton=true it wraps the caller-owned array a
	//! without taking ownership (the destructor will then never free it)
	inline NRVec(T *a, const int n, bool skeleton);

	//! inline copy constructor (shallow, reference counted)
	inline NRVec(const NRVec &rhs);

	//! constructor from the real or imaginary part of a non-complex vector
	NRVec(const typename LA_traits_complex<T>::NRVec_Noncomplex_type &rhs, bool imagpart=false);

	//! explicit constructor viewing the packed storage of a symmetric matrix
	inline explicit NRVec(const NRSMat<T> & S);

	//! explicit constructor flattening a dense matrix row-wise
#ifdef MATPTR
	explicit NRVec(const NRMat<T> &rhs): NRVec(&rhs[0][0], rhs.nrows()*rhs.ncols()) {};
#else
	explicit NRVec(const NRMat<T> &rhs);
#endif

	//! memory location query/transfer (no-ops without CUDALA)
#ifdef CUDALA
	inline GPUID getlocation() const { return location; }
	void moveto(const GPUID dest);
#else
	inline GPUID getlocation() const { return cpu; }
	void moveto(const GPUID dest) {};
#endif

	//! ensure this object holds a private (unshared) copy of the data
	void copyonwrite();

	//! set all elements to zero
	void clear() { copyonwrite(); LA_traits<T>::clear(v, nn); };

	//! assignment operators (shallow, reference counted / fill with scalar)
	NRVec& operator=(const NRVec &rhs);
	NRVec& operator=(const T &a);

	//! fill with random values of magnitude up to x
	void randomize(const typename LA_traits<T>::normtype &x);

	//! assignment forcing a private deep copy of rhs
	NRVec& operator|=(const NRVec &rhs);

	//! comparison operators (element-wise via LA_traits)
	const bool operator!=(const NRVec &rhs) const {if(nn!=rhs.nn) return 1; return LA_traits<T>::gencmp(v,rhs.v,nn);}
	const bool operator==(const NRVec &rhs) const {return !(*this != rhs);};
	const bool operator>(const NRVec &rhs) const;
	const bool operator<(const NRVec &rhs) const;
	const bool operator>=(const NRVec &rhs) const {return !(*this < rhs);};
	const bool operator<=(const NRVec &rhs) const {return !(*this > rhs);};

	//! unary minus
	const NRVec operator-() const;

	//! element-wise compound operators with another vector
	inline NRVec& operator+=(const NRVec &rhs);
	inline NRVec& operator-=(const NRVec &rhs);
	inline NRVec& operator*=(const NRVec &rhs);
	inline NRVec& operator/=(const NRVec &rhs);

	//! element-wise binary operators with another vector
	inline const NRVec operator+(const NRVec &rhs) const;
	inline const NRVec operator-(const NRVec &rhs) const;

	//! compound operators with a scalar
	inline NRVec& operator+=(const T &a);
	inline NRVec& operator-=(const T &a);
	inline NRVec& operator*=(const T &a);

	//! binary operators with a scalar
	inline const NRVec operator+(const T &a) const;
	inline const NRVec operator-(const T &a) const;
	inline const NRVec operator*(const T &a) const;

	//! current reference count (0 for an unallocated vector)
	inline int getcount() const {return count?*count:0;}

	//! inner (dot) product with another vector / with a strided plain array
	inline const T operator*(const NRVec &rhs) const;
	inline const T dot(const NRVec &rhs) const {return *this * rhs;};
	inline const T dot(const T *a, const int stride = 1) const;

	//! this = beta*this + alpha*op(a)*x for the various matrix types
	void gemv(const T beta, const NRMat<T> &a, const char trans, const T alpha, const NRVec &x);
	void gemv(const T beta, const NRSMat<T> &a, const char trans , const T alpha, const NRVec &x);
	void gemv(const T beta, const SparseMat<T> &a, const char trans, const T alpha, const NRVec &x,const bool treat_as_symmetric = false);

	//! gemv with a real matrix applied to a complex vector (component-wise)
	void gemv( const typename LA_traits_complex<T>::Component_type beta,
			const typename LA_traits_complex<T>::NRMat_Noncomplex_type &a,
			const char trans,
			const typename LA_traits_complex<T>::Component_type alpha,
			const NRVec &x);

	void gemv( const typename LA_traits_complex<T>::Component_type beta,
			const typename LA_traits_complex<T>::NRSMat_Noncomplex_type &a,
			const char trans,
			const typename LA_traits_complex<T>::Component_type alpha, const NRVec &x);

	//! row-vector times dense matrix (implemented as gemv with 't')
	const NRVec operator*(const NRMat<T> &mat) const {
		SAME_LOC(*this, mat);

		NRVec<T> result(mat.ncols(), mat.getlocation());
		result.gemv((T)0, mat, 't', (T)1, *this);
		return result;
	};

	//! row-vector times symmetric matrix (packed storage)
	const NRVec operator*(const NRSMat<T> &mat) const {
		SAME_LOC(*this, mat);

		NRVec<T> result(mat.ncols(), mat.getlocation());
		result.gemv((T)0, mat, 't', (T)1, *this);
		return result;
	};

	//! row-vector times sparse matrix (CPU only)
	const NRVec operator*(const SparseMat<T> &mat) const {
		NOT_GPU(*this);

		NRVec<T> result(mat.ncols());
		result.gemv((T)0, mat, 't', (T)1, *this);
		return result;
	};

	//! outer product scale*(this)(rhs)^T, optionally conjugating rhs
	const NRMat<T> otimes(const NRVec<T> &rhs, const bool conjugate = false, const T &scale = 1) const;
	inline const NRMat<T> operator|(const NRVec<T> &rhs) const { return otimes(rhs,true); };

	//! sum of all elements
	inline const T sum() const {
		T sum(0);
		for(register int i=0; i<nn; i++){ sum += v[i];}
		return sum;
	};

	//! sum of absolute values (BLAS asum semantics)
	inline const typename LA_traits<T>::normtype asum() const;

	//! element access (mutable version checks the refcount in DEBUG builds)
	inline T & operator[](const int i);
	inline const T & operator[](const int i) const;

	//! no-op; kept for interface compatibility with matrix classes
	inline void setcoldim(int i) {};

	//! conversion to a plain pointer to the underlying storage
	inline operator T*();
	inline operator const T*() const;

	//! this += alpha*x (vector / strided plain-array forms)
	void axpy(const T alpha, const NRVec &x);
	void axpy(const T alpha, const T *x, const int stride=1);

	//! number of elements
	inline int size() const;

	//! change the size (contents are NOT preserved)
	void resize(const int n);

	//! Euclidean norm
	inline const typename LA_traits<T>::normtype norm() const;

	//! scale to unit norm in place; optionally returns the original norm
	NRVec& normalize(typename LA_traits<T>::normtype* norm = 0);

	//! normalized copy of this vector
	inline const NRVec unitvector() const;

	//! element of maximal / minimal absolute value
	inline const T amax() const;
	inline const T amin() const;

	//! formatted / binary output
	void fprintf(FILE *f, const char *format, const int modulo) const;
	void put(int fd, bool dimensions=1, bool transp=0) const;

	//! formatted / binary input
	void fscanf(FILE *f, const char *format);
	void get(int fd, bool dimensions=1, bool transp=0);

	//! constructor from a sparse matrix (dense expansion)
	explicit NRVec(const SparseMat<T> &rhs);

	//! no-op; kept for interface compatibility with sparse classes
	inline void simplify() {};

	//! element comparison helpers used by sort()
	bool bigger(int i, int j) const {
		NOT_GPU(*this);
		return LA_traits<T>::bigger(v[i], v[j]);
	};

	bool smaller(int i, int j) const {
		NOT_GPU(*this);
		return LA_traits<T>::smaller(v[i], v[j]);
	};

	//! swap two elements (used by sort(); no copyonwrite — caller's duty)
	void swap(int i, int j) {
		const T tmp(v[i]);
		v[i] = v[j];
		v[j] = tmp;
	};

	//! in-place sort over [from,to]; optionally records the permutation in perm
	int sort(int direction = 0, int from = 0, int to = -1, int *perm = NULL);

	//! apply a scalar function to every element in place
	NRVec& call_on_me(T (*_F)(const T &) ){
		NOT_GPU(*this);

		copyonwrite();
		for(int i=0; i<nn; ++i) v[i] = _F(v[i]);
		return *this;
	};
};
00351
00352 }
00353
00354
00355 #include "mat.h"
00356 #include "smat.h"
00357 #include "sparsemat.h"
00358 #include "sparsesmat.h"
00359
00360
00361 namespace LA {
00362
00363
/**
 * Output operator: prints the element count on its own line, then the
 * space-separated elements terminated by a newline.
 * A GPU-resident vector (CUDALA) is copied to the CPU first and the
 * operator recurses on the copy.
 */
template <typename T>
std::ostream & operator<<(std::ostream &s, const NRVec<T> &x) {
#ifdef CUDALA
	if(x.getlocation() == cpu){
#endif
		const int n = x.size();
		s << n << std::endl;
		for(register int i = 0; i<n; i++){
			// elements are converted through LA_traits_io for printing
			s << (typename LA_traits_io<T>::IOtype)x[i] << (i == n-1 ? '\n' : ' ');
		}
		return s;
#ifdef CUDALA
	}else{
		NRVec<T> tmp(x);
		tmp.moveto(cpu);
		return s << tmp;
	}
#endif
}
00388
00389
/**
 * Input operator: reads the element count, resizes x, then reads the
 * elements (format produced by operator<<). For a GPU-resident x (CUDALA)
 * the data is read into a CPU temporary and then moved to x's location.
 */
template <typename T>
std::istream & operator>>(std::istream &s, NRVec<T> &x) {
#ifdef CUDALA
	if(x.getlocation() == cpu){
#endif
		int i,n;
		s >> n;
		x.resize(n);
		typename LA_traits_io<T>::IOtype tmp;
		for(i=0; i<n; i++){
			s >> tmp;
			x[i] = tmp;
		}
		return s;
#ifdef CUDALA
	}else{
		NRVec<T> tmp;
		tmp.moveto(cpu);
		s >> tmp;
		tmp.moveto(x.getlocation());
		x = tmp;
		return s;
	}
#endif
}
00420
00421
00422
00427 template <typename T>
00428 inline NRVec<T>::NRVec(const T& a, const int n): nn(n), count(new int) {
00429 *count = 1;
00430 #ifdef CUDALA
00431 location = DEFAULT_LOC;
00432 if(location == cpu){
00433 #endif
00434 v = new T[n];
00435 if(a != (T)0){
00436 for(register int i=0; i<n; i++) v[i] = a;
00437 }else{
00438 memset(v, 0, nn*sizeof(T));
00439 }
00440 #ifdef CUDALA
00441 }else{
00442 v = (T*) gpualloc(n*sizeof(T));
00443 smart_gpu_set(n, a, v);
00444 }
00445 #endif
00446 }
00447
00448
00449
00454 template <typename T>
00455 inline NRVec<T>::NRVec(const T *a, const int n): nn(n), count(new int) {
00456 #ifdef CUDALA
00457 location = DEFAULT_LOC;
00458 if(location == cpu) {
00459 #endif
00460 v = new T[n];
00461 *count = 1;
00462 memcpy(v, a, n*sizeof(T));
00463 #ifdef CUDALA
00464 }else{
00465 v = (T*) gpualloc(n*sizeof(T));
00466 cublasSetVector(n, sizeof(T), a, 1, v, 1);
00467 TEST_CUBLAS("cublasSetVector");
00468 }
00469 #endif
00470
00471 }
00472
00473
00480 template <typename T>
00481 inline NRVec<T>::NRVec(T *a, const int n, bool skeleton) : nn(n), count(new int) {
00482 if(!skeleton){
00483 #ifdef CUDALA
00484 location = DEFAULT_LOC;
00485 if(location == cpu){
00486 #endif
00487 v = new T[n];
00488 *count = 1;
00489 memcpy(v, a, n*sizeof(T));
00490 #ifdef CUDALA
00491 }else{
00492 v= (T*) gpualloc(n*sizeof(T));
00493 cublasSetVector(n, sizeof(T), a, 1, v, 1);
00494 TEST_CUBLAS("cublasSetVector");
00495 }
00496 #endif
00497 }else{
00498 #ifdef CUDALA
00499 if(location != cpu) laerror("NRVec() with skeleton option cannot be on GPU");
00500 #endif
00501 *count = 2;
00502 v = a;
00503 }
00504 }
00505
00506
00510 template <typename T>
00511 inline NRVec<T>::NRVec(const NRVec<T> &rhs) {
00512 #ifdef CUDALA
00513 location = rhs.location;
00514 #endif
00515 v = rhs.v;
00516 nn = rhs.nn;
00517 count = rhs.count;
00518 if(count) (*count)++;
00519 }
00520
00521
00527 template <typename T>
00528 inline NRVec<T>::NRVec(const NRSMat<T> &rhs) {
00529 #ifdef CUDALA
00530 location = rhs.location;
00531 #endif
00532 nn = rhs.nn;
00534 nn = NN2;
00535 v = rhs.v;
00536 count = rhs.count;
00537 (*count)++;
00538 }
00539
00540
00545 template <typename T>
00546 inline NRVec<T> & NRVec<T>::operator+=(const T &a) {
00547 NOT_GPU(*this);
00548
00549 copyonwrite();
00550
00551 if(a != (T)0){ for(register int i=0; i<nn; ++i) v[i] += a; }
00552 return *this;
00553 }
00554
00555
00556
00561 template <typename T>
00562 inline NRVec<T>& NRVec<T>::operator-=(const T &a) {
00563 NOT_GPU(*this);
00564
00565 copyonwrite();
00566
00567 if(a != (T)0){ for(register int i=0; i<nn; ++i) v[i] -= a; }
00568 return *this;
00569 }
00570
00571
00572
00578 template <typename T>
00579 inline NRVec<T>& NRVec<T>::operator+=(const NRVec<T> &rhs) {
00580 #ifdef DEBUG
00581 if (nn != rhs.nn) laerror("incompatible dimensions");
00582 #endif
00583 NOT_GPU(*this);
00584 NOT_GPU(rhs);
00585
00586 copyonwrite();
00587
00588 for(register int i=0; i<nn; ++i) v[i] += rhs.v[i];
00589 return *this;
00590 }
00591
00592
00598 template <typename T>
00599 inline NRVec<T>& NRVec<T>::operator*=(const NRVec<T>& rhs) {
00600 #ifdef DEBUG
00601 if (nn != rhs.nn) laerror("incompatible dimensions");
00602 #endif
00603 NOT_GPU(*this);
00604 NOT_GPU(rhs);
00605
00606 copyonwrite();
00607
00608 for(register int i=0; i<nn; ++i) v[i] *= rhs.v[i];
00609 return *this;
00610 }
00611
00612
00618 template <typename T>
00619 inline NRVec<T> & NRVec<T>::operator/=(const NRVec<T> &rhs) {
00620 #ifdef DEBUG
00621 if (nn != rhs.nn) laerror("incompatible dimensions");
00622 #endif
00623 NOT_GPU(*this);
00624 NOT_GPU(rhs);
00625
00626 copyonwrite();
00627
00628 for(register int i=0; i<nn; ++i) v[i] /= rhs.v[i];
00629 return *this;
00630 }
00631
00632
00633
00639 template <typename T>
00640 inline NRVec<T> & NRVec<T>::operator-=(const NRVec<T> &rhs) {
00641 #ifdef DEBUG
00642 if (nn != rhs.nn) laerror("incompatible dimensions");
00643 #endif
00644 NOT_GPU(*this);
00645 NOT_GPU(rhs);
00646
00647 copyonwrite();
00648
00649 for(register int i=0; i<nn; ++i) v[i] -= rhs.v[i];
00650 return *this;
00651 }
00652
00653
00654
00660 template <typename T>
00661 inline NRVec<T> & NRVec<T>::operator*=(const T &a) {
00662 NOT_GPU(*this);
00663 copyonwrite();
00664
00665 for(register int i=0; i<nn; ++i) v[i] *= a;
00666 return *this;
00667 }
00668
00669
00676 template<typename T>
00677 inline const T NRVec<T>::operator*(const NRVec<T> &rhs) const {
00678 #ifdef DEBUG
00679 if (nn != rhs.nn) laerror("incompatible dimensions");
00680 #endif
00681 NOT_GPU(*this);
00682 NOT_GPU(rhs);
00683
00684 T dot(0);
00685 for(register int i=0; i<nn; ++i) dot += v[i]*rhs.v[i];
00686 return dot;
00687 }
00688
00689
00695 template <typename T>
00696 inline T& NRVec<T>::operator[](const int i) {
00697 #ifdef DEBUG
00698 if(_LA_count_check && *count != 1) laerror("possible use of NRVec[] with count>1 as l-value");
00699 if(i < 0 || i >= nn) laerror("out of range");
00700 if(!v) laerror("unallocated NRVec");
00701 #endif
00702 NOT_GPU(*this);
00703
00704 return v[i];
00705 }
00706
00707
00713 template <typename T>
00714 inline const T& NRVec<T>::operator[](const int i) const {
00715 #ifdef DEBUG
00716 if(i < 0 || i >= nn) laerror("out of range");
00717 if(!v) laerror("unallocated NRVec");
00718 #endif
00719 NOT_GPU(*this);
00720
00721 return v[i];
00722 }
00723
00724
00728 template <typename T>
00729 inline int NRVec<T>::size() const {
00730 return nn;
00731 }
00732
00733
00737 template <typename T>
00738 inline NRVec<T>::operator T*() {
00739 #ifdef DEBUG
00740 if(!v) laerror("unallocated NRVec");
00741 #endif
00742 return v;
00743 }
00744
00745
00749 template <typename T>
00750 inline NRVec<T>::operator const T*() const {
00751 #ifdef DEBUG
00752 if(!v) laerror("unallocated NRVec");
00753 #endif
00754 return v;
00755 }
00756
00757
00758
00763 template <typename T>
00764 inline const NRVec<T> NRVec<T>::unitvector() const {
00765 return NRVec<T>(*this).normalize();
00766 }
00767
00768
// instantiate the scalar operators +, -, * for NRVec
// (both "vector op scalar" and "scalar op vector" forms)
NRVECMAT_OPER(Vec,+)
NRVECMAT_OPER(Vec,-)
NRVECMAT_OPER(Vec,*)


// instantiate the element-wise vector-vector operators + and - for NRVec
NRVECMAT_OPER2(Vec,+)
NRVECMAT_OPER2(Vec,-)
00780
00781
00785 template <typename T>
00786 NRVec<T>::~NRVec() {
00787 if(!count) return;
00788 if(--(*count) <= 0) {
00789 if(v){
00790 #ifdef CUDALA
00791 if(location == cpu){
00792 #endif
00793 delete[] v;
00794 #ifdef CUDALA
00795 }else{ gpufree(v); }
00796 #endif
00797 }
00798 delete count;
00799 }
00800 }
00801
00802
00805 template <typename T>
00806 void NRVec<T>::copyonwrite() {
00807 if(!count) laerror("copyonwrite of an undefined vector");
00808 if(*count > 1) {
00809 (*count)--;
00810 count = new int;
00811 *count = 1;
00812 T *newv;
00813 #ifdef CUDALA
00814 if(location == cpu){
00815 #endif
00816 newv = new T[nn];
00817 memcpy(newv, v, nn*sizeof(T));
00818 #ifdef CUDALA
00819 }else{
00820 newv = (T *) gpualloc(nn*sizeof(T));
00821 if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRVec<T>::copyonwrite()");
00822 cublasScopy(nn*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
00823 TEST_CUBLAS("cublasScopy");
00824 }
00825 #endif
00826 v = newv;
00827 }
00828 }
00829
00830
00831
00838 template <typename T>
00839 NRVec<T> & NRVec<T>::operator=(const NRVec<T> &rhs) {
00840
00841 if(this != &rhs){
00842 if(count){
00843 if(--(*count) == 0){
00844 #ifdef CUDALA
00845 if(location == cpu){
00846 #endif
00847 delete[] v;
00848 #ifdef CUDALA
00849 }else{
00850 gpufree(v);
00851 }
00852 #endif
00853 delete count;
00854 }
00855 }
00856 v = rhs.v;
00857 nn = rhs.nn;
00858 count = rhs.count;
00859 #ifdef CUDALA
00860 location = rhs.location;
00861 #endif
00862 if(count){ (*count)++; }
00863 }
00864 return *this;
00865 }
00866
00867
00868
00872 template <typename T>
00873 void NRVec<T>::resize(const int n) {
00874 #ifdef DEBUG
00875 if(n < 0) laerror("illegal dimension");
00876 #endif
00877 if(count){
00878 if(n == 0){
00879 if(--(*count) <= 0){
00880 if(v){
00881 #ifdef CUDALA
00882 if(location == cpu){
00883 #endif
00884 delete[] (v);
00885 #ifdef CUDALA
00886 }else{
00887 gpufree(v);
00888 }
00889 #endif
00890 }
00891 delete count;
00892 }
00893 count = 0;
00894 nn = 0;
00895 v = 0;
00896 return;
00897 }
00898 if(*count > 1) {
00899 (*count)--;
00900 count = 0;
00901 v = 0;
00902 nn = 0;
00903 }
00904 }
00905 if(!count){
00906 count = new int;
00907 *count = 1;
00908 nn = n;
00909 #ifdef CUDALA
00910 if(location == cpu)
00911 #endif
00912 v = new T[nn];
00913 #ifdef CUDALA
00914 else
00915 v = (T*) gpualloc(nn*sizeof(T));
00916 #endif
00917 return;
00918 }
00919
00920 if (n != nn) {
00921 nn = n;
00922 #ifdef CUDALA
00923 if(location == cpu){
00924 #endif
00925
00926 delete[] v;
00927 v = new T[nn];
00928 #ifdef CUDALA
00929 }else{
00930
00931 gpufree(v);
00932 v = (T*) gpualloc(nn*sizeof(T));
00933 }
00934 #endif
00935 }
00936 }
00937
00938
00939
00944 template <typename T>
00945 NRVec<T> & NRVec<T>::operator|=(const NRVec<T> &rhs) {
00946 #ifdef DEBUG
00947 if(!rhs.v) laerror("unallocated vector");
00948 #endif
00949 if(this == &rhs) return *this;
00950 *this = rhs;
00951 this->copyonwrite();
00952 return *this;
00953 }
00954
00955
00956
00962 template<typename T>
00963 NRVec<complex<T> > complexify(const NRVec<T> &rhs) {
00964 NOT_GPU(rhs);
00965
00966 NRVec<complex<T> > r(rhs.size(), rhs.getlocation());
00967 for(register int i=0; i<rhs.size(); ++i) r[i] = rhs[i];
00968 return r;
00969 }
00970 template<> NRVec<complex<double> > complexify<double>(const NRVec<double> &rhs);
00971
00972
#ifdef CUDALA
/**
 * Moves the vector's data between CPU and GPU memory. If the buffer is
 * shared, this object detaches (the other holders keep the data at the
 * original location); the old buffer is freed only for a sole owner.
 * @param[in] dest target location
 */
template<typename T>
void NRVec<T>::moveto(const GPUID dest) {
	if(location == dest) return;

	CPU_GPU(location, dest);
	location = dest;

	// consistency checks on the refcount/pointer invariants
	if(v && !count) laerror("internal");
	if (!count) return;

	if(v && *count == 0) laerror("internal");
	if(!v) return;

	T *vold = v;

	if(dest == cpu){
		// GPU -> CPU transfer
		v = new T[nn];
		gpuget(nn,sizeof(T),vold,v);
		if(*count == 1) gpufree(vold);
		else {--(*count); count = new int(1);}

	}else{
		// CPU -> GPU transfer
		v = (T *) gpualloc(nn*sizeof(T));
		gpuput(nn,sizeof(T),vold,v);
		if(*count == 1) delete[] vold;
		else {--(*count); count = new int(1);}
	}
}
#endif
01007
01008
01014 template<>
01015 inline NRVec<double>& NRVec<double>::operator+=(const double &a) {
01016 copyonwrite();
01017 #ifdef CUDALA
01018 if(location == cpu){
01019 #endif
01020 cblas_daxpy(nn, 1.0, &a, 0, v, 1);
01021 #ifdef CUDALA
01022 }else{
01023 double *d = gpuputdouble(a);
01024 cublasDaxpy(nn, 1.0, d, 0, v, 1);
01025 TEST_CUBLAS("cublasDaxpy");
01026 gpufree(d);
01027 }
01028 #endif
01029 return *this;
01030 }
01031
01032
01038 template<>
01039 inline NRVec<complex<double> >& NRVec<complex<double> >::operator+=(const complex<double> &a) {
01040 copyonwrite();
01041 #ifdef CUDALA
01042 if(location == cpu){
01043 #endif
01044 cblas_zaxpy(nn, &CONE, &a, 0, v, 1);
01045 #ifdef CUDALA
01046 }else{
01047 complex<double> *d = gpuputcomplex(a);
01048 cublasZaxpy(nn, CUONE, (cuDoubleComplex *)d, 0, (cuDoubleComplex *)v, 1);
01049 TEST_CUBLAS("cublasZaxpy");
01050 gpufree(d);
01051 }
01052 #endif
01053 return *this;
01054 }
01055
01056
01062 template<>
01063 inline NRVec<double>& NRVec<double>::operator-=(const double &a) {
01064 copyonwrite();
01065 #ifdef CUDALA
01066 if(location == cpu){
01067 #endif
01068 cblas_daxpy(nn, -1.0, &a, 0, v, 1);
01069 #ifdef CUDALA
01070 }else{
01071 double *d = gpuputdouble(a);
01072 cublasDaxpy(nn, -1.0, d, 0, v, 1);
01073 TEST_CUBLAS("cublasDaxpy");
01074 gpufree(d);
01075 }
01076 #endif
01077 return *this;
01078 }
01079
01080
01086 template<>
01087 inline NRVec<complex<double> >& NRVec<complex<double> >::operator-=(const complex<double> &a) {
01088 copyonwrite();
01089 #ifdef CUDALA
01090 if(location == cpu){
01091 #endif
01092 cblas_zaxpy(nn, &CMONE, &a, 0, v, 1);
01093 #ifdef CUDALA
01094 }else{
01095 complex<double> *d = gpuputcomplex(a);
01096 cublasZaxpy(nn, CUMONE, (cuDoubleComplex *)d, 0, (cuDoubleComplex *)v, 1);
01097 TEST_CUBLAS("cublasZaxpy");
01098 gpufree(d);
01099 }
01100 #endif
01101 return *this;
01102 }
01103
01104
01110 template<>
01111 inline NRVec<double>& NRVec<double>::operator+=(const NRVec<double> &rhs) {
01112 #ifdef DEBUG
01113 if (nn != rhs.nn) laerror("incompatible dimensions");
01114 #endif
01115 SAME_LOC(*this, rhs);
01116 copyonwrite();
01117 #ifdef CUDALA
01118 if(location == cpu){
01119 #endif
01120 cblas_daxpy(nn, 1.0, rhs.v, 1, v, 1);
01121 #ifdef CUDALA
01122 }else{
01123 cublasDaxpy(nn, 1.0, rhs.v, 1, v, 1);
01124 TEST_CUBLAS("cubasDaxpy");
01125 }
01126 #endif
01127 return *this;
01128 }
01129
01130
01136 template<>
01137 inline NRVec<complex<double> >& NRVec<complex<double> >::operator+=(const NRVec<complex<double> > &rhs) {
01138 #ifdef DEBUG
01139 if (nn != rhs.nn) laerror("incompatible dimensions");
01140 #endif
01141 SAME_LOC(*this, rhs);
01142 copyonwrite();
01143 #ifdef CUDALA
01144 if(location == cpu){
01145 #endif
01146 cblas_zaxpy(nn, &CONE, rhs.v, 1, v, 1);
01147 #ifdef CUDALA
01148 }else{
01149 cublasZaxpy(nn, CUONE, (cuDoubleComplex*)rhs.v, 1, (cuDoubleComplex*)v, 1);
01150 TEST_CUBLAS("cublasZaxpy");
01151 }
01152 #endif
01153 return *this;
01154 }
01155
01156
01162 template<>
01163 inline NRVec<double> & NRVec<double>::operator-=(const NRVec<double> &rhs) {
01164 #ifdef DEBUG
01165 if (nn != rhs.nn) laerror("incompatible dimensions");
01166 #endif
01167 SAME_LOC(*this,rhs);
01168 copyonwrite();
01169 #ifdef CUDALA
01170 if(location == cpu){
01171 #endif
01172 cblas_daxpy(nn, -1.0, rhs.v, 1, v, 1);
01173 #ifdef CUDALA
01174 }else{
01175 cublasDaxpy(nn, -1.0, rhs.v, 1, v, 1);
01176 TEST_CUBLAS("cubasDaxpy");
01177 }
01178 #endif
01179 return *this;
01180 }
01181
01182
01188 template<>
01189 inline NRVec<complex<double> >& NRVec<complex<double> >::operator-=(const NRVec<complex<double> > &rhs) {
01190 #ifdef DEBUG
01191 if (nn != rhs.nn) laerror("incompatible dimensions");
01192 #endif
01193 SAME_LOC(*this, rhs);
01194 copyonwrite();
01195 #ifdef CUDALA
01196 if(location == cpu){
01197 #endif
01198 cblas_zaxpy(nn, &CMONE, rhs.v, 1, v, 1);
01199 #ifdef CUDALA
01200 }else{
01201 cublasZaxpy(nn, CUMONE, (cuDoubleComplex*)rhs.v, 1, (cuDoubleComplex*)v, 1);
01202 TEST_CUBLAS("cublasZaxpy");
01203 }
01204 #endif
01205 return *this;
01206 }
01207
01208
01214 template<>
01215 inline NRVec<double>& NRVec<double>::operator*=(const double &a) {
01216 copyonwrite();
01217 #ifdef CUDALA
01218 if(location == cpu){
01219 #endif
01220 cblas_dscal(nn, a, v, 1);
01221 #ifdef CUDALA
01222 }else{
01223 cublasDscal(nn, a, v, 1);
01224 TEST_CUBLAS("cublasDscal");
01225 }
01226 #endif
01227 return *this;
01228 }
01229
01230
01236 template<>
01237 inline NRVec<complex<double> >& NRVec<complex<double> >::operator*=(const complex<double> &a) {
01238 copyonwrite();
01239 #ifdef CUDALA
01240 if(location == cpu){
01241 #endif
01242 cblas_zscal(nn, &a, v, 1);
01243 #ifdef CUDALA
01244 }else{
01245 const cuDoubleComplex alpha = make_cuDoubleComplex(a.real(), a.imag());
01246 cublasZscal(nn, alpha, (cuDoubleComplex*)v, 1);
01247 TEST_CUBLAS("cublasZscal");
01248 }
01249 #endif
01250 return *this;
01251 }
01252
01253
01258 template<>
01259 inline const double NRVec<double>::operator*(const NRVec<double> &rhs) const {
01260 double ret(0.0);
01261 #ifdef DEBUG
01262 if(nn != rhs.nn) laerror("incompatible dimensions");
01263 #endif
01264 SAME_LOC(*this, rhs);
01265 #ifdef CUDALA
01266 if(location == cpu){
01267 #endif
01268 ret = cblas_ddot(nn, v, 1, rhs.v, 1);
01269 #ifdef CUDALA
01270 }else{
01271 ret = cublasDdot(nn, v, 1, rhs.v, 1);
01272 TEST_CUBLAS("cublasDdot");
01273 }
01274 #endif
01275 return ret;
01276 }
01277
01278
01284 template<>
01285 inline const complex<double> NRVec<complex<double> >::operator*(const NRVec< complex<double> > &rhs) const {
01286 #ifdef DEBUG
01287 if(nn != rhs.nn) laerror("incompatible dimensions");
01288 #endif
01289 complex<double> dot;
01290 SAME_LOC(*this, rhs);
01291 #ifdef CUDALA
01292 if(location == cpu){
01293 #endif
01294 cblas_zdotc_sub(nn, v, 1, rhs.v, 1, &dot);
01295 #ifdef CUDALA
01296 }else{
01297 const cuDoubleComplex val = cublasZdotc(nn, (cuDoubleComplex*)v, 1, (cuDoubleComplex*)rhs.v, 1);
01298 TEST_CUBLAS("cublasZdotc");
01299 dot = complex<double>(cuCreal(val), cuCimag(val));
01300 }
01301 #endif
01302 return dot;
01303 }
01304
01305
01311 template<>
01312 inline const double NRVec<double>::dot(const double *y, const int stride) const {
01313 NOT_GPU(*this);
01314 return cblas_ddot(nn, y, stride, v, 1);
01315 }
01316
01317
01323 template<>
01324 inline const complex<double> NRVec<complex<double> >::dot(const complex<double> *y, const int stride) const {
01325 complex<double> dot;
01326 NOT_GPU(*this);
01327 cblas_zdotc_sub(nn, y, stride, v, 1, &dot);
01328 return dot;
01329 }
01330
01331
01335 template<>
01336 inline const double NRVec<double>::asum() const {
01337 double ret(0.0);
01338 #ifdef CUDALA
01339 if(location == cpu){
01340 #endif
01341 ret = cblas_dasum(nn, v, 1);
01342 #ifdef CUDALA
01343 }else{
01344 ret = cublasDasum(nn, v, 1);
01345 TEST_CUBLAS("cublasDasum");
01346 }
01347 #endif
01348 return ret;
01349 }
01350
01351
01352
01357 template<>
01358 inline const double NRVec<complex<double> >::asum() const {
01359 double ret(0.0);
01360 #ifdef CUDALA
01361 if(location == cpu){
01362 #endif
01363 ret = cblas_dzasum(nn, v, 1);
01364 #ifdef CUDALA
01365 }else{
01366 ret = cublasDzasum(nn, (cuDoubleComplex*)v, 1);
01367 TEST_CUBLAS("cublasDzasum");
01368 }
01369 #endif
01370 return ret;
01371 }
01372
01373
01377 template<>
01378 inline const double NRVec<double>::norm() const {
01379 double ret(0.);
01380 #ifdef CUDALA
01381 if(location == cpu){
01382 #endif
01383 ret = cblas_dnrm2(nn, v, 1);
01384 #ifdef CUDALA
01385 }else{
01386 ret = cublasDnrm2(nn, v, 1);
01387 TEST_CUBLAS("cublasDnrm2");
01388 }
01389 #endif
01390 return ret;
01391 }
01392
01393
01397 template<>
01398 inline const double NRVec< complex<double> >::norm() const {
01399 double ret(0.);
01400 #ifdef CUDALA
01401 if(location == cpu){
01402 #endif
01403 ret = cblas_dznrm2(nn, v, 1);
01404 #ifdef CUDALA
01405 }else{
01406 ret = cublasDznrm2(nn, (cuDoubleComplex*)v, 1);
01407 TEST_CUBLAS("cublasDzrm2");
01408 }
01409 #endif
01410 return ret;
01411 }
01412
01413
01417 template<>
01418 inline const double NRVec<double>::amax() const {
01419 double ret(0.0);
01420 #ifdef CUDALA
01421 if(location == cpu){
01422 #endif
01423 ret = v[cblas_idamax(nn, v, 1) - 1];
01424 #ifdef CUDALA
01425 }else{
01426 const int pozice = cublasIdamax(nn, v, 1) - 1;
01427 TEST_CUBLAS("cublasIdamax");
01428
01429 gpuget(1, sizeof(double), v + pozice, &ret);
01430 }
01431 #endif
01432 return ret;
01433 }
01434
01435
01439 template<>
01440 inline const double NRVec<double>::amin() const {
01441 double ret(std::numeric_limits<double>::max());
01442 #ifdef CUDALA
01443 if(location == cpu){
01444 #endif
01445
01446 double val(0.0);
01447 int index(-1);
01448 for(register int i = 0; i < nn; i++){
01449 val = std::abs(v[i]);
01450 if(val < ret){ index = i; ret = val; }
01451 }
01452 ret = v[index];
01453 #ifdef CUDALA
01454 }else{
01455 const int pozice = cublasIdamin(nn, v, 1) - 1;
01456 TEST_CUBLAS("cublasIdamin");
01457 gpuget(1, sizeof(double), v + pozice, &ret);
01458 }
01459 #endif
01460 return ret;
01461 }
01462
01463
01468 template<>
01469 inline const complex<double> NRVec<complex<double> >::amax() const {
01470 complex<double> ret(0., 0.);
01471 #ifdef CUDALA
01472 if(location == cpu){
01473 #endif
01474 ret = v[cblas_izamax(nn, v, 1) - 1];
01475 #ifdef CUDALA
01476 }else{
01477 const int pozice = cublasIzamax(nn, (cuDoubleComplex*)v, 1) - 1;
01478 TEST_CUBLAS("cublasIzamax");
01479 gpuget(1, sizeof(complex<double>), v + pozice, &ret);
01480 }
01481 #endif
01482 return ret;
01483 }
01484
01485
01490 template<>
01491 inline const complex<double> NRVec<complex<double> >::amin() const {
01492 complex<double> ret(0., 0.);
01493 #ifdef CUDALA
01494 if(location == cpu){
01495 #endif
01496
01497 int index(0);
01498 double val(0.0), min_val(std::numeric_limits<double>::max());
01499 complex<double> z_val(0.0, 0.0);
01500
01501 for(register int i=0; i < nn; i++){
01502 z_val = v[i];
01503 val = std::abs(z_val.real()) + std::abs(z_val.imag());
01504 if(val < min_val){ index = i; min_val = val; }
01505 }
01506 ret = v[index];
01507 #ifdef CUDALA
01508 }else{
01509 const int pozice = cublasIzamin(nn, (cuDoubleComplex*)v, 1) - 1;
01510 TEST_CUBLAS("cublasIzamin");
01511 gpuget(1, sizeof(complex<double>), v + pozice, &ret);
01512 }
01513 #endif
01514 return ret;
01515 }
01516 }
01517
01518 #endif