00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 #ifndef _LA_SMAT_H_
00024 #define _LA_SMAT_H_
00025 #include "la_traits.h"
00026 
00027 namespace LA {
00028 #define NN2 (nn*(nn+1)/2)
00029 
00030 
00031 
00037 template <class T>
00038 class NRSMat{
00039 protected:
00040         int nn;
00041         T *v;
00042         int *count;
00043 #ifdef CUDALA
00044        GPUID location;
00045 #endif
00046 public:
00047         friend class NRVec<T>;
00048         friend class NRMat<T>;
00049         
00050         ~NRSMat();
00051         
00053         inline NRSMat() : nn(0),v(0),count(0) {
00054         #ifdef CUDALA
00055                 location = DEFAULT_LOC;
00056         #endif
00057         };
00058 
00060         inline explicit NRSMat(const int n, const GPUID loc = undefined);
00061         
00063         inline NRSMat(const T &a, const int n);
00064 
00066         inline NRSMat(const T *a, const int n);
00067 
00069         inline NRSMat(const NRSMat &rhs);
00070 
00072         NRSMat(const typename LA_traits_complex<T>::NRSMat_Noncomplex_type &rhs, bool imagpart = false);
00073 
00075         explicit NRSMat(const NRMat<T> &rhs);
00076 
00078         explicit NRSMat(const NRVec<T> &rhs, const int n);
00079 
00081         NRSMat & operator=(const NRSMat &rhs);
00082 
00084         NRSMat & operator|=(const NRSMat &rhs);
00085 
00087         void randomize(const typename LA_traits<T>::normtype &x);
00088 
00090         NRSMat & operator=(const T &a);
00091 
00092 
00093         inline int getcount() const {return count?*count:0;}
00094 
00095         #ifdef CUDALA
00096                 inline GPUID getlocation() const {return location;}
00097                 void moveto(const GPUID dest);
00098         #else
00099                 inline GPUID getlocation() const {return cpu;}
00100                 void moveto(const GPUID dest) {};
00101         #endif
00102 
00104         const bool operator!=(const NRSMat &rhs) const {if(nn!=rhs.nn) return 1; return LA_traits<T>::gencmp(v,rhs.v,NN2);};
00106         const bool operator==(const NRSMat &rhs) const {return !(*this != rhs);};
00107 
00108         inline NRSMat & operator*=(const T &a);
00109         inline NRSMat & operator+=(const T &a);
00110         inline NRSMat & operator-=(const T &a);
00111         inline NRSMat & operator+=(const NRSMat &rhs);
00112         inline NRSMat & operator-=(const NRSMat &rhs);
00113         const NRSMat operator-() const;
00114 
00115         inline const NRSMat operator*(const T &a) const;
00116         inline const NRSMat operator+(const T &a) const;
00117         inline const NRSMat operator-(const T &a) const;
00118         inline const NRSMat operator+(const NRSMat &rhs) const;
00119         inline const NRSMat operator-(const NRSMat &rhs) const;
00120 
00121         inline const NRMat<T> operator+(const NRMat<T> &rhs) const;
00122         inline const NRMat<T> operator-(const NRMat<T> &rhs) const;
00123         const NRMat<T> operator*(const NRSMat &rhs) const;
00124         const NRMat<T> operator*(const NRMat<T> &rhs) const;
00125 
00126         const T dot(const NRSMat &rhs) const;
00127         const T dot(const NRVec<T> &rhs) const;
00128 
00129         const NRVec<T> operator*(const NRVec<T> &rhs) const {NRVec<T> result(nn,rhs.getlocation()); result.gemv((T)0,*this,'n',(T)1,rhs); return result;};
00130         const NRVec<complex<T> > operator*(const NRVec<complex<T> > &rhs) const {NRVec<complex<T> > result(nn,rhs.getlocation()); result.gemv((T)0,*this,'n',(T)1,rhs); return result;};
00131 
00132         const T* diagonalof(NRVec<T> &, const bool divide = 0, bool cache = false) const;
00133 
00134         void gemv(const T beta, NRVec<T> &r, const char trans, const T alpha, const NRVec<T> &x) const {r.gemv(beta,*this,trans,alpha,x);};
00135         void gemv(const T beta, NRVec<complex<T> > &r, const char trans, const T alpha, const NRVec<complex<T> > &x) const {r.gemv(beta,*this,trans,alpha,x);};
00136 
00137         inline const T& operator[](const int ij) const;
00138         inline T& operator[](const int ij);
00139 
00140         inline const T& operator()(const int i, const int j) const;
00141         inline T& operator()(const int i, const int j);
00142 
00143         inline int nrows() const;
00144         inline int ncols() const;
00145         inline int size() const;
00146 
00147         inline bool transp(const int i, const int j) const {return i>j;} 
00148         const typename LA_traits<T>::normtype norm(const T scalar = (T)0) const;
00149         void axpy(const T alpha, const NRSMat &x); 
00150 
00151         inline const T amax() const;
00152         inline const T amin() const;
00153 
00154         const T trace() const;
00155         void get(int fd, bool dimensions = 1, bool transp = 0);
00156         void put(int fd, bool dimensions = 1, bool transp = 0) const;
00157 
00158         void copyonwrite();
00159 
00160         void clear() {copyonwrite(); LA_traits<T>::clear(v,NN2);}; 
00161         void resize(const int n);
00162 
00163         inline operator T*();
00164         inline operator const T*() const;
00165         void fprintf(FILE *f, const char *format, const int modulo) const; 
00166         void fscanf(FILE *f, const char *format); 
00167 
00168         explicit NRSMat(const SparseMat<T> &rhs);               
00169         explicit NRSMat(const SparseSMat<T> &rhs);               
00170         inline void simplify() {}; 
00171         bool issymmetric() const {return 1;}
00172 };
00173 
00174 }
00175 
00176 #include "vec.h"
00177 #include "mat.h"
00178 
00179 namespace LA {
00180 
00181 
00187 template <typename T>
00188 inline NRSMat<T>::NRSMat(const int n, const GPUID loc): nn(n), count(new int(1)) {
00189 #ifdef CUDALA
00190         location = (loc == undefined?DEFAULT_LOC:loc);
00191         if(location == cpu){
00192 #endif
00193                 v = new T[NN2];
00194 #ifdef CUDALA
00195         }else{
00196                 v = (T*) gpualloc(NN2*sizeof(T));
00197         }
00198 #endif
00199 }
00200 
00201 
00207 template <typename T>
00208 inline NRSMat<T>::NRSMat(const T& a, const int n) : nn(n), count(new int(1)) {
00209 #ifdef CUDALA
00210         location = DEFAULT_LOC;
00211         if(location == cpu){
00212 #endif
00213                 v = new T[NN2];
00214                 if(a != (T)0) for(register int i = 0; i<NN2; i++) v[i] = a;
00215                 else memset(v, 0, NN2*sizeof(T));
00216 
00217 #ifdef CUDALA
00218         }else{
00219                 v = (T*) gpualloc(NN2*sizeof(T));
00220                 cublasSetVector(NN2, sizeof(T), &a, 0, v, 1);
00221         }
00222 #endif
00223 }
00224 
00225 
00231 template <typename T>
00232 inline NRSMat<T>::NRSMat(const T *a, const int n) : nn(n), count(new int(1)) {
00233 #ifdef CUDALA
00234         location = DEFAULT_LOC;
00235         if(location == cpu){
00236 #endif
00237                 memcpy(v, a, NN2*sizeof(T));
00238 #ifdef CUDALA
00239         }else{
00240                 v = (T*) gpualloc(NN2*sizeof(T));
00241                 cublasSetVector(NN2, sizeof(T), a, 1, v, 1);
00242         }
00243 #endif
00244 
00245 }
00246 
00247 
00252 template <typename T>
00253 inline NRSMat<T>::NRSMat(const NRSMat<T> &rhs) {
00254 #ifdef CUDALA
00255         location = rhs.location;
00256 #endif
00257         v = rhs.v;
00258         nn = rhs.nn;
00259         count = rhs.count;
00260         if(count) (*count)++;
00261 }
00262 
00263 
00269 template <typename T>
00270 NRSMat<T>::NRSMat(const NRVec<T> &rhs, const int n) {
00271 #ifdef CUDALA
00272         location = rhs.location;
00273 #endif
00274         nn = n;
00275 #ifdef DEBUG
00276         if(NN2 != rhs.size()){ laerror("incompatible dimensions in NRSMat<T>::NRSMat(const NRVec<T>&, const int)"); }
00277 #endif
00278         count = rhs.count;
00279         v = rhs.v;
00280         (*count)++;
00281 }
00282 
00283 
00284 
00289 template<>
00290 inline NRSMat<double> & NRSMat<double>::operator*=(const double &a) {
00291         copyonwrite();
00292 #ifdef CUDALA
00293         if(location == cpu){
00294 #endif
00295                 cblas_dscal(NN2, a, v, 1);
00296 #ifdef CUDALA
00297         }else{
00298                 cublasDscal(NN2, a, v, 1);
00299                 TEST_CUBLAS("cublasDscal");
00300         }
00301 #endif
00302         return *this;
00303 }
00304 
00305 
00310 template<>
00311 inline NRSMat<complex<double> > &
00312 NRSMat<complex<double> >::operator*=(const complex<double> &a) {
00313         copyonwrite();
00314 #ifdef CUDALA
00315         if(location == cpu){
00316 #endif
00317                 cblas_zscal(NN2, &a, v, 1);
00318 #ifdef CUDALA
00319         }else{
00320                 const cuDoubleComplex _a = make_cuDoubleComplex(a.real(), a.imag());
00321                 cublasZscal(NN2, _a, (cuDoubleComplex*)v, 1);
00322                 TEST_CUBLAS("cublasZscal");
00323         }
00324 #endif
00325         return *this;
00326 }
00327 
00328 
00329 
00335 template <typename T>
00336 inline NRSMat<T> & NRSMat<T>::operator*=(const T &a) {
00337         NOT_GPU(*this);
00338 
00339         copyonwrite();
00340         for(register int i = 0; i<NN2; ++i) v[i] *= a;
00341         return *this;
00342 }
00343 
00344 
00345 
00350 template <typename T>
00351 inline NRSMat<T> & NRSMat<T>::operator+=(const T &a) {
00352         NOT_GPU(*this);
00353 
00354         copyonwrite();
00355         for(register int i = 0; i<nn; i++) v[i*(i+1)/2 + i] += a;
00356         return *this;
00357 }
00358 
00359 
00365 template <typename T>
00366 inline NRSMat<T> & NRSMat<T>::operator-=(const T &a) {
00367         NOT_GPU(*this);
00368 
00369         copyonwrite();
00370         for(register int i = 0; i<nn; i++) v[i*(i+1)/2+i] -= a;
00371         return *this;
00372 }
00373 
00374 
00379 template<>
00380 inline NRSMat<double>& NRSMat<double>::operator+=(const NRSMat<double> & rhs) {
00381 #ifdef DEBUG
00382         if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat<double>& NRSMat<double>::operator+=(const NRSMat<double> &)");
00383 #endif
00384         SAME_LOC(*this, rhs);
00385         copyonwrite();
00386 
00387 #ifdef CUDALA
00388         if(location == cpu){
00389 #endif
00390                 cblas_daxpy(NN2, 1.0, rhs.v, 1, v, 1);
00391 #ifdef CUDALA
00392         }else{
00393                 cublasDaxpy(NN2, 1.0, rhs.v, 1, v, 1);
00394                 TEST_CUBLAS("cublasDaxpy");
00395         }
00396 #endif
00397         return *this;
00398 }
00399 
00400 
00405 template<>
00406 inline NRSMat<complex<double> >& NRSMat<complex<double> >::operator+=(const NRSMat<complex<double> > & rhs) {
00407 #ifdef DEBUG
00408         if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat<complex<double> >& NRSMat<complex<double> >::operator+=(const NRSMat<complex<double> > &)");
00409 #endif
00410         SAME_LOC(*this, rhs);
00411         copyonwrite();
00412 
00413 #ifdef CUDALA
00414         if(location == cpu){
00415 #endif
00416                 cblas_zaxpy(NN2, &CONE, rhs.v, 1, v, 1);
00417 #ifdef CUDALA
00418         }else{
00419                 cublasZaxpy(NN2, CUONE, (cuDoubleComplex*)(rhs.v), 1, (cuDoubleComplex*)v, 1);
00420                 TEST_CUBLAS("cublasZaxpy");
00421         }
00422 #endif
00423         return *this;
00424 }
00425 
00426 
00431 template <typename T>
00432 inline NRSMat<T>& NRSMat<T>::operator+=(const NRSMat<T>& rhs) {
00433 #ifdef DEBUG
00434         if(nn != rhs.nn) laerror("incompatible NRSMat<T>& NRSMat<T>::operator+=(const NRSMat<T> &)");
00435 #endif
00436         NOT_GPU(*this);
00437         SAME_LOC(*this, rhs);
00438 
00439         copyonwrite();
00440         for(register int i = 0; i<NN2; ++i) v[i] += rhs.v[i];
00441         return *this;
00442 }
00443 
00444 
00449 template<>
00450 inline NRSMat<double>& NRSMat<double>::operator-=(const NRSMat<double>& rhs) {
00451 #ifdef DEBUG
00452         if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat<double>& NRSMat<double>::operator-=(const NRSMat<double> &)");
00453 #endif
00454         SAME_LOC(*this, rhs);
00455         copyonwrite();
00456 
00457 #ifdef CUDALA
00458         if(location == cpu){
00459 #endif
00460                 cblas_daxpy(NN2, -1.0, rhs.v, 1, v, 1);
00461 #ifdef CUDALA
00462         }else{
00463                 cublasDaxpy(NN2, -1.0, rhs.v, 1, v, 1);
00464                 TEST_CUBLAS("cublasDaxpy");
00465         }
00466 #endif
00467         return *this;
00468 }
00469 
00470 
00475 template<>
00476 inline NRSMat<complex<double> >& NRSMat<complex<double> >::operator-=(const NRSMat<complex<double> >& rhs) {
00477 #ifdef DEBUG
00478         if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat<complex<double> >& NRSMat<complex<double> >::operator-=(const NRSMat<complex<double> > &)");
00479 #endif
00480         SAME_LOC(*this, rhs);
00481         copyonwrite();
00482 
00483 #ifdef CUDALA
00484         if(location == cpu){
00485 #endif
00486                 cblas_zaxpy(NN2, &CMONE, rhs.v, 1, v, 1);
00487 #ifdef CUDALA
00488         }else{
00489                 cublasZaxpy(NN2, CUMONE, (cuDoubleComplex*)(rhs.v), 1, (cuDoubleComplex*)v, 1);
00490                 TEST_CUBLAS("cublasZaxpy");
00491         }
00492 #endif
00493         return *this;
00494 }
00495 
00496 
00501 template <typename T>
00502 inline NRSMat<T>& NRSMat<T>::operator-=(const NRSMat<T>& rhs) {
00503 #ifdef DEBUG
00504         if(nn != rhs.nn) laerror("incompatible NRSMat<T>& NRSMat<T>::operator-=(const NRSMat<T> &)");
00505 #endif
00506         NOT_GPU(*this);
00507         copyonwrite();
00508 
00509         for(register int i = 0; i<NN2; ++i) v[i] -= rhs.v[i];
00510         return *this;
00511 }
00512 
00513 
00514 
00519 template <typename T>
00520 inline const NRMat<T> NRSMat<T>::operator+(const NRMat<T> &rhs) const {
00521         return NRMat<T>(rhs) += *this;
00522 }
00523 
00524 
00529 template <typename T>
00530 inline const NRMat<T> NRSMat<T>::operator-(const NRMat<T> &rhs) const {
00531         return NRMat<T>(-rhs) += *this;
00532 }
00533 
00534 
00541 template <typename T>
00542 inline T& NRSMat<T>::operator[](const int ij) {
00543 #ifdef DEBUG
00544         if(_LA_count_check && *count != 1) laerror("T& NRSMat<T>::operator[] used for matrix with count>1");
00545         if(ij<0 || ij>=NN2) laerror("T& NRSMat<T>::operator[] out of range");
00546         if(!v) laerror("T& NRSMat<T>::operator[] used for unallocated NRSmat<T> object");
00547 #endif
00548         NOT_GPU(*this);
00549 
00550         return v[ij];
00551 }
00552 
00553 
00561 template <typename T>
00562 inline const T & NRSMat<T>::operator[](const int ij) const {
00563 #ifdef DEBUG
00564         if(ij<0 || ij>=NN2) laerror("T& NRSMat<T>::operator[] out of range");
00565         if(!v) laerror("T& NRSMat<T>::operator[] used for unallocated NRSmat<T> object");
00566 #endif
00567         NOT_GPU(*this);
00568 
00569         return v[ij];
00570 }
00571 
00572 
00579 template<typename T>
00580 inline T SMat_index(T i, T j) {
00581         return (i>=j) ? i*(i+1)/2+j : j*(j+1)/2+i;
00582 }
00583 
00584 
00591 template<typename T>
00592 inline T SMat_index_igej(T i, T j) {
00593         return i*(i+1)/2+j;
00594 }
00595 
00596 
00603 template<typename T>
00604 inline T SMat_index_ilej(T i, T j) {
00605         return j*(j+1)/2+i;
00606 }
00607 
00608 
00615 template<typename T>
00616 inline T SMat_index_1(T i, T j) {
00617         return (i>=j)? i*(i-1)/2+j-1 : j*(j-1)/2+i-1;
00618 }
00619 
00620 
00627 template<typename T>
00628 inline T SMat_index_1igej(T i, T j) {
00629         return i*(i-1)/2+j-1;
00630 }
00631 
00632 
00639 template<typename T>
00640 inline T SMat_index_1ilej(T i, T j) {
00641         return j*(j-1)/2+i-1;
00642 }
00643 
00644 
00645 
00646 template<typename T>
00647 inline T ASMat_index(T i, T j)
00648 {
00649 if(i == j) return -1;
00650 return (i>j) ? i*(i-1)/2+j : j*(j-1)/2+i;
00651 }
00652 
00653 template<typename T>
00654 inline T ASMat_index_1(T i, T j)
00655 {
00656 if(i == j) return -1;
00657 return (i>j)? (i-2)*(i-1)/2+j-1 : (j-2)*(j-1)/2+i-1;
00658 }
00659 
00660 
00667 template <typename T>
00668 inline T & NRSMat<T>::operator()(const int i, const int j) {
00669 #ifdef DEBUG
00670         if(_LA_count_check && *count != 1) laerror("T & NRSMat<T>::operator()(const int, const int) used for matrix with count > 1");
00671         if(i<0 || i>=nn || j<0 || j>=nn) laerror("T & NRSMat<T>::operator()(const int, const int) out of range");
00672         if(!v) laerror("T & NRSMat<T>::operator()(const int, const int) used for unallocated NRSmat<T> object");
00673 #endif
00674         NOT_GPU(*this);
00675 
00676         return v[SMat_index(i,j)];
00677 }
00678 
00679 
00686 template <typename T>
00687 inline const T & NRSMat<T>::operator()(const int i, const int j) const {
00688 #ifdef DEBUG
00689         if(i<0 || i>=nn || j<0 || j>=nn) laerror("T & NRSMat<T>::operator()(const int, const int) out of range");
00690         if(!v) laerror("T & NRSMat<T>::operator()(const int, const int) used for unallocated NRSmat<T> object");
00691 #endif
00692         NOT_GPU(*this);
00693 
00694         return v[SMat_index(i,j)];
00695 }
00696 
00697 
00700 template <typename T>
00701 inline int NRSMat<T>::nrows() const {
00702         return nn;
00703 }
00704 
00705 
00708 template <typename T>
00709 inline int NRSMat<T>::ncols() const {
00710         return nn;
00711 }
00712 
00713 
00716 template <typename T>
00717 inline int NRSMat<T>::size() const {
00718         return NN2;
00719 }
00720 
00721 
00722 
00727 template<>
00728 inline const double NRSMat<double>::amax() const {
00729         double ret(0.0);
00730 #ifdef CUDALA
00731         if(location == cpu){
00732 #endif
00733                 ret = v[cblas_idamax(NN2, v, 1) - 1];
00734 #ifdef CUDALA
00735         }else{
00736                 const int pozice = cublasIdamax(NN2, v, 1) - 1;
00737                 TEST_CUBLAS("cublasIdamax");
00738 
00739                 gpuget(1, sizeof(double), v + pozice, &ret);
00740         }
00741 #endif
00742         return ret;
00743 }
00744 
00745 
00750 template<>
00751 inline const double NRSMat<double>::amin() const {
00752         double ret(0.0);
00753 #ifdef CUDALA
00754         if(location == cpu){
00755 #endif
00756                 
00757                 double val(0.0);
00758                 int index(-1);
00759                 ret = std::numeric_limits<double>::max();
00760                 for(register int i = 0; i < NN2; i++){
00761                         val = std::abs(v[i]);
00762                         if(val < ret){ index = i; ret = val; }
00763                 }
00764                 ret = v[index];
00765 #ifdef CUDALA
00766         }else{
00767                 const int pozice = cublasIdamin(nn, v, 1) - 1;
00768                 TEST_CUBLAS("cublasIdamin");
00769                 gpuget(1, sizeof(double), v + pozice, &ret);
00770         }
00771 #endif
00772         return ret;
00773 }
00774 
00775 
00780 template<>
00781 inline const complex<double> NRSMat< complex<double> >::amax() const{
00782         complex<double> ret(0., 0.);
00783 #ifdef CUDALA
00784         if(location == cpu){
00785 #endif
00786                 ret = v[cblas_izamax(NN2, v, 1) - 1];
00787 #ifdef CUDALA
00788         }else{
00789                 const int pozice = cublasIzamax(NN2, (cuDoubleComplex*)v, 1) - 1;
00790                 TEST_CUBLAS("cublasIzamax");
00791                 gpuget(1, sizeof(complex<double>), v + pozice, &ret);
00792         }
00793 #endif
00794         return ret;
00795 }
00796 
00797 
00802 template<>
00803 inline const complex<double> NRSMat<complex<double> >::amin() const{
00804         complex<double> ret(0., 0.);
00805 #ifdef CUDALA
00806         if(location == cpu){
00807 #endif
00808                 
00809                 int index(0);
00810                 double val(0.0), min_val(0.0);
00811                 complex<double> z_val(0.0, 0.0);
00812 
00813                 min_val = std::numeric_limits<double>::max();
00814                 for(register int i = 0; i < NN2; i++){
00815                         z_val = v[i];
00816                         val = std::abs(z_val.real()) + std::abs(z_val.imag());
00817                         if(val < min_val){ index = i; min_val = val; }  
00818                 }
00819                 ret = v[index];
00820 #ifdef CUDALA
00821         }else{
00822                 const int pozice = cublasIzamin(nn, (cuDoubleComplex*)v, 1) - 1;
00823                 TEST_CUBLAS("cublasIzamin");
00824                 gpuget(1, sizeof(complex<double>), v + pozice, &ret);
00825         }
00826 #endif
00827         return ret;
00828 }
00829 
00830 
00833 template <typename T>
00834 inline NRSMat<T>::operator T*() {
00835 #ifdef DEBUG
00836         if(!v) laerror("unallocated NRSMat object in NRSMat<T>::operator T*()");
00837 #endif
00838         return v;
00839 }
00840 
00841 
00844 template <typename T>
00845 inline NRSMat<T>::operator const T*() const {
00846 #ifdef DEBUG
00847         if(!v) laerror("unallocated NRSMat object in NRSMat<T>::operator const T*()");
00848 #endif
00849         return v;
00850 }
00851 
00852 
00856 template <typename T>
00857 NRSMat<T>::~NRSMat() {
00858         if(!count) return;
00859         if(--(*count) <= 0) {
00860                 if(v){
00861 #ifdef CUDALA
00862                         if(location == cpu){
00863 #endif
00864                                 delete[] v;
00865 #ifdef CUDALA
00866                         }else{ gpufree(v); }
00867 #endif
00868                 }
00869                 delete count;
00870         }
00871 }
00872 
00873 
00877 template <typename T>
00878 NRSMat<T> & NRSMat<T>::operator|=(const NRSMat<T> &rhs) {
00879 #ifdef DEBUG
00880         if(!rhs.v) laerror("unallocated NRSMat<T> object in NRSMat<T> & NRSMat<T>::operator|=(const NRSMat<T> &)");
00881 #endif
00882         if(this == &rhs) return *this;
00883         *this = rhs;
00884         this->copyonwrite();
00885         return *this;
00886 }
00887 
00888 
00889 
00893 template <typename T>
00894 NRSMat<T> & NRSMat<T>::operator=(const NRSMat<T> & rhs) {
00895         if(this == &rhs) return *this;
00896         if(count)
00897                 if(--(*count) == 0){
00898 #ifdef CUDALA
00899                         if(location == cpu){
00900 #endif
00901                                 delete [] v;
00902 #ifdef CUDALA
00903                         }else{ gpufree(v); }
00904 #endif
00905                         delete count;
00906                 }
00907         v = rhs.v;
00908         nn = rhs.nn;
00909         count = rhs.count;
00910 #ifdef CUDALA
00911         location = rhs.location;
00912 #endif
00913         if(count) (*count)++;
00914         return *this;
00915 }
00916 
00917 
00921 template <typename T>
00922 void NRSMat<T>::copyonwrite() {
00923         if(!count) laerror("calling NRSMat<T>::copyonwrite() for undefined NRSMat<T> object");
00924         if(*count > 1){
00925                 (*count)--;
00926                 count = new int;
00927                 *count = 1;
00928                 T *newv;
00929 #ifdef CUDALA
00930                 if(location == cpu) {
00931 #endif
00932                         newv = new T[NN2];
00933                         memcpy(newv, v, NN2*sizeof(T));
00934 #ifdef CUDALA
00935                 }else{
00936                         newv = (T *) gpualloc(NN2*sizeof(T));
00937                         if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRSMat<T>::copyonwrite()");
00938                         cublasScopy(NN2*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
00939                         TEST_CUBLAS("cublasScopy");
00940                 }
00941 #endif
00942 
00943                  v = newv;
00944         }
00945 }
00946 
00947 
00951 template <typename T>
00952 void NRSMat<T>::resize(const int n) {
00953 #ifdef DEBUG
00954         if(n < 0) laerror("illegal dimension in NRSMat<T>::resize(const int)");
00955 #endif
00956         if(count){
00957                 if(n == 0){
00958                         if(--(*count) <= 0) {
00959                                 if(v) {
00960 #ifdef CUDALA
00961                                         if(location == cpu){
00962 #endif
00963                                                 delete[] (v);
00964 #ifdef CUDALA
00965                                         }else{ gpufree(v); }
00966 #endif
00967                                 }
00968                                 delete count;
00969                         }
00970                         count = 0;
00971                         nn = 0;
00972                         v = 0;
00973                         return;
00974                 }
00975                 if(*count > 1){
00976                         (*count)--;
00977                         count = 0;
00978                         v = 0;
00979                         nn = 0;
00980                 }
00981         }
00982         if(!count){
00983                 count = new int;
00984                 *count = 1;
00985                 nn = n;
00986 #ifdef CUDALA
00987                 if(location == cpu){
00988 #endif
00989                         v = new T[NN2];
00990 #ifdef CUDALA
00991                 }else{ v = (T*) gpualloc(NN2*sizeof(T)); }
00992 #endif
00993 
00994                 return;
00995         }
00996         if(n != nn){
00997                 nn = n;
00998 #ifdef CUDALA
00999                 if(location == cpu){
01000 #endif
01001                         delete[] v;
01002                         v = new T[NN2];
01003 #ifdef CUDALA
01004                 }else{
01005 
01006                         gpufree(v);
01007                         v = (T*) gpualloc(NN2*sizeof(T));
01008                 }
01009 #endif
01010 
01011         }
01012 }
01013 
01014 
01019 #ifdef CUDALA
01020 template<typename T>
01021 void NRSMat<T>::moveto(const GPUID dest) {
01022         if(location == dest) return;
01023 
01024         CPU_GPU(location,dest);
01025         location = dest;
01026 
01027         if(v && !count) laerror("internal inconsistency of reference counting 1");
01028         if(!count) return;
01029 
01030         if(v && *count == 0) laerror("internal inconsistency of reference counting 2");
01031         if(!v) return;
01032 
01033         T *vold = v;
01034 
01035         if(dest == cpu){ 
01036                 v = new T[NN2];
01037                 gpuget(NN2, sizeof(T), vold, v);
01038                 if(*count == 1) gpufree(vold);
01039                 else {--(*count); count = new int(1);}
01040 
01041         }else{    
01042 
01043                 v = (T *) gpualloc(NN2*sizeof(T));
01044                 gpuput(NN2, sizeof(T), vold, v);
01045                 if(*count == 1) delete[] vold;
01046                 else {--(*count); count = new int(1);}
01047         }
01048 }
01049 #endif
01050 
01051 
01052 
01057 template<typename T>
01058 NRSMat<complex<T> > complexify(const NRSMat<T> &rhs) {
01059         NOT_GPU(rhs);
01060 
01061         NRSMat<complex<T> > r(rhs.nrows());
01062         for(register int i = 0; i<rhs.nrows(); ++i) 
01063                 for(register int j = 0; j<=i; ++j) r(i,j) = rhs(i,j);
01064         return r;
01065 }
01066 
01067 
01068 
01073 
01074 
01075 
01076 
01077 
01078 
01079 
01080 
01081 
01082 
01083 
01084 
01085 
01086 
01087 
01088 
01089 
01090 
01096 template <typename T>
01097 std::ostream& operator<<(std::ostream &s, const NRSMat<T> &x) {
01098 #ifdef CUDALA
01099         if(x.getlocation() == cpu){
01100 #endif
01101                 int i,j,n;
01102                 n = x.nrows();
01103                 s << n << ' ' << n << '\n';
01104                 for(i = 0;i<n;i++){
01105                         for(j = 0; j<n;j++) s << (typename LA_traits_io<T>::IOtype)x(i,j) << (j == n-1 ? '\n' : ' ');
01106                 }
01107                 return s;
01108 #ifdef CUDALA
01109         }else{
01110                 NRSMat<T> tmp = x;
01111                 tmp.moveto(cpu);
01112                 return s<<tmp;
01113         }
01114 #endif
01115 }
01116 
01117 
01118 
01124 template <typename T>
01125 std::istream& operator>>(std::istream  &s, NRSMat<T> &x) {
01126 #ifdef CUDALA
01127         if(x.getlocation() == cpu){
01128 #endif
01129                 int i,j,n,m;
01130                 s >> n >> m;
01131                 if(n!=m) laerror("input symmetric matrix not square");
01132                 x.resize(n);
01133                 typename LA_traits_io<T>::IOtype tmp;
01134                 for(i = 0;i<n;i++) for(j = 0; j<m;j++) {s>>tmp; x(i,j)=tmp;}
01135                 return s;
01136 #ifdef CUDALA
01137         }else{
01138                 NRSMat<T> tmp;
01139                 tmp.moveto(cpu);
01140                 s >> tmp;
01141                 tmp.moveto(x.getlocation());
01142                 x = tmp;
01143                 return s;
01144         }
01145 #endif
01146 }
01147 
01148 
01149 
01153 NRVECMAT_OPER(SMat,+)
01154 NRVECMAT_OPER(SMat,-)
01155 NRVECMAT_OPER(SMat,*)
01156 
01157 
01161 NRVECMAT_OPER2(SMat,+)
01162 NRVECMAT_OPER2(SMat,-)
01163 
01164 
01168 template<typename T>
01169 class NRSMat_from1 : public NRSMat<T> {
01170 public:
01171         NRSMat_from1(): NRSMat<T>() {};
01172         explicit NRSMat_from1(const int n): NRSMat<T>(n) {};
01173         NRSMat_from1(const NRSMat<T> &rhs): NRSMat<T>(rhs) {}; 
01174         NRSMat_from1(const T &a, const int n): NRSMat<T>(a,n) {}; 
01175         NRSMat_from1(const T *a, const int n): NRSMat<T>(a,n) {}; 
01176         explicit NRSMat_from1(const NRMat<T> &rhs): NRSMat<T>(rhs) {};
01177         explicit NRSMat_from1(const NRVec<T> &rhs, const int n): NRSMat<T>(rhs,n) {};
01178 
01179         inline const T& operator() (const int i, const int j) const {
01180         #ifdef DEBUG
01181                 if(i<=0||j<=0||i>NRSMat<T>::nn||j>NRSMat<T>::nn) laerror("index in const T& NRSMat<T>::operator() (const int, const int) out of range");
01182         #endif
01183                 return NRSMat<T>::v[SMat_index_1(i,j)];
01184         }
01185         
01186         inline  T& operator() (const int i, const int j){
01187         #ifdef DEBUG
01188                 if(i<=0||j<=0||i>NRSMat<T>::nn||j>NRSMat<T>::nn) laerror("index in T& NRSMat<T>::operator() (const int, const int) out of range");
01189         #endif
01190                 return NRSMat<T>::v[SMat_index_1(i,j)];
01191         }
01192 };
01193 
01194 }
01195 #endif