00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef _LA_SMAT_H_
00024 #define _LA_SMAT_H_
00025 #include "la_traits.h"
00026
00027 namespace LA {
00028 #define NN2 (nn*(nn+1)/2)
00029
00030
00031
// NRSMat: symmetric matrix stored in packed form — only the lower triangle,
// nn*(nn+1)/2 elements (the NN2 macro), is kept in the flat array v.
// Storage is reference-counted (shallow copies share v/count); with CUDALA the
// data may live on CPU or GPU, selected by 'location'.
template <class T>
class NRSMat{
protected:
int nn;		// matrix dimension (number of rows == columns)
T *v;		// packed lower-triangle data, NN2 == nn*(nn+1)/2 elements
int *count;	// shared reference count for copy-on-write semantics
#ifdef CUDALA
GPUID location;	// which device currently owns v
#endif
public:
friend class NRVec<T>;
friend class NRMat<T>;

// destructor: decrements the reference count, frees storage when it drops to zero
~NRSMat();

// default constructor: empty, unallocated matrix
inline NRSMat() : nn(0),v(0),count(0) {
#ifdef CUDALA
location = DEFAULT_LOC;
#endif
};

// allocate an n x n symmetric matrix (contents uninitialized)
inline explicit NRSMat(const int n, const GPUID loc = undefined);

// n x n matrix with every packed element set to a
inline NRSMat(const T &a, const int n);

// n x n matrix initialized from a packed array of NN2 elements
inline NRSMat(const T *a, const int n);

// shallow copy: shares storage, bumps the reference count
inline NRSMat(const NRSMat &rhs);

// build a complex matrix from the real (or imaginary) part of a non-complex one
NRSMat(const typename LA_traits_complex<T>::NRSMat_Noncomplex_type &rhs, bool imagpart = false);

// symmetrize / pack a full NRMat into packed symmetric storage
explicit NRSMat(const NRMat<T> &rhs);

// reinterpret a vector of NN2 elements as a packed symmetric matrix (shares storage)
explicit NRSMat(const NRVec<T> &rhs, const int n);

// shallow assignment (shares storage via reference count)
NRSMat & operator=(const NRSMat &rhs);

// deep-copy assignment: assign then force a private copy
NRSMat & operator|=(const NRSMat &rhs);

// fill with random values of magnitude up to x
void randomize(const typename LA_traits<T>::normtype &x);

// set the diagonal to a (scalar assignment)
NRSMat & operator=(const T &a);


// current reference count (0 for an unallocated matrix)
inline int getcount() const {return count?*count:0;}

#ifdef CUDALA
inline GPUID getlocation() const {return location;}
void moveto(const GPUID dest);
#else
// without CUDA support everything lives on the CPU and moveto is a no-op
inline GPUID getlocation() const {return cpu;}
void moveto(const GPUID dest) {};
#endif

// elementwise comparison of the packed storage
const bool operator!=(const NRSMat &rhs) const {if(nn!=rhs.nn) return 1; return LA_traits<T>::gencmp(v,rhs.v,NN2);};
const bool operator==(const NRSMat &rhs) const {return !(*this != rhs);};

// scalar ops act on all packed elements (*=) or on the diagonal (+=, -=)
inline NRSMat & operator*=(const T &a);
inline NRSMat & operator+=(const T &a);
inline NRSMat & operator-=(const T &a);
inline NRSMat & operator+=(const NRSMat &rhs);
inline NRSMat & operator-=(const NRSMat &rhs);
const NRSMat operator-() const;

inline const NRSMat operator*(const T &a) const;
inline const NRSMat operator+(const T &a) const;
inline const NRSMat operator-(const T &a) const;
inline const NRSMat operator+(const NRSMat &rhs) const;
inline const NRSMat operator-(const NRSMat &rhs) const;

// mixed symmetric/dense arithmetic yields a dense NRMat
inline const NRMat<T> operator+(const NRMat<T> &rhs) const;
inline const NRMat<T> operator-(const NRMat<T> &rhs) const;
const NRMat<T> operator*(const NRSMat &rhs) const;
const NRMat<T> operator*(const NRMat<T> &rhs) const;

// inner products over the packed storage / with a vector
const T dot(const NRSMat &rhs) const;
const T dot(const NRVec<T> &rhs) const;

// matrix-vector products, delegated to NRVec::gemv
const NRVec<T> operator*(const NRVec<T> &rhs) const {NRVec<T> result(nn,rhs.getlocation()); result.gemv((T)0,*this,'n',(T)1,rhs); return result;};
const NRVec<complex<T> > operator*(const NRVec<complex<T> > &rhs) const {NRVec<complex<T> > result(nn,rhs.getlocation()); result.gemv((T)0,*this,'n',(T)1,rhs); return result;};

// extract the diagonal into a vector (optionally dividing; see implementation)
const T* diagonalof(NRVec<T> &, const bool divide = 0, bool cache = false) const;

// r = beta*r + alpha*(*this)*x
void gemv(const T beta, NRVec<T> &r, const char trans, const T alpha, const NRVec<T> &x) const {r.gemv(beta,*this,trans,alpha,x);};
void gemv(const T beta, NRVec<complex<T> > &r, const char trans, const T alpha, const NRVec<complex<T> > &x) const {r.gemv(beta,*this,trans,alpha,x);};

// direct access into the packed array by linear index
inline const T& operator[](const int ij) const;
inline T& operator[](const int ij);

// access by (row, column); both orderings map to the same packed element
inline const T& operator()(const int i, const int j) const;
inline T& operator()(const int i, const int j);

inline int nrows() const;
inline int ncols() const;
inline int size() const;	// number of stored (packed) elements, NN2

// whether element (i,j) is read from the transposed position in packed storage
inline bool transp(const int i, const int j) const {return i>j;}
const typename LA_traits<T>::normtype norm(const T scalar = (T)0) const;
void axpy(const T alpha, const NRSMat &x);	// *this += alpha*x

// elements of largest / smallest absolute value
inline const T amax() const;
inline const T amin() const;

const T trace() const;
void get(int fd, bool dimensions = 1, bool transp = 0);		// binary read
void put(int fd, bool dimensions = 1, bool transp = 0) const;	// binary write

// detach from shared storage before mutation (copy-on-write)
void copyonwrite();

void clear() {copyonwrite(); LA_traits<T>::clear(v,NN2);};
void resize(const int n);

// raw access to the packed data
inline operator T*();
inline operator const T*() const;
void fprintf(FILE *f, const char *format, const int modulo) const;
void fscanf(FILE *f, const char *format);

// conversions from sparse representations
explicit NRSMat(const SparseMat<T> &rhs);
explicit NRSMat(const SparseSMat<T> &rhs);
inline void simplify() {};	// no-op; kept for interface compatibility with sparse types
bool issymmetric() const {return 1;}
};
00173
00174 }
00175
00176 #include "vec.h"
00177 #include "mat.h"
00178
00179 namespace LA {
00180
00181
// Allocate an n x n packed symmetric matrix; contents are left uninitialized.
// @param n matrix dimension
// @param loc target device; 'undefined' falls back to DEFAULT_LOC
template <typename T>
inline NRSMat<T>::NRSMat(const int n, const GPUID loc): nn(n), count(new int(1)) {
#ifdef CUDALA
location = (loc == undefined?DEFAULT_LOC:loc);
if(location == cpu){
#endif
v = new T[NN2];
#ifdef CUDALA
}else{
v = (T*) gpualloc(NN2*sizeof(T));
}
#endif
}
00200
00201
// Allocate an n x n packed symmetric matrix with every stored element set to a.
// For a == 0 the CPU path uses memset as a fast zero-fill.
// @param a fill value for all NN2 packed elements
// @param n matrix dimension
template <typename T>
inline NRSMat<T>::NRSMat(const T& a, const int n) : nn(n), count(new int(1)) {
#ifdef CUDALA
location = DEFAULT_LOC;
if(location == cpu){
#endif
v = new T[NN2];
if(a != (T)0) for(register int i = 0; i<NN2; i++) v[i] = a;
else memset(v, 0, NN2*sizeof(T));

#ifdef CUDALA
}else{
v = (T*) gpualloc(NN2*sizeof(T));
cublasSetVector(NN2, sizeof(T), &a, 0, v, 1); // stride 0 broadcasts the single value
}
#endif
}
00224
00225
00231 template <typename T>
00232 inline NRSMat<T>::NRSMat(const T *a, const int n) : nn(n), count(new int(1)) {
00233 #ifdef CUDALA
00234 location = DEFAULT_LOC;
00235 if(location == cpu){
00236 #endif
00237 memcpy(v, a, NN2*sizeof(T));
00238 #ifdef CUDALA
00239 }else{
00240 v = (T*) gpualloc(NN2*sizeof(T));
00241 cublasSetVector(NN2, sizeof(T), a, 1, v, 1);
00242 }
00243 #endif
00244
00245 }
00246
00247
00252 template <typename T>
00253 inline NRSMat<T>::NRSMat(const NRSMat<T> &rhs) {
00254 #ifdef CUDALA
00255 location = rhs.location;
00256 #endif
00257 v = rhs.v;
00258 nn = rhs.nn;
00259 count = rhs.count;
00260 if(count) (*count)++;
00261 }
00262
00263
00269 template <typename T>
00270 NRSMat<T>::NRSMat(const NRVec<T> &rhs, const int n) {
00271 #ifdef CUDALA
00272 location = rhs.location;
00273 #endif
00274 nn = n;
00275 #ifdef DEBUG
00276 if(NN2 != rhs.size()){ laerror("incompatible dimensions in NRSMat<T>::NRSMat(const NRVec<T>&, const int)"); }
00277 #endif
00278 count = rhs.count;
00279 v = rhs.v;
00280 (*count)++;
00281 }
00282
00283
00284
// Specialization: scale a real symmetric matrix in place by a.
// Uses BLAS dscal over all NN2 packed elements (cuBLAS on GPU).
template<>
inline NRSMat<double> & NRSMat<double>::operator*=(const double &a) {
copyonwrite(); // detach from shared storage before mutating
#ifdef CUDALA
if(location == cpu){
#endif
cblas_dscal(NN2, a, v, 1);
#ifdef CUDALA
}else{
cublasDscal(NN2, a, v, 1);
TEST_CUBLAS("cublasDscal");
}
#endif
return *this;
}
00304
00305
// Specialization: scale a complex symmetric matrix in place by a.
// Uses BLAS zscal over all NN2 packed elements (cuBLAS on GPU).
template<>
inline NRSMat<complex<double> > &
NRSMat<complex<double> >::operator*=(const complex<double> &a) {
copyonwrite(); // detach from shared storage before mutating
#ifdef CUDALA
if(location == cpu){
#endif
cblas_zscal(NN2, &a, v, 1);
#ifdef CUDALA
}else{
// cuBLAS takes the scalar by value in its own complex type
const cuDoubleComplex _a = make_cuDoubleComplex(a.real(), a.imag());
cublasZscal(NN2, _a, (cuDoubleComplex*)v, 1);
TEST_CUBLAS("cublasZscal");
}
#endif
return *this;
}
00327
00328
00329
00335 template <typename T>
00336 inline NRSMat<T> & NRSMat<T>::operator*=(const T &a) {
00337 NOT_GPU(*this);
00338
00339 copyonwrite();
00340 for(register int i = 0; i<NN2; ++i) v[i] *= a;
00341 return *this;
00342 }
00343
00344
00345
00350 template <typename T>
00351 inline NRSMat<T> & NRSMat<T>::operator+=(const T &a) {
00352 NOT_GPU(*this);
00353
00354 copyonwrite();
00355 for(register int i = 0; i<nn; i++) v[i*(i+1)/2 + i] += a;
00356 return *this;
00357 }
00358
00359
00365 template <typename T>
00366 inline NRSMat<T> & NRSMat<T>::operator-=(const T &a) {
00367 NOT_GPU(*this);
00368
00369 copyonwrite();
00370 for(register int i = 0; i<nn; i++) v[i*(i+1)/2+i] -= a;
00371 return *this;
00372 }
00373
00374
// Specialization: elementwise += of two real symmetric matrices via daxpy.
template<>
inline NRSMat<double>& NRSMat<double>::operator+=(const NRSMat<double> & rhs) {
#ifdef DEBUG
if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat<double>& NRSMat<double>::operator+=(const NRSMat<double> &)");
#endif
SAME_LOC(*this, rhs); // both operands must live on the same device
copyonwrite();

#ifdef CUDALA
if(location == cpu){
#endif
cblas_daxpy(NN2, 1.0, rhs.v, 1, v, 1);
#ifdef CUDALA
}else{
cublasDaxpy(NN2, 1.0, rhs.v, 1, v, 1);
TEST_CUBLAS("cublasDaxpy");
}
#endif
return *this;
}
00399
00400
// Specialization: elementwise += of two complex symmetric matrices via zaxpy
// with unit coefficient (CONE / CUONE).
template<>
inline NRSMat<complex<double> >& NRSMat<complex<double> >::operator+=(const NRSMat<complex<double> > & rhs) {
#ifdef DEBUG
if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat<complex<double> >& NRSMat<complex<double> >::operator+=(const NRSMat<complex<double> > &)");
#endif
SAME_LOC(*this, rhs); // both operands must live on the same device
copyonwrite();

#ifdef CUDALA
if(location == cpu){
#endif
cblas_zaxpy(NN2, &CONE, rhs.v, 1, v, 1);
#ifdef CUDALA
}else{
cublasZaxpy(NN2, CUONE, (cuDoubleComplex*)(rhs.v), 1, (cuDoubleComplex*)v, 1);
TEST_CUBLAS("cublasZaxpy");
}
#endif
return *this;
}
00425
00426
00431 template <typename T>
00432 inline NRSMat<T>& NRSMat<T>::operator+=(const NRSMat<T>& rhs) {
00433 #ifdef DEBUG
00434 if(nn != rhs.nn) laerror("incompatible NRSMat<T>& NRSMat<T>::operator+=(const NRSMat<T> &)");
00435 #endif
00436 NOT_GPU(*this);
00437 SAME_LOC(*this, rhs);
00438
00439 copyonwrite();
00440 for(register int i = 0; i<NN2; ++i) v[i] += rhs.v[i];
00441 return *this;
00442 }
00443
00444
// Specialization: elementwise -= of two real symmetric matrices via daxpy
// with coefficient -1.
template<>
inline NRSMat<double>& NRSMat<double>::operator-=(const NRSMat<double>& rhs) {
#ifdef DEBUG
if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat<double>& NRSMat<double>::operator-=(const NRSMat<double> &)");
#endif
SAME_LOC(*this, rhs); // both operands must live on the same device
copyonwrite();

#ifdef CUDALA
if(location == cpu){
#endif
cblas_daxpy(NN2, -1.0, rhs.v, 1, v, 1);
#ifdef CUDALA
}else{
cublasDaxpy(NN2, -1.0, rhs.v, 1, v, 1);
TEST_CUBLAS("cublasDaxpy");
}
#endif
return *this;
}
00469
00470
// Specialization: elementwise -= of two complex symmetric matrices via zaxpy
// with coefficient -1 (CMONE / CUMONE).
template<>
inline NRSMat<complex<double> >& NRSMat<complex<double> >::operator-=(const NRSMat<complex<double> >& rhs) {
#ifdef DEBUG
if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat<complex<double> >& NRSMat<complex<double> >::operator-=(const NRSMat<complex<double> > &)");
#endif
SAME_LOC(*this, rhs); // both operands must live on the same device
copyonwrite();

#ifdef CUDALA
if(location == cpu){
#endif
cblas_zaxpy(NN2, &CMONE, rhs.v, 1, v, 1);
#ifdef CUDALA
}else{
cublasZaxpy(NN2, CUMONE, (cuDoubleComplex*)(rhs.v), 1, (cuDoubleComplex*)v, 1);
TEST_CUBLAS("cublasZaxpy");
}
#endif
return *this;
}
00495
00496
00501 template <typename T>
00502 inline NRSMat<T>& NRSMat<T>::operator-=(const NRSMat<T>& rhs) {
00503 #ifdef DEBUG
00504 if(nn != rhs.nn) laerror("incompatible NRSMat<T>& NRSMat<T>::operator-=(const NRSMat<T> &)");
00505 #endif
00506 NOT_GPU(*this);
00507 copyonwrite();
00508
00509 for(register int i = 0; i<NN2; ++i) v[i] -= rhs.v[i];
00510 return *this;
00511 }
00512
00513
00514
00519 template <typename T>
00520 inline const NRMat<T> NRSMat<T>::operator+(const NRMat<T> &rhs) const {
00521 return NRMat<T>(rhs) += *this;
00522 }
00523
00524
00529 template <typename T>
00530 inline const NRMat<T> NRSMat<T>::operator-(const NRMat<T> &rhs) const {
00531 return NRMat<T>(-rhs) += *this;
00532 }
00533
00534
// Mutable access to a packed element by linear index (0 <= ij < NN2).
// In DEBUG builds, refuses use on shared (count>1) or unallocated storage.
template <typename T>
inline T& NRSMat<T>::operator[](const int ij) {
#ifdef DEBUG
if(_LA_count_check && *count != 1) laerror("T& NRSMat<T>::operator[] used for matrix with count>1");
if(ij<0 || ij>=NN2) laerror("T& NRSMat<T>::operator[] out of range");
if(!v) laerror("T& NRSMat<T>::operator[] used for unallocated NRSmat<T> object");
#endif
NOT_GPU(*this);

return v[ij];
}
00552
00553
// Read-only access to a packed element by linear index (0 <= ij < NN2).
template <typename T>
inline const T & NRSMat<T>::operator[](const int ij) const {
#ifdef DEBUG
if(ij<0 || ij>=NN2) laerror("T& NRSMat<T>::operator[] out of range");
if(!v) laerror("T& NRSMat<T>::operator[] used for unallocated NRSmat<T> object");
#endif
NOT_GPU(*this);

return v[ij];
}
00571
00572
// Map 0-based (i,j) to the linear index in packed lower-triangle storage.
// Symmetric in its arguments: (i,j) and (j,i) yield the same index.
template<typename T>
inline T SMat_index(T i, T j) {
if(i < j){ const T t = i; i = j; j = t; }	// normalize so that i >= j
return i*(i+1)/2 + j;
}
00583
00584
// Packed-storage index for 0-based (i,j) when the caller guarantees i >= j.
template<typename T>
inline T SMat_index_igej(T i, T j) {
const T rowbase = i*(i+1)/2;	// start of packed row i
return rowbase + j;
}
00595
00596
// Packed-storage index for 0-based (i,j) when the caller guarantees i <= j.
template<typename T>
inline T SMat_index_ilej(T i, T j) {
const T rowbase = j*(j+1)/2;	// start of packed row j
return rowbase + i;
}
00607
00608
// Map 1-based (i,j) to the linear index in packed lower-triangle storage.
// Symmetric in its arguments.
template<typename T>
inline T SMat_index_1(T i, T j) {
if(j > i){ const T t = i; i = j; j = t; }	// normalize so that i >= j
return i*(i-1)/2 + (j - 1);
}
00619
00620
// Packed-storage index for 1-based (i,j) when the caller guarantees i >= j.
template<typename T>
inline T SMat_index_1igej(T i, T j) {
return (i-1)*i/2 + (j - 1);
}
00631
00632
// Packed-storage index for 1-based (i,j) when the caller guarantees i <= j.
template<typename T>
inline T SMat_index_1ilej(T i, T j) {
return (j-1)*j/2 + (i - 1);
}
00643
00644
00645
// Antisymmetric packed index for 0-based (i,j): diagonal elements are not
// stored, so (i,i) maps to the sentinel -1. Symmetric in its arguments.
template<typename T>
inline T ASMat_index(T i, T j)
{
if(i == j) return -1;
if(i < j){ const T t = i; i = j; j = t; }	// normalize so that i > j
return i*(i-1)/2 + j;
}
00652
// Antisymmetric packed index for 1-based (i,j): diagonal elements are not
// stored, so (i,i) maps to the sentinel -1. Symmetric in its arguments.
template<typename T>
inline T ASMat_index_1(T i, T j)
{
if(i == j) return -1;
if(i < j){ const T t = i; i = j; j = t; }	// normalize so that i > j
return (i-2)*(i-1)/2 + (j - 1);
}
00659
00660
// Mutable access to element (i,j), 0-based; (i,j) and (j,i) alias the same
// packed slot. DEBUG builds reject shared, out-of-range, or unallocated use.
template <typename T>
inline T & NRSMat<T>::operator()(const int i, const int j) {
#ifdef DEBUG
if(_LA_count_check && *count != 1) laerror("T & NRSMat<T>::operator()(const int, const int) used for matrix with count > 1");
if(i<0 || i>=nn || j<0 || j>=nn) laerror("T & NRSMat<T>::operator()(const int, const int) out of range");
if(!v) laerror("T & NRSMat<T>::operator()(const int, const int) used for unallocated NRSmat<T> object");
#endif
NOT_GPU(*this);

return v[SMat_index(i,j)];
}
00678
00679
// Read-only access to element (i,j), 0-based; symmetric in its arguments.
template <typename T>
inline const T & NRSMat<T>::operator()(const int i, const int j) const {
#ifdef DEBUG
if(i<0 || i>=nn || j<0 || j>=nn) laerror("T & NRSMat<T>::operator()(const int, const int) out of range");
if(!v) laerror("T & NRSMat<T>::operator()(const int, const int) used for unallocated NRSmat<T> object");
#endif
NOT_GPU(*this);

return v[SMat_index(i,j)];
}
00696
00697
// Number of rows (the matrix is square).
template <typename T>
inline int NRSMat<T>::nrows() const {
return nn;
}
00704
00705
// Number of columns (the matrix is square).
template <typename T>
inline int NRSMat<T>::ncols() const {
return nn;
}
00712
00713
// Number of stored (packed) elements: nn*(nn+1)/2, not nn*nn.
template <typename T>
inline int NRSMat<T>::size() const {
return NN2;
}
00721
00722
// Element of largest absolute value over all NN2 packed elements.
// BLAS idamax returns a 1-based position, hence the "- 1".
template<>
inline const double NRSMat<double>::amax() const {
double ret(0.0);
#ifdef CUDALA
if(location == cpu){
#endif
ret = v[cblas_idamax(NN2, v, 1) - 1];
#ifdef CUDALA
}else{
const int pozice = cublasIdamax(NN2, v, 1) - 1;
TEST_CUBLAS("cublasIdamax");

gpuget(1, sizeof(double), v + pozice, &ret); // copy the single winning element back to host
}
#endif
return ret;
}
00744
00745
00750 template<>
00751 inline const double NRSMat<double>::amin() const {
00752 double ret(0.0);
00753 #ifdef CUDALA
00754 if(location == cpu){
00755 #endif
00756
00757 double val(0.0);
00758 int index(-1);
00759 ret = std::numeric_limits<double>::max();
00760 for(register int i = 0; i < NN2; i++){
00761 val = std::abs(v[i]);
00762 if(val < ret){ index = i; ret = val; }
00763 }
00764 ret = v[index];
00765 #ifdef CUDALA
00766 }else{
00767 const int pozice = cublasIdamin(nn, v, 1) - 1;
00768 TEST_CUBLAS("cublasIdamin");
00769 gpuget(1, sizeof(double), v + pozice, &ret);
00770 }
00771 #endif
00772 return ret;
00773 }
00774
00775
// Element of largest |Re| + |Im| over all NN2 packed elements
// (the 1-norm criterion used by BLAS izamax).
template<>
inline const complex<double> NRSMat< complex<double> >::amax() const{
complex<double> ret(0., 0.);
#ifdef CUDALA
if(location == cpu){
#endif
ret = v[cblas_izamax(NN2, v, 1) - 1]; // izamax is 1-based
#ifdef CUDALA
}else{
const int pozice = cublasIzamax(NN2, (cuDoubleComplex*)v, 1) - 1;
TEST_CUBLAS("cublasIzamax");
gpuget(1, sizeof(complex<double>), v + pozice, &ret); // copy the winner back to host
}
#endif
return ret;
}
00796
00797
00802 template<>
00803 inline const complex<double> NRSMat<complex<double> >::amin() const{
00804 complex<double> ret(0., 0.);
00805 #ifdef CUDALA
00806 if(location == cpu){
00807 #endif
00808
00809 int index(0);
00810 double val(0.0), min_val(0.0);
00811 complex<double> z_val(0.0, 0.0);
00812
00813 min_val = std::numeric_limits<double>::max();
00814 for(register int i = 0; i < NN2; i++){
00815 z_val = v[i];
00816 val = std::abs(z_val.real()) + std::abs(z_val.imag());
00817 if(val < min_val){ index = i; min_val = val; }
00818 }
00819 ret = v[index];
00820 #ifdef CUDALA
00821 }else{
00822 const int pozice = cublasIzamin(nn, (cuDoubleComplex*)v, 1) - 1;
00823 TEST_CUBLAS("cublasIzamin");
00824 gpuget(1, sizeof(complex<double>), v + pozice, &ret);
00825 }
00826 #endif
00827 return ret;
00828 }
00829
00830
// Raw mutable pointer to the packed storage (NN2 elements).
template <typename T>
inline NRSMat<T>::operator T*() {
#ifdef DEBUG
if(!v) laerror("unallocated NRSMat object in NRSMat<T>::operator T*()");
#endif
return v;
}
00840
00841
// Raw read-only pointer to the packed storage (NN2 elements).
template <typename T>
inline NRSMat<T>::operator const T*() const {
#ifdef DEBUG
if(!v) laerror("unallocated NRSMat object in NRSMat<T>::operator const T*()");
#endif
return v;
}
00851
00852
// Destructor: drop one reference; free the data (host or device) and the
// counter only when this was the last owner.
template <typename T>
NRSMat<T>::~NRSMat() {
if(!count) return; // never-allocated object
if(--(*count) <= 0) {
if(v){
#ifdef CUDALA
if(location == cpu){
#endif
delete[] v;
#ifdef CUDALA
}else{ gpufree(v); }
#endif
}
delete count;
}
}
00872
00873
00877 template <typename T>
00878 NRSMat<T> & NRSMat<T>::operator|=(const NRSMat<T> &rhs) {
00879 #ifdef DEBUG
00880 if(!rhs.v) laerror("unallocated NRSMat<T> object in NRSMat<T> & NRSMat<T>::operator|=(const NRSMat<T> &)");
00881 #endif
00882 if(this == &rhs) return *this;
00883 *this = rhs;
00884 this->copyonwrite();
00885 return *this;
00886 }
00887
00888
00889
// Shallow assignment: release this object's reference (freeing if it was the
// last), then share rhs's storage and bump its reference count.
template <typename T>
NRSMat<T> & NRSMat<T>::operator=(const NRSMat<T> & rhs) {
if(this == &rhs) return *this; // self-assignment guard
if(count)
if(--(*count) == 0){
#ifdef CUDALA
if(location == cpu){
#endif
delete [] v;
#ifdef CUDALA
}else{ gpufree(v); }
#endif
delete count;
}
v = rhs.v;
nn = rhs.nn;
count = rhs.count;
#ifdef CUDALA
location = rhs.location;
#endif
if(count) (*count)++; // rhs may be unallocated
return *this;
}
00916
00917
// Ensure exclusive ownership of the packed data: if the storage is shared
// (count > 1), detach by making a private copy; otherwise do nothing.
template <typename T>
void NRSMat<T>::copyonwrite() {
if(!count) laerror("calling NRSMat<T>::copyonwrite() for undefined NRSMat<T> object");
if(*count > 1){
(*count)--; // give up our share of the old storage
count = new int;
*count = 1;
T *newv;
#ifdef CUDALA
if(location == cpu) {
#endif
newv = new T[NN2];
memcpy(newv, v, NN2*sizeof(T));
#ifdef CUDALA
}else{
newv = (T *) gpualloc(NN2*sizeof(T));
// device-to-device copy done via scopy on the raw float words
if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRSMat<T>::copyonwrite()");
cublasScopy(NN2*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
TEST_CUBLAS("cublasScopy");
}
#endif

v = newv;
}
}
00946
00947
// Resize to an n x n matrix (contents become undefined unless n == nn and the
// storage was exclusively owned). n == 0 releases the storage entirely; a
// shared matrix is first detached without copying the old data.
template <typename T>
void NRSMat<T>::resize(const int n) {
#ifdef DEBUG
if(n < 0) laerror("illegal dimension in NRSMat<T>::resize(const int)");
#endif
if(count){
if(n == 0){
// shrink to the empty state, freeing if we were the last owner
if(--(*count) <= 0) {
if(v) {
#ifdef CUDALA
if(location == cpu){
#endif
delete[] (v);
#ifdef CUDALA
}else{ gpufree(v); }
#endif
}
delete count;
}
count = 0;
nn = 0;
v = 0;
return;
}
if(*count > 1){
// shared: detach without copying — a fresh buffer is allocated below
(*count)--;
count = 0;
v = 0;
nn = 0;
}
}
if(!count){
// allocate fresh storage for an exclusively-owned matrix
count = new int;
*count = 1;
nn = n;
#ifdef CUDALA
if(location == cpu){
#endif
v = new T[NN2];
#ifdef CUDALA
}else{ v = (T*) gpualloc(NN2*sizeof(T)); }
#endif

return;
}
if(n != nn){
// exclusively owned but wrong size: reallocate (old contents discarded)
nn = n;
#ifdef CUDALA
if(location == cpu){
#endif
delete[] v;
v = new T[NN2];
#ifdef CUDALA
}else{

gpufree(v);
v = (T*) gpualloc(NN2*sizeof(T));
}
#endif

}
}
01013
01014
// Migrate the packed data between CPU and GPU. If the storage is shared, the
// other owners keep the old copy (their count is decremented and this object
// gets a fresh count of 1); if exclusively owned, the old buffer is freed.
#ifdef CUDALA
template<typename T>
void NRSMat<T>::moveto(const GPUID dest) {
if(location == dest) return; // already there

CPU_GPU(location,dest);
location = dest;

if(v && !count) laerror("internal inconsistency of reference counting 1");
if(!count) return; // empty object: nothing to move

if(v && *count == 0) laerror("internal inconsistency of reference counting 2");
if(!v) return;

T *vold = v;

if(dest == cpu){
// GPU -> CPU
v = new T[NN2];
gpuget(NN2, sizeof(T), vold, v);
if(*count == 1) gpufree(vold);
else {--(*count); count = new int(1);} // detach from the shared GPU copy

}else{
// CPU -> GPU
v = (T *) gpualloc(NN2*sizeof(T));
gpuput(NN2, sizeof(T), vold, v);
if(*count == 1) delete[] vold;
else {--(*count); count = new int(1);} // detach from the shared CPU copy
}
}
#endif
01050
01051
01052
01057 template<typename T>
01058 NRSMat<complex<T> > complexify(const NRSMat<T> &rhs) {
01059 NOT_GPU(rhs);
01060
01061 NRSMat<complex<T> > r(rhs.nrows());
01062 for(register int i = 0; i<rhs.nrows(); ++i)
01063 for(register int j = 0; j<=i; ++j) r(i,j) = rhs(i,j);
01064 return r;
01065 }
01066
01067
01068
01073
01074
01075
01076
01077
01078
01079
01080
01081
01082
01083
01084
01085
01086
01087
01088
01089
01090
// Stream output: prints "n n" on the first line, then the full (unpacked)
// n x n matrix row by row. A GPU-resident matrix is copied to the CPU first.
template <typename T>
std::ostream& operator<<(std::ostream &s, const NRSMat<T> &x) {
#ifdef CUDALA
if(x.getlocation() == cpu){
#endif
int i,j,n;
n = x.nrows();
s << n << ' ' << n << '\n';
for(i = 0;i<n;i++){
for(j = 0; j<n;j++) s << (typename LA_traits_io<T>::IOtype)x(i,j) << (j == n-1 ? '\n' : ' ');
}
return s;
#ifdef CUDALA
}else{
NRSMat<T> tmp = x;
tmp.moveto(cpu); // print from a host-side copy
return s<<tmp;
}
#endif
}
01116
01117
01118
// Stream input: reads "n m" (must be square) followed by the full n x n
// matrix; symmetric slots are simply overwritten twice. For a GPU-resident
// target the data is read on the CPU and then moved back.
template <typename T>
std::istream& operator>>(std::istream &s, NRSMat<T> &x) {
#ifdef CUDALA
if(x.getlocation() == cpu){
#endif
int i,j,n,m;
s >> n >> m;
if(n!=m) laerror("input symmetric matrix not square");
x.resize(n);
typename LA_traits_io<T>::IOtype tmp;
for(i = 0;i<n;i++) for(j = 0; j<m;j++) {s>>tmp; x(i,j)=tmp;}
return s;
#ifdef CUDALA
}else{
NRSMat<T> tmp;
tmp.moveto(cpu); // read on the host, then transfer
s >> tmp;
tmp.moveto(x.getlocation());
x = tmp;
return s;
}
#endif
}
01147
01148
01149
// Generate the commutative scalar op matrix operators (scalar on the left)
// for NRSMat via the shared NRVECMAT_OPER macro.
NRVECMAT_OPER(SMat,+)
NRVECMAT_OPER(SMat,-)
NRVECMAT_OPER(SMat,*)

// Generate the matrix op matrix convenience operators via NRVECMAT_OPER2.
NRVECMAT_OPER2(SMat,+)
NRVECMAT_OPER2(SMat,-)
01163
01164
// NRSMat_from1: thin wrapper over NRSMat that uses 1-based (Fortran-style)
// indices in operator(); all constructors simply forward to the base class.
template<typename T>
class NRSMat_from1 : public NRSMat<T> {
public:
NRSMat_from1(): NRSMat<T>() {};
explicit NRSMat_from1(const int n): NRSMat<T>(n) {};
NRSMat_from1(const NRSMat<T> &rhs): NRSMat<T>(rhs) {};
NRSMat_from1(const T &a, const int n): NRSMat<T>(a,n) {};
NRSMat_from1(const T *a, const int n): NRSMat<T>(a,n) {};
explicit NRSMat_from1(const NRMat<T> &rhs): NRSMat<T>(rhs) {};
explicit NRSMat_from1(const NRVec<T> &rhs, const int n): NRSMat<T>(rhs,n) {};

// read-only element access with 1-based indices (1 <= i,j <= nn)
inline const T& operator() (const int i, const int j) const {
#ifdef DEBUG
if(i<=0||j<=0||i>NRSMat<T>::nn||j>NRSMat<T>::nn) laerror("index in const T& NRSMat<T>::operator() (const int, const int) out of range");
#endif
return NRSMat<T>::v[SMat_index_1(i,j)];
}

// mutable element access with 1-based indices (1 <= i,j <= nn)
inline T& operator() (const int i, const int j){
#ifdef DEBUG
if(i<=0||j<=0||i>NRSMat<T>::nn||j>NRSMat<T>::nn) laerror("index in T& NRSMat<T>::operator() (const int, const int) out of range");
#endif
return NRSMat<T>::v[SMat_index_1(i,j)];
}
};
01193
01194 }
01195 #endif