LA library: matexp.h Source File

00001 /*
00002     LA: linear algebra C++ interface library
00003     Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
00004 
00005     This program is free software: you can redistribute it and/or modify
00006     it under the terms of the GNU General Public License as published by
00007     the Free Software Foundation, either version 3 of the License, or
00008     (at your option) any later version.
00009 
00010     This program is distributed in the hope that it will be useful,
00011     but WITHOUT ANY WARRANTY; without even the implied warranty of
00012     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013     GNU General Public License for more details.
00014 
00015     You should have received a copy of the GNU General Public License
00016     along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017 */
00018 #ifndef _MATEXP_H_
00019 #define _MATEXP_H_
00020 //general routine for polynomial of a matrix, tuned to minimize the number
00021 //of matrix-matrix multiplications on cost of additions and memory
00022 // the polynom and exp routines will work on any type, for which traits class
00023 // is defined containing definition of an element type, norm and axpy operation
00024 #include "la_traits.h"
00025 #include "laerror.h"
00026 #include <math.h>
00027 
00028 namespace LA {
00029 
00030 template<class T,class R>
00031 const T polynom0(const T &x, const NRVec<R> &c)
00032 {
00033 int order=c.size()-1;
00034 T z,y;
00035 
00036 //trivial reference implementation by horner scheme
00037 if(order==0) {y=x; y=c[0];} //to avoid the problem: we do not know the size of the matrix to contruct a scalar one
00038 else
00039         {
00040         int i;
00041         z=x*c[order];
00042         for(i=order-1; i>=0; i--)
00043                 {
00044                 //std::cerr<<"TEST polynom0 "<<i<<'\n';
00045                 if(i<order-1) z=y*x;
00046                 y=z+c[i];
00047                 }
00048         }
00049 
00050 return y;
00051 }
00052 
00053 
00054 
00055 //algorithm which minimazes number of multiplications, at the cost of storage
00056 template<class T,class R>
00057 const T polynom(const T &x, const NRVec<R> &c)
00058 {
00059 int n=c.size()-1;
00060 int i,j,k,m=0,t;
00061 
00062 if(n<=4) return polynom0(x,c); //here the horner scheme is optimal
00063 
00064 //first find m which minimizes the number of multiplications
00065 j=10*n;
00066 for(i=2;i<=n+1;i++)
00067     {   
00068     t=i-2+2*(n/i)-(n%i)?0:1;
00069     if(t<j)
00070         {
00071         j=t;
00072         m=i;
00073         }
00074     }
00075 
00076 
00077 //allocate array for powers up to m
00078 T *xpows = new T[m];
00079 xpows[0]=x;
00080 for(i=1;i<m;i++) xpows[i]=xpows[i-1]*x;
00081 
00082 
00083 //run the summation loop
00084 T r,s,f;
00085 k= -1;
00086 for(i=0; i<=n/m;i++)
00087         {
00088         for(j=0;j<m;j++)
00089                 {
00090                 k++;
00091                 if(k>n) break;
00092                 if(j==0) {
00093                           if(i==0) s=x; /*just to get the dimensions of the matrix*/ 
00094                           s=c[k]; /*create diagonal matrix*/
00095                           }
00096                 else  
00097                         LA_traits<T>::axpy(s,xpows[j-1],c[k]); //general  s+=xpows[j-1]*c[k]; but more efficient for matrices
00098                 }
00099 
00100         if(i==0) {r=s; f=xpows[m-1];}
00101         else
00102                 {
00103                 r+= s*f;
00104                 f=f*xpows[m-1];
00105                 }
00106         }
00107  
00108 delete[] xpows;
00109 return r;
00110 }
00111 
00112 
00113 //for general objects
00114 template<class T>
00115 const T ncommutator ( const T &x, const T &y, int nest=1, const bool right=1)
00116 {
00117 T z;
00118 if(right) {z=x; while(--nest>=0) z=z*y-y*z;}
00119 else {z=y; while(--nest>=0) z=x*z-z*x;}
00120 return z;
00121 }
00122 
00123 template<class T>
00124 const T nanticommutator ( const T &x, const T &y, int nest=1, const bool right=1)
00125 {
00126 T z;
00127 if(right) {z=x; while(--nest>=0) z=z*y+y*z;}
00128 else {z=y; while(--nest>=0) z=x*z+z*x;}
00129 return z;
00130 }
00131 
00132 //general BCH expansion (can be written more efficiently in a specialization for matrices)
00133 template<class T>
00134 const T BCHexpansion (const T &h, const T &t, const int n, const bool verbose=0)\
00135 {
00136 T result=h;
00137 double factor=1.;
00138 T z=h;
00139 for(int i=1; i<=n; ++i)
00140         {
00141         factor/=i;
00142         z= z*t-t*z;
00143         if(verbose) std::cerr << "BCH contribution at order "<<i<<" : "<<z.norm()*factor<<std::endl;
00144         result+= z*factor; 
00145         }
00146 return result;
00147 }
00148 
00149 
00150 template<class T>
00151 const T ipow( const T &x, int i)
00152 {
00153 if(i<0) laerror("negative exponent in ipow");
00154 if(i==0) {T r=x; r=(typename LA_traits<T>::elementtype)1; return r;}//trick for matrix dimension
00155 if(i==1) return x;
00156 T y,z;
00157 z=x;
00158 while(!(i&1))
00159         {
00160         z = z*z;
00161         i >>= 1;
00162         }
00163 y=z; 
00164 while((i >>= 1))
00165                 {
00166                 z = z*z;
00167                 if(i&1) y = y*z;
00168                 }
00169 return y;
00170 }
00171 
00172 inline int nextpow2(const double n)
00173 {
00174 const double log2=std::log(2.);
00175 if(n<=.75) return 0; //try to keep the taylor expansion short
00176 if(n<=1.) return 1;
00177 return int(std::ceil(std::log(n)/log2-std::log(.75)));
00178 }
00179 
00180 //should better be computed by mathematica to have accurate last digits, perhaps chebyshev instead, see exp in glibc
00181 //is shared also for sine and cosine now
00182 static const double exptaylor[]={
00183 1.,
00184 1.,
00185 0.5,
00186 0.1666666666666666666666,
00187 0.0416666666666666666666,
00188 0.0083333333333333333333,
00189 0.0013888888888888888888,
00190 0.00019841269841269841253,
00191 2.4801587301587301566e-05,
00192 2.7557319223985892511e-06,
00193 2.7557319223985888276e-07,
00194 2.5052108385441720224e-08,
00195 2.0876756987868100187e-09,
00196 1.6059043836821613341e-10,
00197 1.1470745597729724507e-11,
00198 7.6471637318198164055e-13,
00199 4.7794773323873852534e-14,
00200 2.8114572543455205981e-15,
00201 1.5619206968586225271e-16,
00202 8.2206352466243294955e-18,
00203 4.1103176233121648441e-19,
00204 1.9572941063391262595e-20,
00205 0.};
00206 
00207 
00208 //S is element type of T, but T may be any user-defined
00209 template<class T, class C, class S>
00210 NRVec<C> exp_aux(const T &x, int &power, int maxpower, int maxtaylor, S prescale)
00211 {
00212 
00213 double mnorm= x.norm() * std::abs(prescale);
00214 power=nextpow2(mnorm); 
00215 if(maxpower>=0 && power>maxpower) power=maxpower;
00216 double scale=std::exp(-std::log(2.)*power);
00217 
00218 
00219 //find how long taylor expansion will be necessary
00220 const double precision=1e-14; //further decreasing brings nothing
00221 double s,t;
00222 s=mnorm*scale;
00223 int n=0;
00224 t=1.;
00225 do      {
00226         n++;
00227         t*=s;
00228         }
00229 while(t*exptaylor[n]>precision);//taylor 0 will terminate in any case
00230 
00231 if(maxtaylor>=0 && n>maxtaylor) n=maxtaylor; //useful e.g. if the matrix is nilpotent in order n+1 as the CC T operator for n electrons
00232 
00233 
00234 int i; //adjust the coefficients in order to avoid scaling the argument
00235 NRVec<C> taylor2(n+1);
00236 for(i=0,t=1.;i<=n;i++)
00237         {
00238         taylor2[i]=exptaylor[i]*t;
00239         t*=scale;
00240         }
00241 //std::cout <<"TEST power, scale "<<power<<" "<<scale<<std::endl; 
00242 //std::cout <<"TEST taylor2 "<<taylor2<<std::endl;
00243 return taylor2;
00244 }
00245 
00246 
00247 
00248 template<class T, class C, class S>
00249 void sincos_aux(NRVec<C> &si, NRVec<C> &co, const T &x, int &power,int maxpower, int maxtaylor, const S prescale)
00250 {
00251 double mnorm= x.norm() * std::abs(prescale);
00252 power=nextpow2(mnorm); 
00253 if(maxpower>=0 && power>maxpower) power=maxpower;
00254 double scale=std::exp(-std::log(2.)*power);
00255 
00256 //find how long taylor expansion will be necessary
00257 const double precision=1e-14; //further decreasing brings nothing
00258 double s,t;
00259 s=mnorm*scale;
00260 int n=0;
00261 t=1.;
00262 do      {
00263         n++;
00264         t*=s;
00265         }
00266 while(t*exptaylor[n]>precision);//taylor 0 will terminate in any case
00267 
00268 if(maxtaylor>=0 && n>maxtaylor) n=maxtaylor; //useful e.g. if the matrix is nilpotent in order n+1 as the CC T operator for n electrons
00269 if((n&1)==0) ++n; //force it to be odd to have same length in sine and cosine
00270 si.resize((n+1)/2);
00271 co.resize((n+1)/2);
00272 
00273 int i; //adjust the coefficients in order to avoid scaling the argument
00274 for(i=0,t=1.;i<=n;i++)
00275         {
00276         if(i&1) si[i>>1] = exptaylor[i]* (i&2?-t:t);
00277         else    co[i>>1] = exptaylor[i]* (i&2?-t:t);
00278         t*=scale;
00279         }
00280 //std::cout <<"TEST sin "<<si<<std::endl;
00281 //std::cout <<"TEST cos "<<co<<std::endl;
00282 }
00283 
00284 
00285 
00286 //it seems that we do not gain anything by polynom vs polynom0, check the m-optimization!
00287 template<class T>
00288 const T exp(const T &x, bool horner=true, int maxpower= -1, int maxtaylor= -1 )
00289 {
00290 int power;
00291 
00292 //prepare the polynom of and effectively scale T
00293 NRVec<typename LA_traits<T>::normtype> taylor2=exp_aux<T,typename LA_traits<T>::normtype,double>(x,power,maxpower,maxtaylor,1.);
00294 
00295 //std::cerr <<"TEST power "<<power<<std::endl;
00296 
00297 T r= horner?polynom0(x,taylor2):polynom(x,taylor2); 
00298 //for accuracy summing from the smallest terms up would be better, but this is more efficient for matrices
00299 
00300 //power the result back
00301 for(int i=0; i<power; i++) r=r*r;
00302 return r;
00303 }
00304 
00305 
00306 //make exp(iH) with real H in real arithmetics
00307 template<class T>
00308 void sincos(T &s, T &c, const T &x, bool horner=true, int maxpower= -1, int maxtaylor= -1 )
00309 {
00310 int power;
00311 
00312 NRVec<typename LA_traits<T>::normtype> taylors,taylorc;
00313 sincos_aux<T,typename LA_traits<T>::normtype>(taylors,taylorc,x,power,maxpower,maxtaylor,1.);
00314 
00315 
00316 //could we save something by computing both polynoms simultaneously?
00317 {
00318 T x2 = x*x;
00319 s = horner?polynom0(x2,taylors):polynom(x2,taylors);
00320 c = horner?polynom0(x2,taylorc):polynom(x2,taylorc);
00321 }
00322 s = s * x;
00323 
00324 //power the results back
00325 for(int i=0; i<power; i++)
00326         {
00327         T tmp = c*c - s*s;
00328         s = s*c; s *= 2.;
00329         c=tmp;
00330         }
00331 }
00332 
00333 
00334 
00335 //this simple implementation seems not to be numerically stable enough
00336 //and probably not efficient either
00337 
00338 template<class M, class V, class MEL>
00339 void exptimesdestructive(const M &mat, V &result, V &rhs,  bool transpose, const MEL scale, int maxpower= -1, int maxtaylor= -1, bool mat_is_0=false) //uses just matrix vector multiplication
00340 {
00341 if(mat_is_0) {result = rhs; LA_traits<V>::copyonwrite(result); return;} //prevent returning a shallow copy of rhs
00342 if(mat.nrows()!=mat.ncols()||(unsigned int) mat.nrows() != (unsigned int)rhs.size()) laerror("inappropriate sizes in exptimes");
00343 
00344 int power;
00345 //prepare the polynom of and effectively scale the matrix
00346 NRVec<typename LA_traits<V>::normtype> taylor2=exp_aux<M,typename LA_traits<V>::normtype>(mat,power,maxpower,maxtaylor,scale);
00347 
00348 V tmp;
00349 
00350 for(int i=1; i<=(1<<power); ++i) //unfortunatelly, here we have to repeat it many times, unlike if the matrix is stored explicitly
00351         {
00352         if(i>1) rhs=result; //apply again to the result of previous application
00353         else result=rhs;
00354         tmp=rhs; //now rhs can be used as scratch       
00355         result*=taylor2[0];
00356         for(int j=1; j<taylor2.size(); ++j)
00357                 {
00358                 mat.gemv(0.,rhs,transpose?'t':'n',scale,tmp);
00359                 tmp=rhs;
00360                 result.axpy(taylor2[j],tmp);
00361                 }
00362         }
00363 
00364 return;
00365 }
00366 
00367 
00368 //actually scale should be elementtype of M, but we do not have it since M can be anything user-defined
00369 //and template paramter for it does not work due to optional arguments
00370 //undecent solution: exptimesreal
00371 //
00372 template<class M, class V>
00373 const V exptimes(const M &mat, V rhs, bool transpose=false, const typename LA_traits<V>::elementtype scale=1., int maxpower= -1, int maxtaylor= -1, bool mat_is_0=false )
00374 {
00375 V result;
00376 exptimesdestructive(mat,result,rhs,transpose,scale,maxpower,maxtaylor,mat_is_0);
00377 return result;
00378 }
00379 
00380 template<class M, class V>
00381 const V exptimesreal(const M &mat, V rhs, bool transpose=false, const typename LA_traits<V>::normtype scale=1., int maxpower= -1, int maxtaylor= -1, bool mat_is_0=false )
00382 {
00383 V result;
00384 exptimesdestructive(mat,result,rhs,transpose,scale,maxpower,maxtaylor,mat_is_0);
00385 return result;
00386 }
00387 
00388 
00389 
00390 
00391 
00392 
00393 
00394 
00395 template<class M, class V, class S>
00396 void sincostimes_simple(const M &mat, V &si, V &co, const V &rhs, const NRVec<typename LA_traits<V>::normtype> &taylors, const NRVec<typename LA_traits<V>::normtype> &taylorc, bool transpose, const S scale)
00397 {
00398 si=rhs * taylors[0];
00399 co=rhs * taylorc[0];
00400 V tmp=rhs;
00401 for(int j=1; j<taylors.size(); ++j)
00402         {
00403         V tmp2(tmp.size());
00404         //multiply by a square of the matrix
00405         mat.gemv(0.,tmp2,transpose?'t':'n',scale,tmp);
00406         mat.gemv(0.,tmp,transpose?'t':'n',scale,tmp2);
00407         si.axpy(taylors[j],tmp);
00408         co.axpy(taylorc[j],tmp);
00409         }
00410 mat.gemv(0.,tmp,transpose?'t':'n',scale,si);
00411 si=tmp;
00412 }
00413 
00414 //this recursion is very inefficient, it is better to use complex exptimes!
00415 template<class M, class V, class S>
00416 void sincostimes_aux(const M &mat, V &si, V &co, const V &rhs, const NRVec<typename LA_traits<V>::normtype> &taylors, const NRVec<typename LA_traits<V>::normtype> &taylorc, bool transpose, const S scale, int power)
00417 {
00418 if(power==0) sincostimes_simple(mat,si,co,rhs,taylors,taylorc,transpose,scale);
00419 else
00420         {
00421         V si2,co2; //no large memory allocated yet - size 0
00422         sincostimes_aux(mat,si2,co2,rhs,taylors,taylorc,transpose,scale,power-1);
00423         sincostimes_aux(mat,si,co,co2,taylors,taylorc,transpose,scale,power-1);
00424         V ss,cs;
00425         sincostimes_aux(mat,ss,cs,si2,taylors,taylorc,transpose,scale,power-1);
00426         co -= ss;
00427         si += cs;
00428         }
00429 }
00430 
00431 
00432 //inefficient, it is better to use complex exptimes!
00433 //again scale should actually be elementtype of M which is inaccessible
00434 template<class M, class V>
00435 void sincostimes(const M &mat, V &si, V &co, const V &rhs,  bool transpose=false, const typename LA_traits<V>::normtype scale=1., int maxpower= -1, int maxtaylor= -1, bool mat_is_0=false) //uses just matrix vector multiplication
00436 {
00437 if(mat_is_0) //prevent returning a shallow copy of rhs
00438         {
00439         co = rhs; 
00440         LA_traits<V>::copyonwrite(co); 
00441         LA_traits<V>::clearme(si);
00442         return;
00443         }
00444 
00445 if(mat.nrows()!=mat.ncols()||(unsigned int) mat.nrows() != (unsigned int)rhs.size()) laerror("inappropriate sizes in sincostimes");
00446 
00447 //prepare the polynom of and effectively scale the matrix
00448 int power;
00449 NRVec<typename LA_traits<V>::normtype> taylors,taylorc;
00450 sincos_aux<M,typename LA_traits<V>::normtype>(taylors,taylorc,mat,power,maxpower,maxtaylor,scale);
00451 if(taylors.size()!=taylorc.size()) laerror("internal error - same size of sin and cos expansions assumed");
00452 //the actual computation and resursive "squaring"
00453 //std::cout <<"TEST power "<<power<<std::endl;
00454 sincostimes_aux(mat,si,co,rhs,taylors,taylorc,transpose,scale,power);
00455 
00456 return;
00457 }
00458 
00459 
00460 //@@@ power series matrix logarithm?
00461 
00462 }//namespace
00463 #endif