| #ifndef TH_GENERIC_FILE |
| #define TH_GENERIC_FILE "generic/THTensorLapack.c" |
| #else |
| |
| static int THTensor_(lapackClone)(THTensor *r_, THTensor *m, int forced) |
| { |
| int clone; |
| |
| if (!forced && m->stride[0] == 1 && m->stride[1] == m->size[0]) |
| { |
| clone = 0; |
| THTensor_(set)(r_,m); |
| } |
| else |
| { |
| clone = 1; |
| /* we need to copy */ |
| THTensor_(resize2d)(r_,m->size[1],m->size[0]); |
| THTensor_(transpose)(r_,NULL,0,1); |
| THTensor_(copy)(r_,m); |
| } |
| return clone; |
| } |
| |
| TH_API void THTensor_(gesv)(THTensor *rb_, THTensor *ra_, THTensor *b, THTensor *a) |
| { |
| int n, nrhs, lda, ldb, info; |
| THIntTensor *ipiv; |
| THTensor *ra__; |
| THTensor *rb__; |
| |
| int clonea; |
| int cloneb; |
| int destroya; |
| int destroyb; |
| |
| |
| if (a == NULL || ra_ == a) /* possibly destroy the inputs */ |
| { |
| ra__ = THTensor_(new)(); |
| clonea = THTensor_(lapackClone)(ra__,ra_,0); |
| destroya = 1; |
| } |
| else /*we want to definitely clone and use ra_ and rb_ as computational space*/ |
| { |
| clonea = THTensor_(lapackClone)(ra_,a,1); |
| ra__ = ra_; |
| destroya = 0; |
| } |
| if (b == NULL || rb_ == b) /* possibly destroy the inputs */ |
| { |
| rb__ = THTensor_(new)(); |
| cloneb = THTensor_(lapackClone)(rb__,rb_,0); |
| destroyb = 1; |
| } |
| else /*we want to definitely clone and use ra_ and rb_ as computational space*/ |
| { |
| cloneb = THTensor_(lapackClone)(rb_,b,1); |
| rb__ = rb_; |
| destroyb = 0; |
| } |
| |
| THArgCheck(ra__->nDimension == 2, 1, "A should be 2 dimensional"); |
| THArgCheck(rb__->nDimension == 2, 2, "b should be 2 dimensional"); |
| THArgCheck(ra__->size[0] == ra__->size[1], 1, "A should be square"); |
| THArgCheck(rb__->size[0] == ra__->size[0], 2, "A,b size incomptable"); |
| |
| n = (int)ra__->size[0]; |
| nrhs = (int)rb__->size[1]; |
| lda = n; |
| ldb = n; |
| |
| ipiv = THIntTensor_newWithSize1d((long)n); |
| THLapack_(gesv)(n, nrhs, |
| THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), |
| THTensor_(data)(rb__), ldb, &info); |
| |
| /* clean up */ |
| if (destroya) |
| { |
| if (clonea) |
| { |
| THTensor_(copy)(ra_,ra__); |
| } |
| THTensor_(free)(ra__); |
| } |
| if (destroyb) |
| { |
| if (cloneb) |
| { |
| THTensor_(copy)(rb_,rb__); |
| } |
| THTensor_(free)(rb__); |
| } |
| |
| if (info < 0) |
| { |
| THError("Lapack gesv : Argument %d : illegal value", -info); |
| } |
| else if (info > 0) |
| { |
| THError("Lapack gesv : U(%d,%d) is zero, singular U.", info,info); |
| } |
| |
| THIntTensor_free(ipiv); |
| } |
| |
| TH_API void THTensor_(gels)(THTensor *rb_, THTensor *ra_, THTensor *b, THTensor *a) |
| { |
| int m, n, nrhs, lda, ldb, info, lwork; |
| char transpose; |
| THTensor *work = NULL; |
| real wkopt = 0; |
| |
| THTensor *ra__; |
| THTensor *rb__; |
| |
| int clonea; |
| int cloneb; |
| int destroya; |
| int destroyb; |
| |
| |
| if (a == NULL || ra_ == a) /* possibly destroy the inputs */ |
| { |
| ra__ = THTensor_(new)(); |
| clonea = THTensor_(lapackClone)(ra__,ra_,0); |
| destroya = 1; |
| } |
| else /*we want to definitely clone and use ra_ and rb_ as computational space*/ |
| { |
| clonea = THTensor_(lapackClone)(ra_,a,1); |
| ra__ = ra_; |
| destroya = 0; |
| } |
| if (b == NULL || rb_ == b) /* possibly destroy the inputs */ |
| { |
| rb__ = THTensor_(new)(); |
| cloneb = THTensor_(lapackClone)(rb__,rb_,0); |
| destroyb = 1; |
| } |
| else /*we want to definitely clone and use ra_ and rb_ as computational space*/ |
| { |
| cloneb = THTensor_(lapackClone)(rb_,b,1); |
| rb__ = rb_; |
| destroyb = 0; |
| } |
| |
| THArgCheck(ra__->nDimension == 2, 1, "A should be 2 dimensional"); |
| THArgCheck(ra_->size[0] == rb__->size[0], 2, "size incompatible A,b"); |
| |
| m = ra__->size[0]; |
| n = ra__->size[1]; |
| nrhs = rb__->size[1]; |
| lda = m; |
| ldb = m; |
| info = 0; |
| |
| // get optimal workspace size |
| THLapack_(gels)('N', m, n, nrhs, THTensor_(data)(ra__), lda, |
| THTensor_(data)(rb__), ldb, |
| &wkopt, -1, &info); |
| lwork = (int)wkopt; |
| work = THTensor_(newWithSize1d)(lwork); |
| THLapack_(gels)('N', m, n, nrhs, THTensor_(data)(ra__), lda, |
| THTensor_(data)(rb__), ldb, |
| THTensor_(data)(work), lwork, &info); |
| |
| //printf("lwork = %d,%g\n",lwork,THTensor_(data)(work)[0]); |
| if (info != 0) |
| { |
| THError("Lapack gels : Argument %d : illegal value", -info); |
| } |
| /* clean up */ |
| if (destroya) |
| { |
| if (clonea) |
| { |
| THTensor_(copy)(ra_,ra__); |
| } |
| THTensor_(free)(ra__); |
| } |
| if (destroyb) |
| { |
| if (cloneb) |
| { |
| THTensor_(copy)(rb_,rb__); |
| } |
| THTensor_(free)(rb__); |
| } |
| THTensor_(free)(work); |
| } |
| |
| TH_API void THTensor_(syev)(THTensor *re_, THTensor *rv_, THTensor *a, const char *jobz, const char *uplo) |
| { |
| int n, lda, lwork, info; |
| THTensor *work; |
| real wkopt; |
| |
| THTensor *rv__; |
| |
| int clonea; |
| int destroy; |
| |
| if (a == NULL) /* possibly destroy the inputs */ |
| { |
| rv__ = THTensor_(new)(); |
| clonea = THTensor_(lapackClone)(rv__,rv_,0); |
| destroy = 1; |
| } |
| else /*we want to definitely clone and use ra_ and rb_ as computational space*/ |
| { |
| clonea = THTensor_(lapackClone)(rv_,a,1); |
| rv__ = rv_; |
| destroy = 0; |
| } |
| |
| THArgCheck(rv__->nDimension == 2, 2, "A should be 2 dimensional"); |
| |
| n = rv__->size[0]; |
| lda = n; |
| |
| THTensor_(resize1d)(re_,n); |
| |
| // get optimal workspace size |
| THLapack_(syev)(jobz[0], uplo[0], n, THTensor_(data)(rv__), lda, |
| THTensor_(data)(re_), &wkopt, -1, &info); |
| lwork = (int)wkopt; |
| work = THTensor_(newWithSize1d)(lwork); |
| THLapack_(syev)(jobz[0], uplo[0], n, THTensor_(data)(rv__), lda, |
| THTensor_(data)(re_), THTensor_(data)(work), lwork, &info); |
| |
| if (info > 0) |
| { |
| THError(" Lapack syev : Failed to converge. %d off-diagonal elements of an didn't converge to zero",info); |
| } |
| else if (info < 0) |
| { |
| THError("Lapack syev : Argument %d : illegal value", -info); |
| } |
| /* clean up */ |
| if (destroy) |
| { |
| if (clonea) |
| { |
| THTensor_(copy)(rv_,rv__); |
| } |
| THTensor_(free)(rv__); |
| } |
| THTensor_(free)(work); |
| } |
| |
| TH_API void THTensor_(gesvd)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *a, const char* jobu) |
| { |
| THTensor *ra_ = THTensor_(new)(); |
| THTensor_(gesvd2)(ru_, rs_, rv_, ra_, a, jobu); |
| THTensor_(free)(ra_); |
| } |
| |
| TH_API void THTensor_(gesvd2)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *ra_, THTensor *a, const char* jobu) |
| { |
| int k,m, n, lda, ldu, ldvt, lwork, info; |
| THTensor *work; |
| real wkopt; |
| |
| THTensor *ra__; |
| |
| int clonea; |
| int destroy; |
| |
| if (a == NULL) /* possibly destroy the inputs */ |
| { |
| ra__ = THTensor_(new)(); |
| clonea = THTensor_(lapackClone)(ra__,ra_,0); |
| destroy = 1; |
| } |
| else /*we want to definitely clone */ |
| { |
| clonea = THTensor_(lapackClone)(ra_,a,1); |
| ra__ = ra_; |
| destroy = 0; |
| } |
| |
| THArgCheck(ra__->nDimension == 2, 2, "A should be 2 dimensional"); |
| |
| m = ra__->size[0]; |
| n = ra__->size[1]; |
| k = (m < n ? m : n); |
| |
| lda = m; |
| ldu = m; |
| ldvt = n; |
| THTensor_(resize1d)(rs_,k); |
| THTensor_(resize2d)(rv_,ldvt,n); |
| if (*jobu == 'A') |
| { |
| THTensor_(resize2d)(ru_,m,ldu); |
| } |
| else |
| { |
| THTensor_(resize2d)(ru_,k,ldu); |
| } |
| THTensor_(transpose)(ru_,NULL,0,1); |
| /* we want to return V not VT*/ |
| /*THTensor_(transpose)(rv_,NULL,0,1);*/ |
| |
| THLapack_(gesvd)(jobu[0],jobu[0], |
| m,n,THTensor_(data)(ra__),lda, |
| THTensor_(data)(rs_), |
| THTensor_(data)(ru_), |
| ldu, |
| THTensor_(data)(rv_), ldvt, |
| &wkopt, -1, &info); |
| lwork = (int)wkopt; |
| work = THTensor_(newWithSize1d)(lwork); |
| THLapack_(gesvd)(jobu[0],jobu[0], |
| m,n,THTensor_(data)(ra__),lda, |
| THTensor_(data)(rs_), |
| THTensor_(data)(ru_), |
| ldu, |
| THTensor_(data)(rv_), ldvt, |
| THTensor_(data)(work),lwork, &info); |
| if (info > 0) |
| { |
| THError(" Lapack gesvd : %d superdiagonals failed to converge.",info); |
| } |
| else if (info < 0) |
| { |
| THError("Lapack gesvd : Argument %d : illegal value", -info); |
| } |
| |
| /* clean up */ |
| if (destroy) |
| { |
| if (clonea) |
| { |
| THTensor_(copy)(ra_,ra__); |
| } |
| THTensor_(free)(ra__); |
| } |
| THTensor_(free)(work); |
| } |
| |
| TH_API void THTensor_(getri)(THTensor *ra_, THTensor *a) |
| { |
| int m, n, lda, info, lwork; |
| real wkopt; |
| THIntTensor *ipiv; |
| THTensor *work; |
| THTensor *ra__; |
| |
| int clonea; |
| int destroy; |
| |
| if (a == NULL) /* possibly destroy the inputs */ |
| { |
| ra__ = THTensor_(new)(); |
| clonea = THTensor_(lapackClone)(ra__,ra_,0); |
| destroy = 1; |
| } |
| else /*we want to definitely clone */ |
| { |
| clonea = THTensor_(lapackClone)(ra_,a,1); |
| ra__ = ra_; |
| destroy = 0; |
| } |
| |
| THArgCheck(ra__->nDimension == 2, 2, "A should be 2 dimensional"); |
| m = ra__->size[0]; |
| n = ra__->size[1]; |
| THArgCheck(m == n, 2, "A should be square"); |
| lda = m; |
| ipiv = THIntTensor_newWithSize1d((long)m); |
| |
| /* Run LU */ |
| THLapack_(getrf)(n, n, THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), &info); |
| if (info > 0) |
| { |
| THError("Lapack getrf : U(%d,%d) is 0, U is singular",info, info); |
| } |
| else if (info < 0) |
| { |
| THError("Lapack getrf : Argument %d : illegal value", -info); |
| } |
| |
| /* Run inverse */ |
| THLapack_(getri)(n, THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), &wkopt, -1, &info); |
| lwork = (int)wkopt; |
| work = THTensor_(newWithSize1d)(lwork); |
| THLapack_(getri)(n, THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), THTensor_(data)(work), lwork, &info); |
| if (info > 0) |
| { |
| THError("Lapack getri : U(%d,%d) is 0, U is singular",info, info); |
| } |
| else if (info < 0) |
| { |
| THError("Lapack getri : Argument %d : illegal value", -info); |
| } |
| |
| /* clean up */ |
| if (destroy) |
| { |
| if (clonea) |
| { |
| THTensor_(copy)(ra_,ra__); |
| } |
| THTensor_(free)(ra__); |
| } |
| THTensor_(free)(work); |
| THIntTensor_free(ipiv); |
| } |
| |
| #endif |