Revert D26723384: [pytorch][PR] Implements `torch.linalg.lstsq`
Test Plan: revert-hammer
Differential Revision: D26723384 (https://github.com/pytorch/pytorch/commit/3ac90132351b3dab51d6ecede52db62e101257e1)
Original commit changeset: c9866a95f140
fbshipit-source-id: 3e5263d71facdc91ca09d7dcbbbe3ba818ee2821
diff --git a/aten/src/ATen/native/BatchLinearAlgebra.cpp b/aten/src/ATen/native/BatchLinearAlgebra.cpp
index 5b0ab41..8df4322 100644
--- a/aten/src/ATen/native/BatchLinearAlgebra.cpp
+++ b/aten/src/ATen/native/BatchLinearAlgebra.cpp
@@ -122,78 +122,6 @@
extern "C" void cgetrs_(char *trans, int *n, int *nrhs, std::complex<float> *a, int *lda, int *ipiv, std::complex<float> *b, int *ldb, int *info);
extern "C" void dgetrs_(char *trans, int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, int *info);
extern "C" void sgetrs_(char *trans, int *n, int *nrhs, float *a, int *lda, int *ipiv, float *b, int *ldb, int *info);
-
-// gels
-extern "C" void zgels_(char *trans, int *m, int *n, int *nrhs,
- std::complex<double> *a, int *lda, std::complex<double> *b, int *ldb,
- std::complex<double> *work, int *lwork, int *info);
-extern "C" void cgels_(char *trans, int *m, int *n, int *nrhs,
- std::complex<float> *a, int *lda, std::complex<float> *b, int *ldb,
- std::complex<float> *work, int *lwork, int *info);
-extern "C" void dgels_(char *trans, int *m, int *n, int *nrhs,
- double *a, int *lda, double *b, int *ldb,
- double *work, int *lwork, int *info);
-extern "C" void sgels_(char *trans, int *m, int *n, int *nrhs,
- float *a, int *lda, float *b, int *ldb,
- float *work, int *lwork, int *info);
-
-// gelsd
-extern "C" void zgelsd_(int *m, int *n, int *nrhs,
- std::complex<double> *a, int *lda, std::complex<double> *b, int *ldb,
- double *s, double *rcond, int *rank,
- std::complex<double> *work, int *lwork, double *rwork, int *iwork, int *info);
-extern "C" void cgelsd_(int *m, int *n, int *nrhs,
- std::complex<float> *a, int *lda, std::complex<float> *b, int *ldb,
- float *s, float *rcond, int *rank,
- std::complex<float> *work, int *lwork, float *rwork, int *iwork, int *info);
-extern "C" void dgelsd_(int *m, int *n, int *nrhs,
- double *a, int *lda, double *b, int *ldb,
- double *s, double *rcond, int *rank,
- double *work, int *lwork, int *iwork, int *info);
-extern "C" void sgelsd_(int *m, int *n, int *nrhs,
- float *a, int *lda, float *b, int *ldb,
- float *s, float *rcond, int *rank,
- float *work, int *lwork, int *iwork, int *info);
-
-// gelsy
-extern "C" void zgelsy_(int *m, int *n, int *nrhs,
- std::complex<double> *a, int *lda, std::complex<double> *b, int *ldb,
- int *jpvt, double *rcond, int *rank,
- std::complex<double> *work, int *lwork,
- double *rwork, int *info);
-extern "C" void cgelsy_(int *m, int *n, int *nrhs,
- std::complex<float> * a, int *lda, std::complex<float> *b, int *ldb,
- int *jpvt, float *rcond, int *rank,
- std::complex<float> *work, int *lwork,
- float *rwork, int *info);
-extern "C" void dgelsy_(int *m, int *n, int *nrhs,
- double *a, int *lda, double *b, int *ldb,
- int *jpvt, double *rcond, int *rank,
- double *work, int *lwork, int *info);
-extern "C" void sgelsy_(int *m, int *n, int *nrhs,
- float *a, int *lda, float *b, int *ldb,
- int *jpvt, float *rcond, int *rank,
- float *work, int *lwork, int *info);
-
-// gelss
-extern "C" void zgelss_(int *m, int *n, int *nrhs,
- std::complex<double> *a, int *lda, std::complex<double> *b, int *ldb,
- double *s, double *rcond, int *rank,
- std::complex<double> *work, int *lwork,
- double *rwork, int *info);
-extern "C" void cgelss_(int *m, int *n, int *nrhs,
- std::complex<float> *a, int *lda, std::complex<float> *b, int *ldb,
- float *s, float *rcond, int *rank,
- std::complex<float> *work, int *lwork,
- float *rwork, int *info);
-extern "C" void dgelss_(int *m, int *n, int *nrhs,
- double *a, int *lda, double *b, int *ldb,
- double *s, double *rcond, int *rank,
- double *work, int *lwork, int *info);
-extern "C" void sgelss_(int *m, int *n, int *nrhs,
- float *a, int *lda, float *b, int *ldb,
- float *s, float *rcond, int *rank,
- float *work, int *lwork, int *info);
#endif
namespace at {
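For reference, the prototypes removed above cover the four LAPACK least-squares driver families: `?gels` (QR-based, full-rank inputs only), `?gelsy` (QR with column pivoting, rank-revealing), `?gelsd` (divide-and-conquer SVD), and `?gelss` (standard SVD). A minimal NumPy sketch of what the SVD-based drivers compute — the function name and the `rcond` default here are illustrative, not part of the reverted code:

```python
import numpy as np

def svd_lstsq(a, b, rcond=1e-7):
    # Sketch of the ?gelsd/?gelss semantics: minimum-norm least-squares
    # solution, effective rank, and the singular values of `a`.
    u, s, vh = np.linalg.svd(a, full_matrices=False)
    rank = int((s > rcond * s[0]).sum())  # values below the cutoff count as zero
    s_inv = np.zeros_like(s)
    s_inv[:rank] = 1.0 / s[:rank]
    x = vh.conj().T @ (s_inv[:, None] * (u.conj().T @ b))
    return x, rank, s

a, b = np.random.randn(5, 3), np.random.randn(5, 2)
x, rank, s = svd_lstsq(a, b)
assert np.allclose(x, np.linalg.lstsq(a, b, rcond=None)[0])
```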
@@ -236,126 +164,6 @@
template<class scalar_t>
void lapackLuSolve(char trans, int n, int nrhs, scalar_t *a, int lda, int *ipiv, scalar_t *b, int ldb, int *info);
-template<class scalar_t>
-void lapackGels(char trans, int m, int n, int nrhs,
- scalar_t *a, int lda, scalar_t *b, int ldb,
- scalar_t *work, int lwork, int *info);
-
-template<class scalar_t, class value_t = scalar_t>
-void lapackGelsd(int m, int n, int nrhs,
- scalar_t *a, int lda, scalar_t *b, int ldb,
- value_t *s, value_t rcond, int *rank,
- scalar_t* work, int lwork,
- value_t *rwork, int* iwork, int *info);
-
-template<class scalar_t, class value_t = scalar_t>
-void lapackGelsy(int m, int n, int nrhs,
- scalar_t *a, int lda, scalar_t *b, int ldb,
- int *jpvt, value_t rcond, int *rank,
- scalar_t *work, int lwork, value_t* rwork, int *info);
-
-template<class scalar_t, class value_t = scalar_t>
-void lapackGelss(int m, int n, int nrhs,
- scalar_t *a, int lda, scalar_t *b, int ldb,
- value_t *s, value_t rcond, int *rank,
- scalar_t *work, int lwork,
- value_t *rwork, int *info);
-
-enum class LapackLstsqDriverType : int64_t { Gels, Gelsd, Gelsy, Gelss};
-
-template<LapackLstsqDriverType, class scalar_t, class value_t = scalar_t>
-struct lapackLstsq_impl;
-
-template<class scalar_t, class value_t>
-struct lapackLstsq_impl<LapackLstsqDriverType::Gels, scalar_t, value_t> {
- static void call(
- char trans, int m, int n, int nrhs,
- scalar_t *a, int lda, scalar_t *b, int ldb,
- scalar_t *work, int lwork, int *info, // Gels flavor
- int *jpvt, value_t rcond, int *rank, value_t* rwork, // Gelsy flavor
- value_t *s, // Gelss flavor
- int *iwork // Gelsd flavor
- ) {
- lapackGels<scalar_t>(
- trans, m, n, nrhs,
- a, lda, b, ldb,
- work, lwork, info);
- }
-};
-
-template<class scalar_t, class value_t>
-struct lapackLstsq_impl<LapackLstsqDriverType::Gelsy, scalar_t, value_t> {
- static void call(
- char trans, int m, int n, int nrhs,
- scalar_t *a, int lda, scalar_t *b, int ldb,
- scalar_t *work, int lwork, int *info, // Gels flavor
- int *jpvt, value_t rcond, int *rank, value_t* rwork, // Gelsy flavor
- value_t *s, // Gelss flavor
- int *iwork // Gelsd flavor
- ) {
- lapackGelsy<scalar_t, value_t>(
- m, n, nrhs,
- a, lda, b, ldb,
- jpvt, rcond, rank,
- work, lwork, rwork, info);
- }
-};
-
-template<class scalar_t, class value_t>
-struct lapackLstsq_impl<LapackLstsqDriverType::Gelsd, scalar_t, value_t> {
- static void call(
- char trans, int m, int n, int nrhs,
- scalar_t *a, int lda, scalar_t *b, int ldb,
- scalar_t *work, int lwork, int *info, // Gels flavor
- int *jpvt, value_t rcond, int *rank, value_t* rwork, // Gelsy flavor
- value_t *s, // Gelss flavor
- int *iwork // Gelsd flavor
- ) {
- lapackGelsd<scalar_t, value_t>(
- m, n, nrhs,
- a, lda, b, ldb,
- s, rcond, rank,
- work, lwork,
- rwork, iwork, info);
- }
-};
-
-template<class scalar_t, class value_t>
-struct lapackLstsq_impl<LapackLstsqDriverType::Gelss, scalar_t, value_t> {
- static void call(
- char trans, int m, int n, int nrhs,
- scalar_t *a, int lda, scalar_t *b, int ldb,
- scalar_t *work, int lwork, int *info, // Gels flavor
- int *jpvt, value_t rcond, int *rank, value_t* rwork, // Gelsy flavor
- value_t *s, // Gelss flavor
- int *iwork // Gelsd flavor
- ) {
- lapackGelss<scalar_t, value_t>(
- m, n, nrhs,
- a, lda, b, ldb,
- s, rcond, rank,
- work, lwork,
- rwork, info);
- }
-};
-
-template<LapackLstsqDriverType driver_type, class scalar_t, class value_t = scalar_t>
-void lapackLstsq(
- char trans, int m, int n, int nrhs,
- scalar_t *a, int lda, scalar_t *b, int ldb,
- scalar_t *work, int lwork, int *info, // Gels flavor
- int *jpvt, value_t rcond, int *rank, value_t* rwork, // Gelsy flavor
- value_t *s, // Gelss flavor
- int *iwork // Gelsd flavor
- ) {
- lapackLstsq_impl<driver_type, scalar_t, value_t>::call(
- trans, m, n, nrhs,
- a, lda, b, ldb,
- work, lwork, info,
- jpvt, rcond, rank, rwork,
- s,
- iwork);
-}
template<> void lapackSolve<c10::complex<double>>(int n, int nrhs, c10::complex<double> *a, int lda, int *ipiv, c10::complex<double> *b, int ldb, int *info) {
zgesv_(&n, &nrhs, reinterpret_cast<std::complex<double>*>(a), &lda, ipiv, reinterpret_cast<std::complex<double>*>(b), &ldb, info);
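The deleted `lapackLstsq` wrapper above gives all four drivers a single superset signature and selects the implementation at compile time through the tag-specialized `lapackLstsq_impl` structs; each specialization simply ignores the arguments its driver does not use. A rough Python analog of that dispatch idea — the driver bodies and names below are illustrative NumPy stand-ins, not the reverted implementation:

```python
import numpy as np

# Each wrapper accepts the union of all driver arguments ("superset
# signature") and forwards only the ones its driver actually needs.
def gels(a, b, **unused):
    # QR-based solve; assumes a full-rank overdetermined system
    q, r = np.linalg.qr(a)
    return np.linalg.solve(r, q.conj().T @ b)

def gelsd(a, b, rcond=None, **unused):
    # SVD-based, rank-revealing solve
    return np.linalg.lstsq(a, b, rcond=rcond)[0]

DRIVERS = {"gels": gels, "gelsd": gelsd}

def lstsq(a, b, driver="gelsd", **kwargs):
    return DRIVERS[driver](a, b, **kwargs)

a, b = np.random.randn(6, 3), np.random.randn(6)
assert np.allclose(lstsq(a, b, driver="gels"), lstsq(a, b, rcond=None))
```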
@@ -614,196 +422,6 @@
template<> void lapackLuSolve<float>(char trans, int n, int nrhs, float *a, int lda, int *ipiv, float *b, int ldb, int *info) {
sgetrs_(&trans, &n, &nrhs, a, &lda, ipiv, b, &ldb, info);
}
-
-template<> void lapackGels<c10::complex<double>>(
- char trans, int m, int n, int nrhs,
- c10::complex<double> *a, int lda, c10::complex<double> *b, int ldb,
- c10::complex<double> *work, int lwork, int *info) {
- zgels_(&trans, &m, &n, &nrhs,
- reinterpret_cast<std::complex<double>*>(a), &lda,
- reinterpret_cast<std::complex<double>*>(b), &ldb,
- reinterpret_cast<std::complex<double>*>(work), &lwork, info);
-}
-
-template<> void lapackGels<c10::complex<float>>(
- char trans, int m, int n, int nrhs,
- c10::complex<float> *a, int lda, c10::complex<float> *b, int ldb,
- c10::complex<float> *work, int lwork, int *info) {
- cgels_(&trans, &m, &n, &nrhs,
- reinterpret_cast<std::complex<float>*>(a), &lda,
- reinterpret_cast<std::complex<float>*>(b), &ldb,
- reinterpret_cast<std::complex<float>*>(work), &lwork, info);
-}
-
-template<> void lapackGels<double>(
- char trans, int m, int n, int nrhs,
- double *a, int lda, double *b, int ldb,
- double *work, int lwork, int *info) {
- dgels_(&trans, &m, &n, &nrhs,
- a, &lda, b, &ldb, work, &lwork, info);
-}
-
-template<> void lapackGels<float>(
- char trans, int m, int n, int nrhs,
- float *a, int lda, float *b, int ldb,
- float *work, int lwork, int *info) {
- sgels_(&trans, &m, &n, &nrhs,
- a, &lda, b, &ldb, work, &lwork, info);
-}
-
-template<> void lapackGelsd<c10::complex<double>, double>(
- int m, int n, int nrhs,
- c10::complex<double> *a, int lda, c10::complex<double> *b, int ldb,
- double *s, double rcond, int *rank,
- c10::complex<double> *work, int lwork,
- double *rwork, int *iwork, int *info) {
- zgelsd_(&m, &n, &nrhs,
- reinterpret_cast<std::complex<double>*>(a), &lda,
- reinterpret_cast<std::complex<double>*>(b), &ldb,
- s, &rcond, rank,
- reinterpret_cast<std::complex<double>*>(work), &lwork,
- rwork, iwork, info);
-}
-
-template<> void lapackGelsd<c10::complex<float>, float>(
- int m, int n, int nrhs,
- c10::complex<float> *a, int lda, c10::complex<float> *b, int ldb,
- float *s, float rcond, int *rank,
- c10::complex<float> *work, int lwork,
- float *rwork, int *iwork, int *info) {
- cgelsd_(&m, &n, &nrhs,
- reinterpret_cast<std::complex<float>*>(a), &lda,
- reinterpret_cast<std::complex<float>*>(b), &ldb,
- s, &rcond, rank,
- reinterpret_cast<std::complex<float>*>(work), &lwork,
- rwork, iwork, info);
-}
-
-template<> void lapackGelsd<double>(
- int m, int n, int nrhs,
- double *a, int lda, double *b, int ldb,
- double *s, double rcond, int *rank,
- double *work, int lwork,
- double *rwork, int *iwork, int *info) {
- dgelsd_(&m, &n, &nrhs,
- a, &lda, b, &ldb,
- s, &rcond, rank,
- work, &lwork, iwork, info);
-}
-
-template<> void lapackGelsd<float>(
- int m, int n, int nrhs,
- float *a, int lda, float *b, int ldb,
- float *s, float rcond, int *rank,
- float *work, int lwork,
- float *rwork, int *iwork, int *info) {
- sgelsd_(&m, &n, &nrhs,
- a, &lda, b, &ldb,
- s, &rcond, rank,
- work, &lwork, iwork, info);
-}
-
-template<> void lapackGelsy<c10::complex<double>, double>(
- int m, int n, int nrhs,
- c10::complex<double> *a, int lda, c10::complex<double> *b, int ldb,
- int *jpvt, double rcond, int *rank,
- c10::complex<double> *work, int lwork, double *rwork, int *info) {
- zgelsy_(&m, &n, &nrhs,
- reinterpret_cast<std::complex<double>*>(a), &lda,
- reinterpret_cast<std::complex<double>*>(b), &ldb,
- jpvt, &rcond, rank,
- reinterpret_cast<std::complex<double>*>(work), &lwork,
- rwork, info);
-}
-
-template<> void lapackGelsy<c10::complex<float>, float>(
- int m, int n, int nrhs,
- c10::complex<float> *a, int lda, c10::complex<float> *b, int ldb,
- int *jpvt, float rcond, int *rank,
- c10::complex<float> *work, int lwork, float *rwork, int *info) {
- cgelsy_(&m, &n, &nrhs,
- reinterpret_cast<std::complex<float>*>(a), &lda,
- reinterpret_cast<std::complex<float>*>(b), &ldb,
- jpvt, &rcond, rank,
- reinterpret_cast<std::complex<float>*>(work), &lwork,
- rwork, info);
-}
-
-template<> void lapackGelsy<double>(
- int m, int n, int nrhs,
- double *a, int lda, double *b, int ldb,
- int *jpvt, double rcond, int *rank,
- double *work, int lwork, double *rwork, int *info) {
- dgelsy_(&m, &n, &nrhs,
- a, &lda, b, &ldb,
- jpvt, &rcond, rank,
- work, &lwork, info);
-}
-
-template<> void lapackGelsy<float>(
- int m, int n, int nrhs,
- float *a, int lda, float *b, int ldb,
- int *jpvt, float rcond, int *rank,
- float *work, int lwork, float *rwork, int *info) {
- sgelsy_(&m, &n, &nrhs,
- a, &lda, b, &ldb,
- jpvt, &rcond, rank,
- work, &lwork, info);
-}
-
-template<> void lapackGelss<c10::complex<double>, double>(
- int m, int n, int nrhs,
- c10::complex<double> *a, int lda, c10::complex<double> *b, int ldb,
- double *s, double rcond, int *rank,
- c10::complex<double> *work, int lwork,
- double *rwork, int *info
- ) {
- zgelss_(&m, &n, &nrhs,
- reinterpret_cast<std::complex<double>*>(a), &lda,
- reinterpret_cast<std::complex<double>*>(b), &ldb,
- s, &rcond, rank,
- reinterpret_cast<std::complex<double>*>(work), &lwork,
- rwork, info);
-}
-
-template<> void lapackGelss<c10::complex<float>, float>(
- int m, int n, int nrhs,
- c10::complex<float> *a, int lda, c10::complex<float> *b, int ldb,
- float *s, float rcond, int *rank,
- c10::complex<float> *work, int lwork,
- float *rwork, int *info
- ) {
- cgelss_(&m, &n, &nrhs,
- reinterpret_cast<std::complex<float>*>(a), &lda,
- reinterpret_cast<std::complex<float>*>(b), &ldb,
- s, &rcond, rank,
- reinterpret_cast<std::complex<float>*>(work), &lwork,
- rwork, info);
-}
-
-template<> void lapackGelss<double>(
- int m, int n, int nrhs,
- double *a, int lda, double *b, int ldb,
- double *s, double rcond, int *rank,
- double *work, int lwork,
- double *rwork, int *info) {
- dgelss_(&m, &n, &nrhs,
- a, &lda, b, &ldb,
- s, &rcond, rank,
- work, &lwork, info);
-}
-
-template<> void lapackGelss<float>(
- int m, int n, int nrhs,
- float *a, int lda, float *b, int ldb,
- float *s, float rcond, int *rank,
- float *work, int lwork,
- float *rwork, int *info) {
- sgelss_(&m, &n, &nrhs,
- a, &lda, b, &ldb,
- s, &rcond, rank,
- work, &lwork, info);
-}
#endif
// Below are the definitions of the functions operating on a batch that are going to be dispatched
@@ -2306,474 +1924,6 @@
return std::tuple<Tensor&, Tensor&, Tensor&>(U, S, VT);
}
-// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lstsq ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-#ifdef USE_LAPACK
-template<class scalar_t, class value_t, class func_t>
-struct LapackLstsqHelper {
- using self_type = LapackLstsqHelper;
-
- // we use `driver_type` to decide how to initialize
- // the parameters relevant to the specific driver
- LapackLstsqDriverType driver_type;
- func_t driver;
-
- bool is_complex;
- at::ScalarType scalar_type;
- IntArrayRef batch_shape;
- // the strides below store the offsets to different lstsq problems in a batch
- int64_t a_stride;
- int64_t b_stride;
- int64_t s_stride;
-
- // variables below correspond to LAPACK inputs.
- // for more information check the LAPACK documentation on
- // `?gels`, `?gelsy`, `?gelsd`, `?gelss`
- char trans;
- int m;
- int n;
- int nrhs;
- scalar_t* a_working_ptr = nullptr;
- int lda;
- scalar_t* b_working_ptr = nullptr;
- int ldb;
- Tensor work;
- scalar_t work_opt; // used to decide the opt `work` size with lwork=-1
- scalar_t* work_ptr = &work_opt;
- int lwork = -1; // default value to decide the opt size for workspace arrays
- int info = 0;
- Tensor jpvt;
- int* jpvt_ptr = nullptr;
- value_t rcond;
- Tensor rank, rank_1d;
- int rank_opt;
- int64_t* rank_working_ptr = nullptr;
- Tensor rwork;
- value_t rwork_opt; // used to decide the opt `rwork` size with lwork=-1
- value_t* rwork_ptr = &rwork_opt;
- Tensor s, s_2d;
- value_t* s_working_ptr = nullptr;
- Tensor iwork;
- int iwork_opt; // used to decide the opt `iwork` size with lwork=-1
- int* iwork_ptr = &iwork_opt;
-
- LapackLstsqHelper(LapackLstsqDriverType driver_type, func_t driver)
- : driver_type{driver_type}, driver{driver}
- {}
-
- self_type& set_trans(char trans) { this->trans = trans; return *this; }
- self_type& set_m(int m) { this->m = m; return *this; }
- self_type& set_n(int n) { this->n = n; return *this; }
- self_type& set_nrhs(int nrhs) { this->nrhs = nrhs; return *this; }
- self_type& set_a(const Tensor& a) {
- this->a_working_ptr = a.data_ptr<scalar_t>();
- this->scalar_type = a.scalar_type();
- this->is_complex = a.is_complex();
- // `a` is persistent, should be safe to store its properties in references.
- this->batch_shape = IntArrayRef(a.sizes().data(), a.dim() - 2);
- this->a_stride = matrixStride(a);
- return *this;
- }
- self_type& set_lda(int lda) { this->lda = lda; return *this; }
- self_type& set_b(const Tensor& b) {
- this->b_working_ptr = b.data_ptr<scalar_t>();
- this->b_stride = matrixStride(b);
- return *this;
- }
- self_type& set_ldb(int ldb) { this->ldb = ldb; return *this; }
- self_type& set_work() {
- lwork = static_cast<int>(real_impl<scalar_t, value_t>(work_opt));
- work = at::empty({lwork}, scalar_type);
- work_ptr = work.data_ptr<scalar_t>();
- return *this;
- }
- self_type& set_jpvt() {
- // handle `jpvt` workspace array (relevant for `?gelsy` which uses
- // a QR factorization with column pivoting).
- if (LapackLstsqDriverType::Gelsy == driver_type) {
- jpvt = at::empty({std::max<int64_t>(1, n)}, at::kInt);
- jpvt_ptr = jpvt.data_ptr<int>();
- }
- return *this;
- }
- self_type& set_rcond(double cond) { this->rcond = static_cast<value_t>(cond); return *this; }
- self_type& set_rank() {
- // only `?gels` is not rank-revealing
- if (LapackLstsqDriverType::Gels != driver_type) {
- if (!batch_shape.size()) {
- rank = at::empty({}, at::kLong);
- }
- else {
- rank = at::empty(batch_shape, at::kLong);
- }
- rank_1d = rank.view({-1});
- rank_working_ptr = rank.data_ptr<int64_t>();
- }
- return *this;
- }
- self_type& set_rwork() {
- // `rwork` only makes sense for complex flavors and
- // `?gelsy`, `?gelsd` and `?gelss` drivers
- if (!this->is_complex || LapackLstsqDriverType::Gels == driver_type) {
- return *this;
- }
-
- int64_t rwork_len;
- switch (this->driver_type) {
- case LapackLstsqDriverType::Gelsy:
- rwork_len = std::max<int64_t>(1, 2 * n);
- break;
- case LapackLstsqDriverType::Gelss:
- rwork_len = std::max<int64_t>(1, 5 * std::min(m, n));
- break;
- // case LapackLstsqDriverType::Gelsd:
- default:
- rwork_len = static_cast<int64_t>(rwork_opt);
- }
- rwork = at::empty({rwork_len}, c10::toValueType(scalar_type));
- rwork_ptr = rwork.data_ptr<value_t>();
- return *this;
- }
- self_type& set_s() {
- // `?gelsd` and `?gelss` are SVD-based
- // so we can extract singular values from them.
- if (LapackLstsqDriverType::Gelsd == driver_type
- || LapackLstsqDriverType::Gelss == driver_type) {
- auto s_shape = batch_shape.vec();
- s_shape.push_back(std::min(m, n));
- s = at::empty(s_shape, c10::toValueType(scalar_type));
- s_working_ptr = s.data_ptr<value_t>();
- s_stride = s.size(-1);
- s_2d = s.view({-1, std::min(m, n)});
- }
- return *this;
- }
- self_type& set_iwork() {
- // handle `iwork` workspace array (relevant only for `?gelsd`)
- if (LapackLstsqDriverType::Gelsd == driver_type) {
- iwork = at::empty({iwork_opt}, at::kInt);
- iwork_ptr = iwork.data_ptr<int>();
- }
- return *this;
- }
-
- self_type& call_driver() {
- driver(trans, m, n, nrhs,
- a_working_ptr, lda,
- b_working_ptr, ldb,
- work_ptr, lwork,
- &info,
- jpvt_ptr,
- rcond,
- &rank_opt,
- rwork_ptr,
- s_working_ptr,
- iwork_ptr);
- // we want the output `rank` Tensor to be of type int64_t,
- // however LAPACK accepts int. That is why we use an integer
- // variable that then gets promoted and written into `rank`.
- // We use this approach over a tensor cast for better performance.
- if (rank_working_ptr) {
- *rank_working_ptr = static_cast<int64_t>(rank_opt);
- }
- return *this;
- }
-
- self_type& next() {
- // advance to the next problem in a batch.
- // Should only be used if a.shape[:-2] == b.shape[:-2]
- a_working_ptr += a_stride;
- b_working_ptr += b_stride;
- rank_working_ptr = rank_working_ptr ? rank_working_ptr + 1 : nullptr;
- s_working_ptr = s_working_ptr ? s_working_ptr + s_stride : nullptr;
- return *this;
- }
-
- self_type& next(scalar_t* a_working_ptr, scalar_t* b_working_ptr,
- int64_t a_linear_batch_idx) {
- // advance to the next problem in a batch.
- // Designed to be used with the `batch_iterator_with_broadcasting` method.
- this->a_working_ptr = a_working_ptr;
- this->b_working_ptr = b_working_ptr;
- rank_working_ptr = rank_working_ptr
- ? rank_1d.select(0, a_linear_batch_idx).template data_ptr<int64_t>()
- : nullptr;
- s_working_ptr = s_working_ptr
- ? s_2d.select(0, a_linear_batch_idx).template data_ptr<value_t>()
- : nullptr;
- return *this;
- }
-};
-
-// we use `enum class LapackLstsqDriverType` as keys in an unordered_map.
-// Clang5 and Gcc5 do not support std::hash for enum classes, hence
-// we provide our own hash function.
-struct LapackLstsqDriverTypeHash {
- std::size_t operator()(const LapackLstsqDriverType& driver_type) const {
- return static_cast<std::size_t>(driver_type);
- }
-};
-#endif
-
-std::tuple<Tensor, Tensor, Tensor> _lstsq_helper_cpu(
- const Tensor& a, const Tensor& b, double cond, c10::optional<std::string> driver_name) {
-#ifndef USE_LAPACK
- AT_ERROR("torch.linalg.lstsq: LAPACK library not found in compilation");
-#else
-
- static auto driver_string_to_type = std::unordered_map<std::string, LapackLstsqDriverType>({
- {"gels", LapackLstsqDriverType::Gels},
- {"gelsy", LapackLstsqDriverType::Gelsy},
- {"gelsd", LapackLstsqDriverType::Gelsd},
- {"gelss", LapackLstsqDriverType::Gelss}
- });
- // driver_name is never nullopt for CPU
- auto driver_str = driver_name.value();
- auto driver_type = driver_string_to_type[driver_str];
-
- Tensor rank;
- Tensor singular_values;
-
- AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES(a.scalar_type(), "torch.linalg.lstsq_cpu", [&] {
- using value_t = typename c10::scalar_value_type<scalar_t>::type;
-
- auto driver = lapackLstsq<LapackLstsqDriverType::Gelsd, scalar_t, value_t>;
- static auto driver_type_to_func
- = std::unordered_map<LapackLstsqDriverType, decltype(driver), LapackLstsqDriverTypeHash>({
- {LapackLstsqDriverType::Gels, lapackLstsq<LapackLstsqDriverType::Gels, scalar_t, value_t>},
- {LapackLstsqDriverType::Gelsy, lapackLstsq<LapackLstsqDriverType::Gelsy, scalar_t, value_t>},
- {LapackLstsqDriverType::Gelsd, lapackLstsq<LapackLstsqDriverType::Gelsd, scalar_t, value_t>},
- {LapackLstsqDriverType::Gelss, lapackLstsq<LapackLstsqDriverType::Gelss, scalar_t, value_t>}
- });
- driver = driver_type_to_func[driver_type];
-
- auto m = a.size(-2);
- auto n = a.size(-1);
- auto nrhs = b.size(-1);
- auto driver_helper = LapackLstsqHelper<scalar_t, value_t, decltype(driver)>(driver_type, driver)
- .set_trans('N')
- .set_m(m)
- .set_n(n)
- .set_nrhs(nrhs)
- .set_a(a)
- .set_lda(std::max<int64_t>(1, m))
- .set_b(b)
- .set_ldb(std::max<int64_t>(1, std::max(m, n)))
- .set_jpvt()
- .set_rcond(cond)
- .set_rank()
- .set_s()
- .call_driver() // initial call to deduce optimal sizes for workspace arrays
- .set_work()
- .set_rwork()
- .set_iwork();
-
- // If batch dims for `a` and `b` are equivalent, i.e.
- // a.shape[:-2] == b.shape[:-2], the call to `batch_iterator_with_broadcasting`
- // is equivalent to:
- // for (int64_t i = 0; i < batchCount(a); ++i) {
- // driver_helper.call_driver().next();
- // if (driver_helper.info) {
- // break;
- // }
- // }
- // which does correspond to a batch-wise iteration for methods that do not
- // broadcast with size expansion over batch dimensions.
- batch_iterator_with_broadcasting<scalar_t>(a, b,
- [&](scalar_t* a_working_ptr, scalar_t* b_working_ptr,
- int64_t a_linear_batch_idx) {
- driver_helper.next(a_working_ptr, b_working_ptr, a_linear_batch_idx)
- .call_driver();
- singleCheckErrors(driver_helper.info, "torch.linalg.lstsq_cpu");
- }
- );
-
- rank = driver_helper.rank;
- singular_values = driver_helper.s;
- });
-
- return std::make_tuple(b, rank, singular_values);
-#endif
-}
-
-std::tuple<Tensor, Tensor, Tensor, Tensor> linalg_lstsq(
- const Tensor& self, const Tensor& b,
- c10::optional<double> cond,
- c10::optional<std::string> driver) {
- TORCH_CHECK(
- self.device().type() == b.device().type(),
- "torch.linalg.lstsq: input tensors should be on the same device"
- );
- TORCH_CHECK(
- self.scalar_type() == b.scalar_type(),
- "torch.linalg.lstsq: input tensors should be of the same dtype"
- );
- TORCH_CHECK(
- self.dim() >= 2,
- "torch.linalg.lstsq: input `self` Tensor should be at least 2D"
- );
- TORCH_CHECK(
- b.dim() >= 1,
- "torch.linalg.lstsq: input `b` Tensor should be at least 1D"
- );
- auto dim_diff = self.dim() - b.dim();
- TORCH_CHECK(
- 0 <= dim_diff && dim_diff <= 1,
- "torch.linalg.lstsq: self.dim() must be greater than or equal to b.dim() and "
- "(self.dim() - b.dim()) <= 1"
- );
- Tensor b_2d = dim_diff ? b.unsqueeze(-1) : b;
- TORCH_CHECK(
- self.size(-2) == b_2d.size(-2),
- dim_diff ? "torch.linalg.lstsq: self.size(-2) should match b.size(-1)" :
- "torch.linalg.lstsq: self.size(-2) should match b.size(-2)"
- );
-
- // if `driver` is not provided, `driver_opt` defaults to
- // "gels" when working with CUDA tensors,
- // otherwise to the "gelsy" driver.
- // CUDA tensors are treated specially because MAGMA
- // supports only the 'gels' driver.
- c10::optional<std::string> driver_opt = driver;
- // check whether the user provided name is a valid driver name
- if (driver.has_value()) {
- auto driver_str = driver.value();
- // convert `driver_str` to lower case inplace.
- std::transform(driver_str.begin(), driver_str.end(), driver_str.begin(),
- [](unsigned char c) { return std::tolower(c); });
- static std::unordered_set<std::string> allowed_drivers = {
- "gels", "gelsy", "gelsd", "gelss"
- };
- if (at::kCPU == self.device().type()) {
- TORCH_CHECK(
- allowed_drivers.find(driver_str) != allowed_drivers.end(),
- "torch.linalg.lstsq: parameter `driver` should be one of "
- "(gels, gelsy, gelsd, gelss)"
- );
- }
- //else if (at::kCUDA == self.device().type()) {
- else {
- TORCH_CHECK(
- driver_str == "gels",
- "torch.linalg.lstsq: `driver` other than `gels` is not supported on CUDA"
- );
- }
- }
- // if driver name is not provided, set to default 'gelsy' if on CPU,
- // or to `gels` if on CUDA.
- else {
- driver_opt = (at::kCPU == self.device().type())
- ? c10::optional<std::string>("gelsy")
- : c10::optional<std::string>("gels");
- }
-
- // CUDA currently has only the `gels` driver, which works ONLY with overdetermined systems
- if (at::kCUDA == self.device().type()) {
- TORCH_CHECK(
- self.size(-2) >= self.size(-1),
- "torch.linalg.lstsq: only overdetermined systems (m >= n) are allowed on CUDA"
- );
- }
-
- // LAPACK/MAGMA requires inputs to be in column-major order.
- auto self_working_copy = copyBatchedColumnMajor(self);
-
- // Tensor b must be of size (..., max(m, n), nrhs)
- // and in column-major order.
- // We allow the batch dims of `self` to broadcast over the batch
- // dims of `b` so that it is possible to solve multiple systems
- // with the same lhs (encoded by `self`) / rhs (encoded by `b`).
- // `b_working_copy` is modified in-place, and the combination of
- // batch broadcasting plus LAPACK/MAGMA requirements imposes the following
- // restrictions on the sizes/strides of `b`:
- // 1. b.size = (broadcasted_batch_size(self, b), max(m, n), nrhs).
- // 2. b.stride should correspond to an almost contiguous Tensor in column-major order,
- // i.e. b.stride = b.transpose(-2, -1).contiguous().transpose(-2, -1).strides()
- auto m = self.size(-2);
- auto n = self.size(-1);
- auto b_working_copy = copyBatchedColumnMajor(b_2d,
- /*nrows=*/std::max(m, n),
- /*desired_batch_sizes=*/broadcast_batch_size(self, b_2d, self.dim() - 2));
-
- double rcond = cond.has_value() && (cond.value() > 0)
- ? cond.value()
- : _get_epsilon(c10::toValueType(self.scalar_type()));
-
- Tensor x, residuals, rank, singular_values;
- // path if neither `self` nor `b` is empty
- if (self.numel() && b.numel()) {
- std::tie(x, rank, singular_values) =
- at::_lstsq_helper(self_working_copy, b_working_copy, rcond, driver_opt);
- if (m > n && driver_opt.value() != "gelsy") {
- residuals = x.narrow(-2, n, std::max(m, n) - n).abs().pow_(2).sum(-2);
- }
- x = x.narrow(-2, 0, n);
- }
- // if either `self` or `b` is empty, return an empty tensor or,
- // if the output has non-zero sizes, a tensor of zeros.
- else {
- x = b_working_copy.zero_().narrow(-2, 0, n);
- }
-
- auto return_empty_if_undefined = [&self](Tensor& t,
- c10::optional<at::ScalarType> dtype = c10::nullopt,
- c10::optional<std::vector<int64_t>> shape = c10::nullopt) {
- if (t.defined()) {
- return t;
- }
- else {
- auto output_dtype = dtype.has_value() ? dtype.value() : self.scalar_type();
- if (shape.has_value()) {
- return at::empty(shape.value(), self.options().dtype(output_dtype));
- }
- else {
- return at::empty({0}, self.options().dtype(output_dtype));
- }
- }
- };
-
- // Some output stays undefined for some values of driver.
- // Instead of returning undefined tensors which get exposed as
- // Nones in the Python interface, we return empty tensors.
- // This way we follow the convention of output types in the
- // torch.linalg namespace.
- // NOTE: we run drivers only if both inputs are non-empty!
- // Hence the code below explicitly handles each and every output
- // if `self` is empty.
-
- // Numpy and Scipy always return ranks for empty matrices,
- // even for drivers which are not rank-revealing.
- auto batch_sizes = IntArrayRef(self.sizes().data(), self.dim() - 2);
- if (self.numel()) {
- rank = return_empty_if_undefined(rank, at::kLong);
- }
- else {
- rank = at::zeros(batch_sizes, self.options().dtype(at::kLong));
- }
-
- // undefined residuals could only be an empty Tensor of shape (0)
- residuals = return_empty_if_undefined(residuals);
-
- if (!self.numel()
- && (driver_opt.value() == "gelss" || driver_opt.value() == "gelsd")) {
- // when `self` is empty, return singular_values of shape
- // (*self.shape[:-2], 0) only if driver is in ('gelss', 'gelsd')
- auto singular_values_empty_shape = batch_sizes.vec();
- singular_values_empty_shape.push_back(0);
- singular_values = return_empty_if_undefined(
- singular_values,
- at::toValueType(self.scalar_type()),
- singular_values_empty_shape);
- }
- else {
- // otherwise return an empty tensor of shape (0)
- singular_values = return_empty_if_undefined(
- singular_values,
- at::toValueType(self.scalar_type()));
- }
-
- return std::make_tuple(x, residuals, rank, singular_values);
-}
-
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lu_solve ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
template<typename scalar_t>
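One non-obvious step in the removed `linalg_lstsq`: for overdetermined full-rank systems the driver overwrites `b` so that its first `n` rows hold the solution, while the squared magnitudes of the remaining `m - n` rows sum to the residuals — hence the `x.narrow(-2, n, ...)` computation above. A short NumPy check of that identity, with shapes chosen arbitrarily:

```python
import numpy as np

# With a full QR factorization a = q @ r, the least-squares solution x
# satisfies ||a @ x - b||^2 == ||(q^H @ b)[n:]||^2, which is why the
# trailing rows of the driver-overwritten `b` encode the residuals.
m, n = 6, 3
a, b = np.random.randn(m, n), np.random.randn(m, 2)
q, r = np.linalg.qr(a, mode="complete")   # q: (m, m), r: (m, n)
qtb = q.conj().T @ b
x = np.linalg.solve(r[:n], qtb[:n])       # solution from the leading n rows
res = (np.abs(qtb[n:]) ** 2).sum(axis=0)  # residuals from the trailing rows
assert np.allclose(res, (np.abs(a @ x - b) ** 2).sum(axis=0))
```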
diff --git a/aten/src/ATen/native/LinearAlgebraUtils.h b/aten/src/ATen/native/LinearAlgebraUtils.h
index 5572a8d..59625d6 100644
--- a/aten/src/ATen/native/LinearAlgebraUtils.h
+++ b/aten/src/ATen/native/LinearAlgebraUtils.h
@@ -4,7 +4,6 @@
#include <ATen/ATen.h>
#include <ATen/ExpandUtils.h>
#include <ATen/TensorUtils.h>
-#include <ATen/native/TensorIterator.h>
#include <limits>
#include <sstream>
#include <cstring>
@@ -33,32 +32,6 @@
}
/*
- * This method is designed to be a faster alternative to
- * `cloneBatchedColumnMajor` with some additional features,
- * namely:
- * 1. It uses `copy` instead of `clone`, which can be much faster.
- * 2. The `nrows` parameter is used to create inputs with more rows
- * than the original input, which is required for some LAPACK/MAGMA methods.
- * 3. `desired_batch_sizes` is used to create copies with a batch size
- * that is either the original batch size of the input or its larger
- * broadcasted shape.
- */
-static inline Tensor copyBatchedColumnMajor(const Tensor& src, int64_t nrows = -1,
- c10::optional<IntArrayRef> desired_batch_sizes = c10::nullopt) {
- nrows = (nrows == -1) ? src.size(-2) : nrows;
- auto copy_sizes = desired_batch_sizes.has_value()
- ? desired_batch_sizes.value().vec()
- : IntArrayRef(src.sizes().data(), src.dim() - 2).vec();
- copy_sizes.insert(copy_sizes.end(), {nrows, src.size(-1)});
- auto copy_strides = at::detail::defaultStrides(copy_sizes);
- copy_strides[src.dim() - 2] = 1;
- copy_strides[src.dim() - 1] = nrows;
- auto copy = at::empty_strided(copy_sizes, copy_strides, src.options());
- copy.narrow(-2, 0, src.size(-2)).copy_(src);
- return copy;
-}
-
-/*
* Given batches of matrices with arbitrary batch dim,
* computes the number of batches.
*/
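The removed `copyBatchedColumnMajor` allocates a column-major destination with possibly more rows than the source (LAPACK writes its output into that extra space) and copies the source into the leading rows. A NumPy sketch for the unbatched case — the helper name below is mine, not from the diff:

```python
import numpy as np

def copy_column_major(src, nrows=None):
    # Allocate a Fortran-ordered (column-major) buffer with `nrows` rows
    # and copy `src` into its leading rows, as LAPACK/MAGMA require.
    nrows = src.shape[0] if nrows is None else nrows
    out = np.empty((nrows, src.shape[1]), dtype=src.dtype, order="F")
    out[: src.shape[0]] = src
    return out

b = np.random.randn(4, 2)
buf = copy_column_major(b, nrows=6)  # e.g. max(m, n) rows for `?gels`
assert buf.flags.f_contiguous and np.allclose(buf[:4], b)
```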
@@ -75,103 +48,6 @@
return batched_matrices.size(-1) * batched_matrices.size(-2);
}
-// This function is designed to be used with linear algebra methods that minimize
-// L(ax - b), where L is generally the identity map (`solve`, for example)
-// or the L2 norm (`lstsq`).
-// It is expected that `a` and `b` are contiguous tensors of column-major matrices
-// (so that a.view({-1, a.size(-2), a.size(-1)}) succeeds, same for `b`),
-// with the following additional properties:
-//
-// 1. a.dim() == b.dim()
-// 2. a.shape[:-2] broadcasts over b.shape[:-2]
-// 3. a.size(i) <= b.size(i) for i=0,..., a.dim() - 3 (only for batch dimensions)
-//
-// MAGMA/LAPACK modify tensor `a` in-place, and the main goal of this method
-// is to be memory efficient, which means that if there exists an index i such that
-// a.shape[i] < b.shape[i], 0 <= i <= a.dim() - 3,
-// then instead of materializing copies of `a` in the broadcasted shape, we keep
-// a buffer copy of `a` along with flags that check whether specific batch dimension
-// indices for `a` were already accessed. If they were, we copy the data from the buffer
-// into `a`. The number of copies does not exceed
-// prod(max(a.shape[:-2], b.shape[:-2]) - a.shape[:-2] + 1)
-// and this value is attained by tensors with non-empty batch dimensions.
-//
-// func_t `f` is a callable that is being supplied with
-// scalar_t* a_working_ptr, scalar_t* b_working_ptr, int64_t a_linear_batch_idx.
-// a_working_ptr and b_working_ptr can directly be passed to LAPACK/MAGMA routines,
-// and a_linear_batch_idx is an index in the 3d representation which corresponds to
-// the memory a_working_ptr points to, in other words:
- // a_working_ptr == a.view({-1, a.size(-2), a.size(-1)}).select(0, a_linear_batch_idx).data_ptr<scalar_t>();
-// a_linear_batch_idx is useful to store metadata related to `a`, such as, for example,
-// its rank or singular values (see linalg_lstsq).
-template<typename scalar_t, typename func_t>
-void batch_iterator_with_broadcasting(const Tensor& a, const Tensor& b, const func_t& f) {
- IntArrayRef a_batch_sizes(a.sizes().data(), a.dim() - 2);
- IntArrayRef b_batch_sizes(b.sizes().data(), b.dim() - 2);
-
- auto a_linear_batch_idx = at::arange(batchCount(a)).view(a_batch_sizes);
- auto b_linear_batch_idx = at::arange(batchCount(b)).view(b_batch_sizes);
-
- TensorIterator iter = TensorIteratorConfig()
- .set_check_mem_overlap(false)
- .check_all_same_dtype(false)
- .resize_outputs(false)
- .add_output(b_linear_batch_idx)
- .add_input(a_linear_batch_idx)
- .build();
-
- auto m = a.size(-2);
- auto n = a.size(-1);
- auto a_3d = a.view({batchCount(a), m, n});
- auto b_3d = b.view({batchCount(b), b.size(-2), b.size(-1)});
-
- auto a_broadcasts_over_b = (a_batch_sizes != b_batch_sizes);
- Tensor a_buffer, a_was_accessed, a_buffer_3d;
- std::function<void(int64_t)> check_if_copy_needed_for_a
- = [](int64_t a_curr_linear_batch_idx){};
- if (a_broadcasts_over_b) {
- a_buffer = at::empty_strided(a.sizes(), a.strides(), a.options())
- .copy_(a);
- a_was_accessed = at::zeros(batchCount(a), at::kBool);
- a_buffer_3d = a_buffer.view({batchCount(a), m, n});
- check_if_copy_needed_for_a = [&](int64_t a_curr_linear_batch_idx) {
- auto* a_was_accessed_flag = a_was_accessed
- .select(0, a_curr_linear_batch_idx)
- .data_ptr<bool>();
- if (!(*a_was_accessed_flag)) {
- *a_was_accessed_flag = true;
- }
- else {
- a_3d.select(0, a_curr_linear_batch_idx)
- .copy_(a_buffer_3d.select(0, a_curr_linear_batch_idx));
- }
- };
- }
-
- auto loop = [&](char** data, const int64_t* strides, int64_t nelems) {
- auto* b_batch_idx_ptr = data[0];
- auto* a_batch_idx_ptr = data[1];
-
- for (int64_t elem = 0; elem < nelems; ++elem) {
- auto b_curr_linear_batch_idx = *reinterpret_cast<int64_t*>(b_batch_idx_ptr);
- auto a_curr_linear_batch_idx = *reinterpret_cast<int64_t*>(a_batch_idx_ptr);
-
- check_if_copy_needed_for_a(a_curr_linear_batch_idx);
-
- auto* a_working_ptr = a_3d.select(0, a_curr_linear_batch_idx)
- .data_ptr<scalar_t>();
- auto* b_working_ptr = b_3d.select(0, b_curr_linear_batch_idx)
- .data_ptr<scalar_t>();
- f(a_working_ptr, b_working_ptr, a_curr_linear_batch_idx);
-
- b_batch_idx_ptr += strides[0];
- a_batch_idx_ptr += strides[1];
- }
- };
- iter.serial_for_each(loop, {0, batchCount(b)});
-}
-
-
// Returns the epsilon value for floating types except half
static inline double _get_epsilon(const ScalarType& sc_type) {
switch (sc_type) {
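The removed `batch_iterator_with_broadcasting` avoids materializing broadcast copies of `a`: since the LAPACK/MAGMA driver clobbers each `a` slice in place, it keeps one pristine buffer plus per-slice "was accessed" flags and restores a slice only when it is revisited. A Python sketch of that bookkeeping — the explicit `b_to_a_idx` mapping stands in for the TensorIterator-driven index pairing and is an assumption of this sketch:

```python
import numpy as np

def batch_iter_broadcast(a3d, b3d, b_to_a_idx, f):
    # `a3d` batches broadcast over `b3d` batches; `f` overwrites its
    # `a` argument in place, so restore clobbered slices from a buffer.
    buffer = a3d.copy()
    was_accessed = np.zeros(len(a3d), dtype=bool)
    for b_idx, a_idx in enumerate(b_to_a_idx):
        if was_accessed[a_idx]:
            a3d[a_idx] = buffer[a_idx]  # revisit: restore the pristine copy
        was_accessed[a_idx] = True
        f(a3d[a_idx], b3d[b_idx], a_idx)

a = np.random.randn(1, 3, 3)  # one lhs broadcast over...
b = np.random.randn(4, 3, 2)  # ...four rhs batches
batch_iter_broadcast(a, b, b_to_a_idx=[0, 0, 0, 0],
                     f=lambda ai, bi, i: ai.fill(0.0))
```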
@@ -294,13 +170,6 @@
return std::make_tuple(arg1_broadcasted, arg2_broadcasted);
}
-static inline std::vector<int64_t> broadcast_batch_size(const Tensor& t1, const Tensor& t2, int64_t n_batch_dims) {
- IntArrayRef t1_batch_sizes(t1.sizes().data(), n_batch_dims);
- IntArrayRef t2_batch_sizes(t2.sizes().data(), n_batch_dims);
- auto broadcasted_batch_sizes = infer_size(t1_batch_sizes, t2_batch_sizes);
- return broadcasted_batch_sizes;
-}
-
// Return a permutation with the given axes moved to the end.
static inline Tensor _move_to_end(const Tensor& self, IntArrayRef axes) {
const std::vector<int64_t> a = axes.vec();
diff --git a/aten/src/ATen/native/cuda/BatchLinearAlgebra.cu b/aten/src/ATen/native/cuda/BatchLinearAlgebra.cu
index 66ee412..fd4b37c 100644
--- a/aten/src/ATen/native/cuda/BatchLinearAlgebra.cu
+++ b/aten/src/ATen/native/cuda/BatchLinearAlgebra.cu
@@ -160,12 +160,6 @@
scalar_t** dB_array, magma_int_t lddb, magma_int_t& info,
magma_int_t batchsize, const MAGMAQueue& magma_queue);
-template<class scalar_t>
-void magmaGels(
- magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nrhs,
- scalar_t* dA, magma_int_t ldda, scalar_t* dB, magma_int_t lddb,
- scalar_t* hwork, magma_int_t lwork, magma_int_t* info);
-
template<>
void magmaSolve<double>(
magma_int_t n, magma_int_t nrhs, double* dA, magma_int_t ldda,
@@ -1269,56 +1263,6 @@
info = magma_cgetrs_batched(MagmaNoTrans, n, nrhs, reinterpret_cast<magmaFloatComplex**>(dA_array), ldda, dipiv_array, reinterpret_cast<magmaFloatComplex**>(dB_array), lddb, batchsize, magma_queue.get_queue());
AT_CUDA_CHECK(cudaGetLastError());
}
-
-template<>
-void magmaGels<float>(
- magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nrhs,
- float* dA, magma_int_t ldda, float* dB, magma_int_t lddb,
- float* hwork, magma_int_t lwork, magma_int_t* info) {
- MagmaStreamSyncGuard guard;
- magma_sgels_gpu(trans, m, n, nrhs,
- dA, ldda, dB, lddb,
- hwork, lwork, info);
- AT_CUDA_CHECK(cudaGetLastError());
-}
-
-template<>
-void magmaGels<double>(
- magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nrhs,
- double* dA, magma_int_t ldda, double* dB, magma_int_t lddb,
- double* hwork, magma_int_t lwork, magma_int_t* info) {
- MagmaStreamSyncGuard guard;
- magma_dgels_gpu(trans, m, n, nrhs,
- dA, ldda, dB, lddb,
- hwork, lwork, info);
- AT_CUDA_CHECK(cudaGetLastError());
-}
-
-template<>
-void magmaGels<c10::complex<float>>(
- magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nrhs,
- c10::complex<float>* dA, magma_int_t ldda, c10::complex<float>* dB, magma_int_t lddb,
- c10::complex<float>* hwork, magma_int_t lwork, magma_int_t* info) {
- MagmaStreamSyncGuard guard;
- magma_cgels_gpu(trans, m, n, nrhs,
- reinterpret_cast<magmaFloatComplex*>(dA), ldda,
- reinterpret_cast<magmaFloatComplex*>(dB), lddb,
- reinterpret_cast<magmaFloatComplex*>(hwork), lwork, info);
- AT_CUDA_CHECK(cudaGetLastError());
-}
-
-template<>
-void magmaGels<c10::complex<double>>(
- magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t nrhs,
- c10::complex<double>* dA, magma_int_t ldda, c10::complex<double>* dB, magma_int_t lddb,
- c10::complex<double>* hwork, magma_int_t lwork, magma_int_t* info) {
- MagmaStreamSyncGuard guard;
- magma_zgels_gpu(trans, m, n, nrhs,
- reinterpret_cast<magmaDoubleComplex*>(dA), ldda,
- reinterpret_cast<magmaDoubleComplex*>(dB), lddb,
- reinterpret_cast<magmaDoubleComplex*>(hwork), lwork, info);
- AT_CUDA_CHECK(cudaGetLastError());
-}
#endif
#define ALLOCATE_ARRAY(name, type, size) \
@@ -2555,44 +2499,6 @@
TORCH_CHECK(info == 0, "MAGMA lu_solve : invalid argument: ", -info);
return self_working_copy;
}
-// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lstsq ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-std::tuple<Tensor, Tensor, Tensor> _lstsq_helper_cuda(
- const Tensor& a, const Tensor& b, double cond, c10::optional<std::string> driver_name) {
-#ifndef USE_MAGMA
-AT_ERROR("torch.linalg.lstsq: MAGMA library not found in "
- "compilation. Please rebuild with MAGMA.");
-#else
- AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES(a.scalar_type(), "torch.linalg.lstsq_cuda", [&] {
- auto trans = MagmaNoTrans;
- auto m = magma_int_cast(a.size(-2), "m");
- auto n = magma_int_cast(a.size(-1), "n");
- auto nrhs = magma_int_cast(b.size(-1), "nrhs");
- auto ldda = std::max<magma_int_t>(1, m);
- auto lddb = std::max<magma_int_t>(1, std::max(m, n));
- auto nb = magmaGeqrfOptimalBlocksize<scalar_t>(m, n);
- auto lwork = (m - n + nb) * (nrhs + nb) + nrhs * nb;
- Tensor hwork = at::empty({static_cast<int64_t>(lwork)}, a.scalar_type());
- auto* hwork_ptr = hwork.data_ptr<scalar_t>();
- magma_int_t info;
-
- batch_iterator_with_broadcasting<scalar_t>(a, b,
- [&](scalar_t* a_working_ptr, scalar_t* b_working_ptr,
- int64_t a_linear_batch_idx) {
- magmaGels<scalar_t>(trans, m, n, nrhs,
- a_working_ptr, ldda, b_working_ptr, lddb,
- hwork_ptr, lwork, &info);
- singleCheckErrors(static_cast<int64_t>(info), "torch.linalg.lstsq_cuda");
- }
- );
- });
-
- Tensor rank, singular_values;
- return std::make_tuple(b, rank, singular_values);
-#endif
-}
-// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}} // namespace at::native
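In the removed `_lstsq_helper_cuda` above, the host workspace for `magma_?gels_gpu` is sized as `(m - n + nb) * (nrhs + nb) + nrhs * nb`, with `nb` the optimal GEQRF blocksize. A trivial Python rendering of that arithmetic, just to make the formula easy to sanity-check:

```python
def gels_lwork(m, n, nrhs, nb):
    # Host workspace size used above for magma_?gels_gpu; `nb` is the
    # optimal blocksize reported by the GEQRF query (m >= n assumed,
    # matching the overdetermined-only restriction on CUDA).
    return (m - n + nb) * (nrhs + nb) + nrhs * nb

print(gels_lwork(m=1024, n=512, nrhs=1, nb=128))  # 82688
```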
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
index a4ec077..7ee6969 100644
--- a/aten/src/ATen/native/native_functions.yaml
+++ b/aten/src/ATen/native/native_functions.yaml
@@ -9015,18 +9015,6 @@
dispatch:
DefaultBackend: det
-- func: linalg_lstsq(Tensor self, Tensor b, float? cond=None, *, str? driver=None) -> (Tensor solution, Tensor residuals, Tensor rank, Tensor singular_values)
- python_module: linalg
- variants: function
- dispatch:
- DefaultBackend: linalg_lstsq
-
-- func: _lstsq_helper(Tensor a, Tensor b, float cond, str? driver_name) -> (Tensor, Tensor, Tensor)
- variants: function
- dispatch:
- CPU: _lstsq_helper_cpu
- CUDA: _lstsq_helper_cuda
-
- func: linalg_slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet)
python_module: linalg
variants: function
diff --git a/docs/source/linalg.rst b/docs/source/linalg.rst
index 697fd2e..668632d 100644
--- a/docs/source/linalg.rst
+++ b/docs/source/linalg.rst
@@ -31,4 +31,3 @@
.. autofunction:: tensorsolve
.. autofunction:: inv
.. autofunction:: qr
-.. autofunction:: lstsq
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 2543bd8..1671be3 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -119,250 +119,6 @@
@skipCUDAIfNoMagma
@skipCPUIfNoLapack
- @dtypes(torch.float, torch.double, torch.cfloat, torch.cdouble)
- def test_linalg_lstsq(self, device, dtype):
- from torch.testing._internal.common_utils import random_well_conditioned_matrix
- if self.device_type == 'cpu':
- drivers = ('gels', 'gelsy', 'gelsd', 'gelss', None)
- else:
- drivers = ('gels', None)
-
- def check_correctness(a, b, sol):
- sol2 = a.pinverse() @ b
- self.assertEqual(sol, sol2, atol=1e-5, rtol=1e-5)
-
- def check_correctness_ref(a, b, res, ref):
- def apply_if_not_empty(t, f):
- if t.numel():
- return f(t)
- else:
- return t
-
- def select_if_not_empty(t, i):
- selected = apply_if_not_empty(t, lambda x: x.select(0, i).numpy())
- return selected
-
- m = a.size(-2)
- n = a.size(-1)
- nrhs = b.size(-1)
- batch_size = int(np.prod(a.shape[:-2]))
- if batch_size == 0:
- batch_size = 1
- a_3d = a.view(batch_size, m, n)
- b_3d = b.view(batch_size, m, nrhs)
-
- solution_3d = res.solution.view(batch_size, n, nrhs)
- residuals_2d = apply_if_not_empty(res.residuals, lambda t: t.view(-1, nrhs))
- rank_1d = apply_if_not_empty(res.rank, lambda t: t.view(-1))
- singular_values_2d = res.singular_values.view(batch_size, res.singular_values.shape[-1])
-
- for i in range(batch_size):
- sol, residuals, rank, singular_values = ref(
- a_3d.select(0, i).numpy(),
- b_3d.select(0, i).numpy()
- )
- # Singular values are None when lapack_driver='gelsy' in SciPy
- if singular_values is None:
- singular_values = []
- self.assertEqual(sol, solution_3d.select(0, i), atol=1e-5, rtol=1e-5)
- self.assertEqual(residuals, select_if_not_empty(residuals_2d, i), atol=1e-5, rtol=1e-5)
- self.assertEqual(rank, select_if_not_empty(rank_1d, i), atol=1e-5, rtol=1e-5)
- self.assertEqual(singular_values, singular_values_2d.select(0, i), atol=1e-5, rtol=1e-5)
-
- def check_correctness_scipy(a, b, res, driver, cond):
- if TEST_SCIPY and driver not in (None, 'gels'):
- import scipy.linalg
-
- def scipy_ref(a, b):
- return scipy.linalg.lstsq(a, b, lapack_driver=driver, cond=cond)
- check_correctness_ref(a, b, res, scipy_ref)
-
- def check_correctness_numpy(a, b, res, driver, cond):
- if driver in ('gelsd', 'gelss'):
- import numpy.linalg
-
- def numpy_ref(a, b):
- return numpy.linalg.lstsq(a, b, rcond=-1 if cond is None else cond)
- check_correctness_ref(a, b, res, numpy_ref)
-
- def check_ranks(a, ranks, cond=1e-7):
- ranks2 = torch.matrix_rank(a, tol=cond)
- self.assertEqual(ranks, ranks2)
-
- def check_singular_values(a, sv):
- sv2 = a.svd()[1]
- self.assertEqual(sv, sv2)
-
- ms = [2 ** i for i in range(5)]
- m_ge_n_sizes = [(m, m // 2) for m in ms] + [(m, m) for m in ms]
- # cases m < n are only supported on CPU
- m_l_n_sizes = [(m // 2, m) for m in ms]
- matrix_sizes = m_ge_n_sizes + (m_l_n_sizes if device == 'cpu' else [])
- batches = [(), (2,), (2, 2), (2, 2, 2)]
- # we generate matrices with singular values sampled from a normal distribution,
- # so `cond=1.0` (the mean) cuts roughly half of all the singular values;
- # we then compare whether torch.linalg.lstsq agrees with SciPy and NumPy.
- cond = (None, 1.0)
-
- for batch, matrix_size, driver, cond in itertools.product(batches, matrix_sizes, drivers, cond):
- shape = batch + matrix_size
- a = random_well_conditioned_matrix(*shape, dtype=dtype, device=device)
- b = torch.rand(*shape, dtype=dtype, device=device)
-
- cond = 1e-7
- m = a.size(-2)
- n = a.size(-1)
- res = torch.linalg.lstsq(a, b, cond=cond, driver=driver)
- sol = res.solution.narrow(-2, 0, n)
-
- check_correctness_scipy(a, b, res, driver, cond)
- check_correctness_numpy(a, b, res, driver, cond)
-
- check_correctness(a, b, sol)
- if self.device_type == 'cpu' and driver != 'gels':
- # rank-revealing drivers are only available for the CPU.
- # `gels` is not rank-revealing and is only for full
- # rank inputs.
- check_ranks(a, res.rank, cond)
- if self.device_type == 'cpu' and driver in ('gelsd', 'gelss'):
- # SVD-based drivers are only available for the CPU.
- # These are only `gelsd` and `gelss`.
- check_singular_values(a, res.singular_values)
-
- @skipCUDAIfNoMagma
- @skipCPUIfNoLapack
- @dtypes(torch.float, torch.double, torch.cfloat, torch.cdouble)
- def test_linalg_lstsq_batch_broadcasting(self, device, dtype):
- from torch.testing._internal.common_utils import random_well_conditioned_matrix
-
- def check_correctness(a, b):
- sol = torch.linalg.lstsq(a, b).solution
- sol2 = a.pinverse() @ b
- self.assertEqual(sol, sol2, rtol=1e-5, atol=1e-5)
-
- ms = [2 ** i for i in range(5)]
- batches = [(), (0,), (2,), (2, 2), (2, 2, 2)]
- # the case when a single matrix is batch-broadcasted over the rhs
- for m, batch in itertools.product(ms, batches):
- a = random_well_conditioned_matrix(m, m, dtype=dtype, device=device).view(*([1] * len(batch)), m, m)
- b = torch.rand(*(batch + (m, m)), dtype=dtype, device=device)
- check_correctness(a, b)
-
- # cases with broadcastable shapes
- for m in ms:
- a = random_well_conditioned_matrix(1, 3, 1, 3, m, m, dtype=dtype, device=device)
- b = torch.rand(3, 1, 3, 1, m, m // 2, dtype=dtype, device=device)
- check_correctness(a, b)
-
- # rhs are vectors, not matrices in this test
- b = torch.rand(3, 1, 3, 1, m, dtype=dtype, device=device)
- # unsqueeze for b because `check_correctness` checks against
- # a.pinverse() @ b, which requires b to be a matrix
- check_correctness(a, b.unsqueeze(-1))
-
- a = random_well_conditioned_matrix(3, 1, 3, 1, m, m, dtype=dtype, device=device)
- b = torch.rand(1, 3, 1, 3, m, m // 2, dtype=dtype, device=device)
- check_correctness(a, b)
-
- # rhs are vectors, not matrices in this test
- b = torch.rand(1, 3, 1, 3, m, dtype=dtype, device=device)
- check_correctness(a, b.unsqueeze(-1))
-
- @skipCPUIfNoLapack
- @skipCUDAIfNoMagma
- @dtypes(torch.float, torch.double, torch.cfloat, torch.cdouble)
- def test_linalg_lstsq_input_checks(self, device, dtype):
- # check empty inputs
- # empty batches
- a = torch.rand(0, 0, 3, 3, dtype=dtype, device=device)
- b = torch.rand(0, 0, 3, 2, dtype=dtype, device=device)
- self.assertEqual(
- torch.linalg.lstsq(a, b)[0],
- torch.zeros(0, 0, 3, 2, dtype=dtype, device=device)
- )
- # empty a and b
- a = torch.rand(2, 2, 0, 0, dtype=dtype, device=device)
- b = torch.rand(2, 2, 0, 0, dtype=dtype, device=device)
- self.assertEqual(
- torch.linalg.lstsq(a, b)[0],
- torch.zeros(2, 2, 0, 0, dtype=dtype, device=device)
- )
- # empty a and b
- a = torch.rand(2, 2, 3, 0, dtype=dtype, device=device)
- b = torch.rand(2, 2, 3, 0, dtype=dtype, device=device)
- self.assertEqual(
- torch.linalg.lstsq(a, b)[0],
- torch.zeros(2, 2, 0, 0, dtype=dtype, device=device)
- )
- # empty a but not b
- a = torch.rand(2, 2, 3, 0, dtype=dtype, device=device)
- b = torch.rand(2, 2, 3, 2, dtype=dtype, device=device)
- self.assertEqual(
- torch.linalg.lstsq(a, b)[0],
- torch.zeros(2, 2, 0, 2, dtype=dtype, device=device)
- )
-
- # empty a and b
- if torch.device(device).type == 'cpu':
- # only CPU since CUDA does not support underdetermined systems
- a = torch.rand(2, 2, 0, 3, dtype=dtype, device=device)
- b = torch.rand(2, 2, 0, 3, dtype=dtype, device=device)
- self.assertEqual(
- torch.linalg.lstsq(a, b)[0],
- torch.zeros(2, 2, 3, 3, dtype=dtype, device=device)
- )
-
- a = torch.rand(2, 3, dtype=dtype, device=device)
- b = torch.rand(3, dtype=dtype, device=device)
-
- with self.assertRaisesRegex(RuntimeError, 'input `self` Tensor should be at least 2D'):
- torch.linalg.lstsq(b, b)
-
- with self.assertRaisesRegex(RuntimeError, 'input `b` Tensor should be at least 1D'):
- torch.linalg.lstsq(a, torch.tensor(1, dtype=dtype, device=device))
-
- with self.assertRaisesRegex(RuntimeError, r'self.size\(-2\) should match b.size\(-1\)'):
- torch.linalg.lstsq(a, b)
-
- with self.assertRaisesRegex(RuntimeError, r'self.size\(-2\) should match b.size\(-2\)'):
- torch.linalg.lstsq(a, b.unsqueeze(-1))
-
- def complement_device(device):
- if device == 'cpu' and torch.cuda.is_available():
- return 'cuda'
- else:
- return 'cpu'
-
- a = torch.rand(2, 2, 2, 2, dtype=dtype, device=device)
- b = torch.rand(2, 2, 2, dtype=dtype, device=complement_device(device))
- if a.device != b.device:
- with self.assertRaisesRegex(RuntimeError, 'input tensors should be on the same device'):
- torch.linalg.lstsq(a, b)
-
- b = (torch.rand(2, 2, 2, dtype=dtype, device=device) * 100).long()
- with self.assertRaisesRegex(RuntimeError, 'input tensors should be of the same dtype'):
- torch.linalg.lstsq(a, b)
-
- a = torch.rand(2, 2, 2, 2, dtype=dtype, device=device)
- b = torch.rand(2, 2, 2, dtype=dtype, device=device)
-
- if device != 'cpu':
- with self.assertRaisesRegex(RuntimeError, '`driver` other than `gels` is not supported on CUDA'):
- torch.linalg.lstsq(a, b, driver='fictitious_driver')
- # if on cpu
- else:
- with self.assertRaisesRegex(RuntimeError, r'parameter `driver` should be one of \(gels, gelsy, gelsd, gelss\)'):
- torch.linalg.lstsq(a, b, driver='fictitious_driver')
-
- if device != 'cpu':
- a = torch.rand(2, 3, dtype=dtype, device=device)
- b = torch.rand(2, 1, dtype=dtype, device=device)
- with self.assertRaisesRegex(RuntimeError, r'only overdetermined systems \(m >= n\) are allowed on CUDA'):
- torch.linalg.lstsq(a, b)
-
- @skipCUDAIfNoMagma
- @skipCPUIfNoLapack
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
def test_cholesky(self, device, dtype):
from torch.testing._internal.common_utils import random_hermitian_pd_matrix
diff --git a/test/test_namedtuple_return_api.py b/test/test_namedtuple_return_api.py
index 034a5e7..89b9576 100644
--- a/test/test_namedtuple_return_api.py
+++ b/test/test_namedtuple_return_api.py
@@ -16,7 +16,6 @@
'triangular_solve', 'cummax', 'cummin', 'linalg_eigh', "_unpack_dual", 'linalg_qr',
'_svd_helper', 'linalg_svd', 'linalg_slogdet', 'fake_quantize_per_tensor_affine_cachemask',
'fake_quantize_per_channel_affine_cachemask',
- 'linalg_lstsq'
}
@@ -78,7 +77,6 @@
input=(per_channel_scale, per_channel_zp, 1, 0, 255),
names=('output', 'mask',), hasout=False),
op(operators=['_unpack_dual'], input=(0,), names=('primal', 'tangent'), hasout=False),
- op(operators=['linalg_lstsq'], input=(a,), names=('solution', 'residuals', 'rank', 'singular_values'), hasout=False),
]
def get_func(f):
diff --git a/tools/autograd/derivatives.yaml b/tools/autograd/derivatives.yaml
index ba364ed..7076abf 100644
--- a/tools/autograd/derivatives.yaml
+++ b/tools/autograd/derivatives.yaml
@@ -666,11 +666,6 @@
self: not_implemented("lstsq")
A: not_implemented("lstsq")
-- name: linalg_lstsq(Tensor self, Tensor b, float? cond=None, *, str? driver=None) -> (Tensor solution, Tensor residuals, Tensor rank, Tensor singular_values)
- self: not_implemented("linalg_lstsq")
- b: not_implemented("linalg_lstsq")
- output_differentiability: [True, True]
-
- name: lt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
self: zeros_like(self)
diff --git a/torch/csrc/api/include/torch/linalg.h b/torch/csrc/api/include/torch/linalg.h
index 594d3c6..26d9e66 100644
--- a/torch/csrc/api/include/torch/linalg.h
+++ b/torch/csrc/api/include/torch/linalg.h
@@ -44,10 +44,6 @@
return torch::linalg_eigvalsh_out(result, self, uplo);
}
-inline std::tuple<Tensor, Tensor, Tensor, Tensor> lstsq(const Tensor& self, const Tensor& b, c10::optional<double> cond, c10::optional<std::string> driver) {
- return torch::linalg_lstsq(self, b, cond, driver);
-}
-
inline Tensor norm(const Tensor& self, optional<Scalar> opt_ord, optional<IntArrayRef> opt_dim, bool keepdim, optional<ScalarType> opt_dtype) {
return torch::linalg_norm(self, opt_ord, opt_dim, keepdim, opt_dtype);
}
@@ -172,10 +168,6 @@
return detail::eigvalsh_out(result, self, uplo);
}
-inline std::tuple<Tensor, Tensor, Tensor, Tensor> lstsq(const Tensor& self, const Tensor& b, c10::optional<double> cond, c10::optional<std::string> driver) {
- return detail::lstsq(self, b, cond, driver);
-}
-
inline Tensor linalg_norm(const Tensor& self, optional<Scalar> opt_ord, optional<IntArrayRef> opt_dim, bool keepdim, optional<ScalarType> opt_dtype) {
return detail::norm(self, opt_ord, opt_dim, keepdim, opt_dtype);
}
diff --git a/torch/csrc/autograd/utils/wrap_outputs.h b/torch/csrc/autograd/utils/wrap_outputs.h
index e532109..538181a 100644
--- a/torch/csrc/autograd/utils/wrap_outputs.h
+++ b/torch/csrc/autograd/utils/wrap_outputs.h
@@ -109,16 +109,6 @@
return r.release();
}
-inline PyObject* wrap(PyTypeObject *type, std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor> tensors) {
- auto r = THPObjectPtr{PyStructSequence_New(type)};
- if (!r) throw python_error();
- PyStructSequence_SET_ITEM(r.get(), 0, wrap(std::get<0>(tensors)));
- PyStructSequence_SET_ITEM(r.get(), 1, wrap(std::get<1>(tensors)));
- PyStructSequence_SET_ITEM(r.get(), 2, wrap(std::get<2>(tensors)));
- PyStructSequence_SET_ITEM(r.get(), 3, wrap(std::get<3>(tensors)));
- return r.release();
-}
-
inline PyObject* wrap(std::tuple<at::Tensor, at::Tensor, at::Tensor, int64_t> tensors) {
auto r = THPObjectPtr{PyTuple_New(4)};
if (!r) throw python_error();
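The removed `wrap` overload above packs the four output tensors into a `PyStructSequence`, which is what surfaces in Python as the `torch.return_types.linalg_lstsq` named tuple. A rough Python-side equivalent of that packing, using `collections.namedtuple` as a stand-in for the structseq type:

```python
from collections import namedtuple

# Stand-in for the PyStructSequence the C++ wrap() builds: a tuple-like
# object whose fields are also reachable by name.
LstsqResult = namedtuple(
    "linalg_lstsq", "solution residuals rank singular_values")

res = LstsqResult("x", "res", "rank", "sv")
assert res.solution == res[0] and res.singular_values == res[3]
```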
diff --git a/torch/linalg/__init__.py b/torch/linalg/__init__.py
index 0b44cc8..c7a0c07 100644
--- a/torch/linalg/__init__.py
+++ b/torch/linalg/__init__.py
@@ -367,99 +367,6 @@
[-3.1113, 2.7381]], dtype=torch.float64)
""")
-lstsq = _add_docstr(_linalg.linalg_lstsq, r"""
-torch.linalg.lstsq(input, b, cond=None, *, driver=None)
- -> (Tensor solution, Tensor residuals, Tensor rank, Tensor singular_values)
-
-Computes the least squares solution to the system with a batch of matrices :math:`a` (represented by :attr:`input`)
-and a batch of vectors or matrices :math:`b` such that
-
-.. math::
- x = \text{argmin}_x \|ax - b\|_F,
-
-where :math:`a` is of size :math:`(..., m, n)` and :math:`b` is of size :math:`(..., m, k)` or
-:math:`(..., m)`. The batch dimensions of :math:`a` and :math:`b` have to be broadcastable.
-
-The returned ``solution`` is of shape :math:`(..., n, k)` if :math:`b` is of shape :math:`(..., m, k)`,
-and of shape :math:`(..., n, 1)` if :math:`b` is of shape :math:`(..., m)`.
-The batch shape of :math:`x` is the result of broadcasting the batch dimensions of :math:`a` and :math:`b`.
-
-.. note::
- The case when :math:`m < n` is not supported on CUDA.
-
-Args:
- input (Tensor): the batch of left-hand side matrices :math:`a`
- of shape :math:`(..., m, n)` with :math:`m > 0, n > 0`
- b (Tensor): the batch of right-hand side vectors or matrices :math:`b`
- of shape :math:`(..., m)` or :math:`(..., m, k)` with :math:`m > 0, k > 0`
- cond (float, optional): used to determine the effective rank of :math:`a`
- for the rank-revealing drivers (see :attr:`driver`).
- Singular values :math:`s[i] \le cond * s[0]` are treated as zero.
- If :attr:`cond` is ``None`` or is smaller than zero,
- the machine precision based on :attr:`input`'s dtype is used.
- Default: ``None``
- driver (str, optional): the name of the LAPACK/MAGMA driver that is used
- to compute the solution.
- For CPU inputs the valid values are
- (``'gels'``, ``'gelsy'``, ``'gelsd'``, ``'gelss'``, ``None``).
- For CUDA inputs the valid values are (``'gels'``, ``None``).
- If :attr:`driver` is ``None``, ``'gelsy'`` is used for CPU inputs and ``'gels'`` for CUDA inputs.
- Default: ``None``
-
-.. note::
- Driver ``'gels'`` assumes the input is full-rank, i.e. ``torch.matrix_rank(a) == min(m, n)``.
- Drivers ``'gelsy'``, ``'gelsd'``, ``'gelss'`` are rank-revealing and hence also handle rank-deficient inputs.
- ``'gelsy'`` uses QR factorization with column pivoting, while ``'gelsd'`` and ``'gelss'`` use SVD.
- ``'gelsy'`` is typically the fastest of the rank-revealing drivers.
-
-.. warning::
- The default value of :attr:`cond` may change in a future release.
- It is therefore recommended to pass an explicit value to avoid
- breakage when the library is updated.
-
-
-Returns:
- (Tensor, Tensor, Tensor, Tensor): a namedtuple (solution, residuals, rank, singular_values) containing:
- - **solution** (*Tensor*): the least squares solution
- - **residuals** (*Tensor*): if :math:`m > n` and the matrices in :attr:`input` are full-rank, the tensor encodes
- the squared residuals of the solutions, that is :math:`\|\text{input} x - b\|_F^2`.
- If :math:`m \le n`, an empty tensor is returned instead.
- - **rank** (*Tensor*): the tensor of ranks of the matrix :attr:`input` with shape ``input.shape[:-2]``.
- Only computed if :attr:`driver` is one of (``'gelsy'``, ``'gelsd'``, ``'gelss'``);
- an empty tensor is returned otherwise.
- - **singular_values** (*Tensor*): the tensor of singular values
- of the matrix :attr:`input` with shape ``input.shape[:-2] + (min(m, n),)``.
- Only computed if :attr:`driver` is one of (``'gelsd'``, ``'gelss'``);
- an empty tensor is returned otherwise.
-
-Example::
-
- >>> a = torch.tensor([[10, 2, 3], [3, 10, 5], [5, 6, 12]], dtype=torch.float)
- >>> b = torch.tensor([[[2, 5, 1], [3, 2, 1], [5, 1, 9]],
- [[4, 2, 9], [2, 0, 3], [2, 5, 3]]], dtype=torch.float)
- >>> x = torch.linalg.lstsq(a.view(1, 3, 3), b).solution
- >>> x
- tensor([[[ 0.0793, 0.5345, -0.1228],
- [ 0.1125, 0.1458, -0.3517],
- [ 0.3274, -0.2123, 0.9770]],
-
- [[ 0.3939, 0.1023, 0.9361],
- [ 0.1074, -0.2903, 0.1189],
- [-0.0512, 0.5192, -0.1995]]])
-
- >>> (x - a.pinverse() @ b).abs().max()
- tensor(2.0862e-07)
-
- >>> aa = a.clone(); _ = aa.select(1, -1).zero_()  # zero the last column to drop the rank
- >>> xx, _, rank, _ = torch.linalg.lstsq(aa.view(1, 3, 3), b)
- >>> rank
- tensor([2])
-
- >>> sv = torch.linalg.lstsq(a.view(1, 3, 3), b, driver='gelsd').singular_values
- >>> (sv - a.svd()[1]).max().abs()
- tensor(5.7220e-06)
-""")
-
matrix_rank = _add_docstr(_linalg.linalg_matrix_rank, r"""
matrix_rank(input, tol=None, hermitian=False, *, out=None) -> Tensor
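[Editor's note] To make the removed docstring's math concrete: for full-rank :math:`a` with :math:`m \ge n`, the minimizer of :math:`\|ax - b\|_F` equals ``pinv(a) @ b``, and the ``residuals`` field holds the squared column norms of :math:`ax - b`. A hedged numerical check using only operators that remain in the library:

    import torch

    a = torch.randn(6, 3, dtype=torch.double)
    b = torch.randn(6, 2, dtype=torch.double)
    x = a.pinverse() @ b                  # reference least-squares solution
    residuals = ((a @ x - b) ** 2).sum(dim=-2)
    print(x.shape, residuals.shape)       # torch.Size([3, 2]) torch.Size([2])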
diff --git a/torch/overrides.py b/torch/overrides.py
index 885ef15..46c0ed6 100644
--- a/torch/overrides.py
+++ b/torch/overrides.py
@@ -1028,7 +1028,6 @@
Tensor.view: lambda self, shape: -1,
Tensor.view_as: lambda self, other: -1,
Tensor.zero_: lambda self: -1,
- torch.linalg.lstsq: lambda self, b, cond=None, driver=None: -1
}
ret2 = {}
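[Editor's note] The removed entry registered torch.linalg.lstsq in the override table, which is what let __torch_function__ implementers intercept it. A minimal sketch of the protocol with a hypothetical wrapper class (assumes the reverted operator is registered as overridable):

    import torch

    class Wrapper:
        """Hypothetical __torch_function__ implementer."""
        def __init__(self, data):
            self.data = data

        @classmethod
        def __torch_function__(cls, func, types, args=(), kwargs=None):
            kwargs = kwargs or {}
            print('intercepted:', func.__name__)
            unwrapped = [a.data if isinstance(a, Wrapper) else a for a in args]
            return func(*unwrapped, **kwargs)

    # torch.linalg.lstsq(Wrapper(a), b) would print 'intercepted: lstsq'
    # before delegating to the real implementation.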
diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py
index 37e17ba..156fae0 100644
--- a/torch/testing/_internal/common_methods_invocations.py
+++ b/torch/testing/_internal/common_methods_invocations.py
@@ -1004,17 +1004,6 @@
out.append(SampleInput(a, kwargs=dict(upper=True)))
return out
-def sample_inputs_linalg_lstsq(op_info, device, dtype, requires_grad=False):
- from torch.testing._internal.common_utils import random_well_conditioned_matrix
- out = []
- for batch in ((), (3,), (3, 3)):
- shape = batch + (3, 3)
- # NOTE: inputs are not marked with `requires_grad` since
- # linalg_lstsq is not differentiable
- a = random_well_conditioned_matrix(*shape, dtype=dtype, device=device)
- b = make_tensor(shape, device, dtype, low=None, high=None)
- out.append(SampleInput((a, b)))
- return out
def sample_inputs_linalg_pinv(op_info, device, dtype, requires_grad=False):
"""
@@ -2356,21 +2345,6 @@
sample_inputs_func=sample_inputs_linalg_solve,
check_batched_gradgrad=False,
decorators=[skipCUDAIfNoMagma, skipCUDAIfRocm, skipCPUIfNoLapack]),
- OpInfo('linalg.lstsq',
- aten_name='linalg_lstsq',
- op=torch.linalg.lstsq,
- dtypes=floating_and_complex_types(),
- test_inplace_grad=False,
- supports_tensor_out=False,
- sample_inputs_func=sample_inputs_linalg_lstsq,
- check_batched_grad=False,
- check_batched_gradgrad=False,
- decorators=[skipCUDAIfNoMagma, skipCPUIfNoLapack],
- skips=(
- # skip because `linalg_lstsq` is not differentiable
- SkipInfo('TestGradients', 'test_fn_grad'),
- SkipInfo('TestCommon', 'test_variant_consistency_jit'),
- )),
OpInfo('linalg.pinv',
aten_name='linalg_pinv',
op=torch.linalg.pinv,
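[Editor's note] The removed OpInfo entry fed the generic test suites with the sample inputs generated above. A sketch of the shapes those samples covered, with plain randn standing in for the removed random_well_conditioned_matrix helper:

    import torch

    # Square 3x3 systems, unbatched and with batch shapes (3,) and (3, 3),
    # matching the removed sample_inputs_linalg_lstsq generator.
    for batch in ((), (3,), (3, 3)):
        shape = batch + (3, 3)
        a = torch.randn(shape)
        b = torch.randn(shape)
        print(a.shape, b.shape)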
diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py
index 3286087..f1c217b 100644
--- a/torch/testing/_internal/common_utils.py
+++ b/torch/testing/_internal/common_utils.py
@@ -1598,27 +1598,6 @@
s[i] = 1
return u.mm(torch.diag(s).to(dtype)).mm(v.transpose(0, 1))
-def random_well_conditioned_matrix(*shape, dtype, device, mean=1.0, sigma=0.001):
- """
- Returns a random rectangular matrix (batch of matrices)
- with singular values sampled from a Gaussian with
- mean `mean` and standard deviation `sigma`.
- The smaller the `sigma`, the better conditioned
- the output matrix is.
- """
- primitive_dtype = {
- torch.float: torch.float,
- torch.double: torch.double,
- torch.cfloat: torch.float,
- torch.cdouble: torch.double
- }
- x = torch.rand(shape, dtype=dtype, device=device)
- m = x.size(-2)
- n = x.size(-1)
- u, _, v = x.svd()
- s = (torch.randn(*(shape[:-2] + (min(m, n),)), dtype=primitive_dtype[dtype], device=device) * sigma + mean) \
- .sort(-1, descending=True).values.to(dtype)
- return (u * s.unsqueeze(-2)) @ v.transpose(-2, -1).conj()
def random_symmetric_matrix(l, *batches, **kwargs):
dtype = kwargs.get('dtype', torch.double)
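[Editor's note] The removed helper's construction is worth spelling out: it takes the SVD of a random matrix, then substitutes singular values drawn from N(mean, sigma^2) sorted in descending order, so a small sigma yields a condition number near 1. A standalone real-valued sketch; the removed version also handled batches and complex dtypes via .conj():

    import torch

    def well_conditioned(m, n, mean=1.0, sigma=0.001, dtype=torch.double):
        # Orthonormal factors from the SVD of a random matrix...
        u, _, v = torch.rand(m, n, dtype=dtype).svd()
        # ...recombined with prescribed singular values close to `mean`.
        s = (torch.randn(min(m, n), dtype=dtype) * sigma + mean) \
            .sort(descending=True).values
        return (u * s.unsqueeze(-2)) @ v.transpose(-2, -1)

    a = well_conditioned(5, 3)
    sv = a.svd().S
    print(sv[0] / sv[-1])  # condition number, close to 1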