blob: 33f9eb77e6d17f420cf8734e245d587be3fbd37c [file] [log] [blame]
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/Tensor.cpp"
#else
#ifdef WITH_NUMPY
#ifdef TH_REAL_IS_DOUBLE
#define NUMPY_TYPE_ENUM NPY_DOUBLE
#endif
#ifdef TH_REAL_IS_FLOAT
#define NUMPY_TYPE_ENUM NPY_FLOAT
#endif
#ifdef TH_REAL_IS_LONG
#define NUMPY_TYPE_ENUM NPY_INT64
#endif
#ifdef TH_REAL_IS_INT
#define NUMPY_TYPE_ENUM NPY_INT32
#endif
#ifdef TH_REAL_IS_SHORT
#define NUMPY_TYPE_ENUM NPY_INT16
#endif
#ifdef TH_REAL_IS_BYTE
#define NUMPY_TYPE_ENUM NPY_UINT8
#endif
#endif
PyObject *THPTensorClass = NULL;
THPCopyList THTensor_(copy_functions);
PyObject * THPTensor_(NewEmpty)()
{
return THPTensor_(New)(THTensor_(new)(LIBRARY_STATE_NOARGS));
}
PyObject * THPTensor_(New)(THTensor *tensor)
{
THTensorPtr ptr(tensor);
if (!tensor->storage) {
tensor->storage = THStorage_(new)(LIBRARY_STATE_NOARGS);
}
PyTypeObject *type = (PyTypeObject *)THPTensorClass;
PyObject *obj = type->tp_alloc(type, 0);
if (obj) {
((THPTensor *)obj)->cdata = ptr.release();
}
return obj;
}
static THTensor* THPTensor_(_new)()
{
THTensorPtr tensor(THTensor_(new)(LIBRARY_STATE_NOARGS));
if (!tensor->storage) {
tensor->storage = THStorage_(new)(LIBRARY_STATE_NOARGS);
}
return tensor.release();
}
static THTensor* THPTensor_(_newWithSize)(THLongStorage *size)
{
THTensorPtr tensor(THTensor_(newWithSize)(LIBRARY_STATE size, NULL));
// Ensure that PyTorch's "storage is not NULL" invariant is upheld
// See Note [Storage is not NULL]
if (!tensor->storage) {
tensor->storage = THStorage_(new)(LIBRARY_STATE_NOARGS);
}
return tensor.release();
}
static void THPTensor_(dealloc)(THPTensor* self)
{
THTensor_(free)(LIBRARY_STATE self->cdata);
Py_TYPE(self)->tp_free((PyObject*)self);
}
static std::string THPTensor_(indicesToString)(std::vector<size_t> &indices,
size_t depth)
{
std::string index = "(";
for (size_t i = 0; i <= depth; ++i) {
index += std::to_string(indices[i]);
index += ", ";
}
index.erase(index.length()-2); // Remove trailing ", "
index += ")";
return index;
}
static void THPTensor_(setInconsistentDepthError)(std::vector<size_t> &sizes,
std::vector<size_t> &indices, size_t depth, size_t length)
{
std::string error = "inconsistent sequence length at index ";
error += THPTensor_(indicesToString)(indices, depth);
error += " - expected ";
error += std::to_string(sizes[depth]);
error += " but got ";
error += std::to_string(length);
THPUtils_setError(error.c_str());
}
#ifdef NUMPY_TYPE_ENUM
THTensor* THPTensor_(fromNumpy)(PyObject *numpy_array) {
PyArrayObject *array = (PyArrayObject*)numpy_array;
// Numpy and Torch disagree on empty tensors. In Torch, an empty
// tensor is a tensor with zero dimensions. In Numpy, an empty tensor
// keeps its shape, but has 0 as the size of one of the dimensions.
// So we'll convert all Numpy tensors of 0 elements to empty Torch tensors.
if (PyArray_SIZE(array) != 0) {
auto ndim = PyArray_NDIM(array);
size_t storage_size = 1;
THLongStoragePtr sizes(THLongStorage_newWithSize(ndim));
long *sizes_data = sizes->data;
for (int i = 0; i < ndim; ++i) {
sizes_data[i] = PyArray_DIM(array, i);
}
THLongStoragePtr strides(THLongStorage_newWithSize(ndim));
long *strides_data = strides->data;
for (int i = 0; i < ndim; ++i) {
// numpy uses bytes, torch uses elements
// we have to cast sizeof to long, because otherwise stride gets
// promoted to size_t, and is UB for negative values
strides_data[i] = PyArray_STRIDE(array, i) / ((long)sizeof(real));
if (strides_data[i] < 0) {
THPUtils_setError("some of the strides of a given numpy array are "
"negative. This is currently not supported, but will be added in "
"future releases.");
return NULL;
}
// XXX: this won't work for negative strides
storage_size += strides_data[i] * (sizes_data[i] - 1);
}
THStoragePtr storage(THStorage_(newWithDataAndAllocator)(
(real*)PyArray_DATA(array),
storage_size,
// See Note [Numpy memory management]
&THNumpyArrayAllocator,
new NumpyArrayAllocator(numpy_array)));
THTensor *result = THTensor_(newWithStorage)(storage, 0, sizes, strides);
return result;
} else {
THPUtils_setError("the given numpy array has zero-sized dimensions. "
"Zero-sized dimensions are not supported in PyTorch");
return NULL;
}
}
#endif
static PyObject * THPTensor_(pynew)(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
HANDLE_TH_ERRORS
Py_ssize_t num_args = args ? PyTuple_Size(args) : 0;
THPTensorPtr self((THPTensor *)type->tp_alloc(type, 0));
if (!self) {
return NULL;
}
self->cdata = NULL;
#ifdef THC_GENERIC_FILE
THCPAutoGPU gpu_guard;
#endif
// Internally we allow constructing with a keyword only argument cdata
if (kwargs != NULL) {
Py_ssize_t num_kwargs = PyDict_Size(kwargs);
#ifdef THC_GENERIC_FILE
PyObject *device_id = PyDict_GetItemString(kwargs, "device");
if (device_id == Py_None) {
num_kwargs--;
} else if (device_id) {
THPUtils_assert(THPUtils_checkLong(device_id), "device argument "
" has to be an int, but got %s", THPUtils_typename(device_id));
gpu_guard.setDevice(THPUtils_unpackLong(device_id));
// simulate pop() and pretend this key was never there
num_kwargs--;
}
#endif
if (num_args == 0) {
PyObject *cdata_ptr = PyDict_GetItemString(kwargs, "cdata");
if (num_kwargs == 1 && cdata_ptr && THPUtils_checkLong(cdata_ptr)) {
THTensor *ptr = (THTensor*)PyLong_AsVoidPtr(cdata_ptr);
self->cdata = ptr;
return (PyObject*)self.release();
}
}
// This is an internal option, so we don't want to advertise it.
#ifdef THC_GENERIC_FILE
THPUtils_assert(num_kwargs == 0, THPTensorStr " constructor only "
"accepts a 'device' keyword argument")
#else
THPUtils_assert(num_kwargs == 0, THPTensorStr " constructor doesn't "
"accept any keyword arguments");
#endif
}
// torch.Tensor()
if (num_args == 0) {
self->cdata = THPTensor_(_new)();
return (PyObject*)self.release();
}
PyObject *first_arg = PyTuple_GET_ITEM(args, 0);
// torch.Tensor(torch.Tensor tensor)
if (num_args == 1 && THPTensor_(Check)(first_arg)) {
THTensor *tensor = ((THPTensor*)first_arg)->cdata;
self->cdata = THTensor_(newWithTensor)(LIBRARY_STATE tensor);
return (PyObject*)self.release();
}
// torch.Tensor(torch.Size sizes)
if (num_args == 1 && THPSize_Check(first_arg)) {
THLongStoragePtr sizes(THPUtils_unpackSize(first_arg));
self->cdata = THPTensor_(_newWithSize)(sizes.get());
return (PyObject *)self.release();
}
// TODO: implement storageOffset, sizes and strides
// torch.Tensor(torch.Storage data)
if (num_args == 1 && THPStorage_(Check)(first_arg)) {
THStorage *storage = ((THPStorage*)first_arg)->cdata;
self->cdata = THTensor_(newWithStorage1d)(LIBRARY_STATE storage, 0, storage->size, -1);
return (PyObject *)self.release();
}
#ifdef NUMPY_TYPE_ENUM
// torch.Tensor(np.ndarray array)
if (num_args == 1 && PyArray_Check(first_arg) &&
PyArray_TYPE((PyArrayObject*)first_arg) == NUMPY_TYPE_ENUM) {
THPObjectPtr numpy_array(
PyArray_FromArray((PyArrayObject*)first_arg, nullptr, NPY_ARRAY_BEHAVED));
self->cdata = THPTensor_(fromNumpy)(numpy_array.get());
if (!self->cdata)
return NULL;
return (PyObject*)self.release();
}
#endif
// torch.Tensor(Sequence data)
if (num_args == 1 && PySequence_Check(first_arg)) {
Py_ssize_t length = PySequence_Length(first_arg);
THPUtils_assert(length >= 0, "couldn't obtain the length of %s",
THPUtils_typename(first_arg));
if (length == 0) {
self->cdata = THPTensor_(_new)();
return (PyObject*)self.release();
}
Py_INCREF(first_arg);
THPObjectPtr item(first_arg);
std::vector<size_t> sizes;
while ((length = PySequence_Length(item)) >= 0) {
sizes.push_back(length);
// TODO: check for string in this case
THPUtils_assert(sizes.size() < 1000000, "already counted a million "
"dimensions in a given sequence. Most likely your items are also "
"sequences and there's no way to infer how many dimension should "
"the tensor have");
THPUtils_assert(length > 0, "given sequence has an invalid size of "
"dimension %ld: %ld", (long)sizes.size(), (long)length);
item = PySequence_GetItem(item, 0);
if (!item)
return NULL;
}
// Last length check has set an error flag, so we need to clear it.
PyErr_Clear();
THLongStoragePtr sizes_storage(THLongStorage_newWithSize(sizes.size()));
long *sizes_data = sizes_storage->data;
for (auto size: sizes)
*sizes_data++ = size;
THTensorPtr tensor(THTensor_(newWithSize)(LIBRARY_STATE sizes_storage, NULL));
int ndims = sizes.size();
std::vector<size_t> indices(ndims);
std::vector<THPObjectPtr> sequences(ndims);
Py_INCREF(first_arg);
item = first_arg;
for (size_t i = 0; i < sequences.size(); i++) {
PyObject *item_ptr = item.get();
sequences[i] = std::move(item);
if (i < sequences.size()-1) {
item = PySequence_ITEM(item_ptr, 0);
if (!item)
return NULL;
}
}
// half tensors don't have CPU counterparts so we have to buffer them as
// floats while loading
#ifndef THC_REAL_IS_HALF
#define load_real real
#define UNPACK_REAL(item) THPUtils_(unpackReal)(item)
#else
#define load_real float
#define UNPACK_REAL(item) THPFloatUtils_unpackReal(item)
#endif
#if !defined(THC_GENERIC_FILE) && !defined(THD_GENERIC_FILE)
real *data = tensor->storage->data;
#else
size_t numel = THTensor_(numel)(LIBRARY_STATE tensor);
std::unique_ptr<load_real> data_guard(new load_real[numel]);
load_real *data = data_guard.get();
#endif
THPObjectPtr final_sequence;
while (true) {
final_sequence = std::move(sequences[ndims-1]);
try {
// We're taking a fast-track over the last dimension
for (size_t i = 0; i < sizes[ndims-1]; i++) {
indices[ndims-1] = i;
item = PySequence_ITEM(final_sequence, i);
// We've checked the length earlier, so it must have been an error
if (!item)
return NULL;
*data++ = UNPACK_REAL(item);
}
} catch(std::runtime_error &e) {
std::string index = THPTensor_(indicesToString)(indices, ndims-1);
THPUtils_setError("tried to construct a tensor from a %s%s sequence, "
"but found an item of type %s at index %s",
(ndims > 1 ? "nested " : ""),
THPUtils_typeTraits<real>::python_type_str,
THPUtils_typename(item.get()),
index.c_str());
return NULL;
}
#ifdef THC_GENERIC_FILE
#ifdef THC_REAL_IS_HALF
THFloatStorage *cpu_storage = THFloatStorage_newWithData(data_guard.get(), numel);
cpu_storage->flag &= ~TH_STORAGE_FREEMEM;
THCudaHalfStorage_copyFloat(LIBRARY_STATE tensor->storage, cpu_storage);
THFloatStorage_free(cpu_storage);
#else
THHostStorage *cpu_storage = THHostStorage_(newWithData)(data_guard.get(), numel);
cpu_storage->flag &= ~TH_STORAGE_FREEMEM;
THCStorage_(copyCPU)(LIBRARY_STATE tensor->storage, cpu_storage);
THHostStorage_(free)(cpu_storage);
#endif
#endif
#undef UNPACK_REAL
#undef load_real
// Update the counters
int dim = ndims-2;
size_t last_updated_dim = dim;
while (dim >= 0) {
last_updated_dim = dim;
if (++indices[dim] == sizes[dim])
indices[dim--] = 0;
else
break;
}
// Check if we've just made a full cycle
if ((last_updated_dim == 0 && indices[0] == 0) || ndims == 1)
break;
// Update sequences
for (int i = last_updated_dim+1; i < ndims; i++) {
sequences[i] = PySequence_ITEM(sequences[i-1], indices[i-1]);
if (!sequences[i]) {
THPTensor_(setInconsistentDepthError)(sizes, indices, i, indices[i]);
return NULL;
}
if (!PySequence_Check(sequences[i])) {
std::string index_str = THPTensor_(indicesToString)(indices, i);
THPUtils_setError("an item of time %s at index %s doesn't implement "
"a sequence protocol");
return NULL;
}
Py_ssize_t length = PySequence_Length(sequences[i]);
if (length < 0) {
std::string index_str = THPTensor_(indicesToString)(indices, i);
THPUtils_setError("could not obtain a length of %s at index %s",
THPUtils_typename(sequences[i].get()), index_str.c_str());
return NULL;
}
if ((size_t)length != sizes[i]) {
THPTensor_(setInconsistentDepthError)(sizes, indices, i, length);
return NULL;
}
}
}
self->cdata = tensor.release();
return (PyObject *)self.release();
}
// torch.Tensor(int ...)
THLongStoragePtr sizes;
if (THPUtils_tryUnpackLongVarArgs(args, 0, sizes)) {
self->cdata = THPTensor_(_newWithSize)(sizes.get());
return (PyObject *)self.release();
}
THPUtils_invalidArguments(args, kwargs, THPTensorStr " constructor", 6,
"no arguments",
"(int ...)",
"(" THPTensorStr " viewed_tensor)",
"(torch.Size size)",
"(" THPStorageStr " data)",
"(Sequence data)");
return NULL;
END_HANDLE_TH_ERRORS
}
#ifdef WITH_NUMPY
#define IS_SCALAR(NAME) \
((is_long = THPUtils_checkLong(NAME)) || \
(is_scalar_array = PyArray_CheckScalar(NAME)))
#define UNPACK_SCALAR(IDX_VARIABLE) \
if (is_long) { \
idx = THPUtils_unpackLong(IDX_VARIABLE); \
} else { \
PyArray_CastScalarToCtype(IDX_VARIABLE, &idx, NumpyLongArrDescr); \
}
#else
#define IS_SCALAR(NAME) THPUtils_checkLong(NAME)
#define UNPACK_SCALAR(IDX_VARIABLE) idx = THPUtils_unpackLong(IDX_VARIABLE);
#endif
#if defined(THC_GENERIC_FILE)
#define THIndexTensor THCudaLongTensor
#define THIndexTensor_(NAME) TH_CONCAT_2(THCudaLongTensor_,NAME)
#define THPIndexTensor THCPLongTensor
#define THPIndexTensor_Check THCPLongTensor_Check
#define THPIndexTensorClass THCPLongTensorClass
#elif defined(THD_GENERIC_FILE)
#define THIndexTensor THDLongTensor
#define THIndexTensor_(NAME) TH_CONCAT_2(THDLongTensor_,NAME)
#define THPIndexTensor THDPLongTensor
#define THPIndexTensor_Check THDPLongTensor_Check
#define THPIndexTensorClass THDPLongTensorClass
#else
#define THIndexTensor THLongTensor
#define THIndexTensor_(NAME) TH_CONCAT_2(THLongTensor_,NAME)
#define THPIndexTensor THPLongTensor
#define THPIndexTensor_Check THPLongTensor_Check
#define THPIndexTensorClass THPLongTensorClass
#endif
static bool THPTensor_(_indexOnce)(PyObject *index, int &indexed_dim,
THTensorPtr &tresult, THStorage* &sresult, long &storage_offset)
{
#ifdef WITH_NUMPY
static PyArray_Descr *NumpyLongArrDescr = PyArray_DescrFromType(NPY_INT64);
bool is_long, is_scalar_array;
#endif
// Indexing with a scalar
if(IS_SCALAR(index)) {
int64_t idx;
UNPACK_SCALAR(index);
long dimsize = THTensor_(size)(LIBRARY_STATE tresult.get(), indexed_dim);
// If the user provided negative idx, convert to positive equivalent
idx = (idx < 0) ? dimsize + idx : idx;
if (dimsize <= 0) {
PyErr_SetString(PyExc_IndexError, "indexing an empty tensor");
throw python_error();
}
if (idx < 0 || idx >= dimsize) {
PyErr_Format(PyExc_IndexError, "index %lld is out of range for dimension "
"%lld (of size %lld)", (long long)idx, (long long)indexed_dim, (long long)dimsize);
throw python_error();
}
// If we are indexing a vector, set the storage to the storage underlying
// the vector, and the storage_offset to the location of the element at
// the specificed index. Otherwise, perform a selection
if(THTensor_(nDimension)(LIBRARY_STATE tresult.get()) == 1) {
sresult = tresult.get()->storage;
storage_offset = tresult->storageOffset + tresult->stride[0] * idx;
tresult = NULL;
} else {
THTensor_(select)(LIBRARY_STATE tresult.get(), NULL, indexed_dim, idx);
}
} else if (index == Py_None) {
// _indexOnce will never be called with tresult == NULL, except for a None index
// e.g. x = torch.Tensor(5); y = x[5, None]
if (!tresult) {
tresult = THTensor_(newWithStorage1d)(LIBRARY_STATE sresult, storage_offset, 1, 1);
sresult = NULL;
} else {
// Insert a singleton dimension at indexed_dim, then bump indexed_dim
THTensor_(unsqueeze1d)(LIBRARY_STATE tresult.get(), NULL, indexed_dim++);
}
// Indexing with a slice
} else if (PySlice_Check(index)) {
Py_ssize_t start, end, length, step;
if (!THPUtils_parseSlice(index, THTensor_(size)(LIBRARY_STATE tresult.get(), indexed_dim), &start, &end, &step, &length))
throw python_error();
if (step <= 0) {
PyErr_SetString(PyExc_ValueError, "slice step has to be greater than 0");
throw python_error();
}
if (length == 0) {
PyErr_SetString(PyExc_ValueError, "result of slicing is an empty tensor");
throw python_error();
}
// Modify the Tensor to point to the sliced components
tresult->storageOffset += tresult->stride[indexed_dim] * start;
tresult->stride[indexed_dim] *= step;
tresult->size[indexed_dim] = length;
indexed_dim++;
} else {
return false;
}
return true;
}
#ifndef TH_REAL_IS_HALF
static bool THPTensor_(_checkBasicIntegerArrayIndexing)(THPTensor *indexed, PyObject *arg) {
long ndim = THTensor_(nDimension)(LIBRARY_STATE indexed->cdata);
if (PySequence_Check(arg) && PySequence_Size(arg) == ndim) {
THPObjectPtr fast = THPObjectPtr(PySequence_Fast(arg, NULL));
for (Py_ssize_t i = 0; i < ndim; ++i) {
PyObject *item = PySequence_Fast_GET_ITEM(fast.get(), i);
if (!THPIndexTensor_Check(item) && !PySequence_Check(item)) {
return false;
}
}
return true;
}
return false;
}
static bool THPTensor_(_checkAdvancedIndexing)(THPTensor *indexed, PyObject *arg) {
// Currently we only support two forms of advanced indexing:
//
// 1. "Basic Integer Array Indexing" the integer-array indexing strategy
// where we have ndim sequence/LongTensor arguments
// 2. Combining Advanced Indexing with ":", or "..." , with the limitation that
// the advanced indexing dimensions must be adjacent, i.e.:
//
// x[:, :, [1,2], [3,4], :] --> valid
// x[[1,2], [3,4]] --> valid
// x[[1,2], [3,4], ...] --> valid
// x[:, [1,2], :, [3,4], :] --> not valid
// Verification, Step #1 -- ndim sequencers
if (THPTensor_(_checkBasicIntegerArrayIndexing)(indexed, arg)) return true;
// Verification, Step #2 -- at least one sequencer, all the rest are
// ':' and/or a single '...', can be less than ndim indexers, all sequencers
// adjacent
long ndim = THTensor_(nDimension)(LIBRARY_STATE indexed->cdata);
if (PySequence_Check(arg) && PySequence_Size(arg) <= ndim) {
THPObjectPtr fast = THPObjectPtr(PySequence_Fast(arg, NULL));
bool sequenceFound = false;
bool nonColonEllipsisFound = false;
bool ellipsisFound = false;
Py_ssize_t lastSeqDim = -1;
for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(fast.get()); ++i) {
PyObject *item = PySequence_Fast_GET_ITEM(fast.get(), i);
if (THPIndexTensor_Check(item) || PySequence_Check(item)) {
sequenceFound = true;
// non-adjacent sequencers not yet supported
if (i - 1 != lastSeqDim && lastSeqDim != -1) {
return false;
}
lastSeqDim = i;
continue;
}
if (PySlice_Check(item)) {
long dimSize = THTensor_(size)(LIBRARY_STATE indexed->cdata, i);
// Basically verify that the Slice is ':' and did not specify
// a specific start, end or step
Py_ssize_t start, end, length, step;
if (THPUtils_parseSlice(item, dimSize, &start, &end, &step, &length)) {
if (start != 0 || end != dimSize || step != 1 || length != dimSize) {
nonColonEllipsisFound = true;
break;
}
}
continue;
}
if (Py_TYPE(item) == &PyEllipsis_Type) {
if (ellipsisFound) {
// Can't have duplicate ellipsi
return false;
}
ellipsisFound = true;
continue;
}
nonColonEllipsisFound = true;
break;
}
return sequenceFound && (!nonColonEllipsisFound);
}
return false;
// Full NumPy advanced indexing requirements are coded up below. To fully support
// such indexing will require changes to the actual indexing logic, so we will
// leave this commented out as a reference
/**
// Checks whether the specified selection object should trigger advanced
// indexing
// Case 1: arg is a non-tuple sequence object
if (PySequence_Check(arg) && !PyTuple_Check(arg)) return true;
#ifdef WITH_NUMPY
// Case 2: arg is an nd-array with type integer or bool
if (PyArray_Check(arg) && (PyArray_TYPE((PyArrayObject*)arg) == NPY_INT64 || PyArray_TYPE((PyArrayObject*)arg) == NPY_BOOL)) return true;
#endif
// Case 3: arg is a tuple containing at least one sequence object, ndarray, or LongTensor
if (PyTuple_Check(arg)) {
for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(arg); ++i) {
PyObject *item = PyTuple_GET_ITEM(arg, i);
if (PySequence_Check(item)) {
return true;
}
#ifdef WITH_NUMPY
if (PyArray_Check(item) && (PyArray_TYPE((PyArrayObject*)item) == NPY_INT64 || PyArray_TYPE((PyArrayObject*)item) == NPY_BOOL)) return true;
#endif
if (THPIndexTensor_Check(item)) return true;
}
}
**/
}
// Exposed at the interpreter level
static PyObject* THPTensor_(checkAdvancedIndexing)(THPTensor *self, PyObject *arg) {
if (THPTensor_(_checkAdvancedIndexing)(self, arg)) {
Py_RETURN_TRUE;
}
Py_RETURN_FALSE;
}
static bool THPTensor_(_convertToTensorIndexers)(
PyObject *index,
THTensorPtr& indexed,
Py_ssize_t& sequenceLength,
std::unordered_map<Py_ssize_t, THPPointer<THIndexTensor>>& broadcasted) {
// At the top-level, each indexing element must be one of 3 things:
//
// 1. A LongTensor
// 2. A sequence that can be converted into a LongTensor
// 3. A empty slice object (i.e. ':')
// 4. An Ellipsis (i.e. '...')
//
// This function loops through all of the indexing elements. If we encounter
// a LongTensor, we record the dimension at which it occurs. If we encounter
// another sequence type, we attempt to convert it to a LongTensor, and record
// its position.
//
// Next, once we have all of the indexing Tensors, we attempt to broadcast them.
// If they can be broadcasted, we store each of the broadcasted Tensors in the
// output map, with the dimension of the original tensor as the key.
// Indexes all indexing Tensors (pre-broadcast) by which dimension they occurred.
// Because we rely upon the THPIndexTensor constructor to handle sequence -> tensor
// conversions, we store THPTensors rather than THTensors. We use an ordered map
// to maintain the order of Tensors via dimension. Because this is limited to
// ndim(Tensor), it should always be small + fast.
std::vector<Py_ssize_t> indexingDims;
std::vector<THPIndexTensor*>indexers;
// The indexing matches advanced indexing requirements. In the case that
// the user has an Ellipsis, and/or less dimensions than are in the
// Tensor being indexed, we "fill in" empty Slices to these dimensions
// so that the the resulting advanced indexing code still works
// The top-level indexer should be a sequence, per the check above
THPObjectPtr fast(PySequence_Fast(index, NULL));
sequenceLength = PySequence_Fast_GET_SIZE(fast.get());
int ellipsisOffset = 0;
for (Py_ssize_t i = 0; i < sequenceLength; ++i) {
PyObject *item = PySequence_Fast_GET_ITEM(fast.get(), i);
// If this is an ellipsis, the all subsequent advanced indexing
// objects "positions" should be shifted, e.g. if we have a 5D Tensor
// x, and then x[..., [2, 3]], then the "position" of [2, 3] is 4
if (Py_TYPE(item) == &PyEllipsis_Type) {
ellipsisOffset = THTensor_(nDimension)(LIBRARY_STATE indexed) - sequenceLength;
continue;
}
if (!PySlice_Check(item)) {
// Returns NULL upon conversion failure
THPIndexTensor *indexer = (THPIndexTensor *)PyObject_CallFunctionObjArgs(
THPIndexTensorClass, PySequence_Fast_GET_ITEM(fast.get(), i), NULL);
if (!indexer) {
PyErr_Format(PyExc_IndexError,
"When performing advanced indexing the indexing objects must be LongTensors or "
"convertible to LongTensors");
// Clean up Indexers
for (auto& idx : indexers) {
THIndexTensor_(free)(LIBRARY_STATE idx->cdata);
Py_DECREF(idx);
}
return false;
}
indexingDims.push_back(i + ellipsisOffset);
indexers.push_back(indexer);
}
}
// Next, we need to verify that the Tensors are broadcastable. Keep these
// as raw pointer vectors
std::vector<THIndexTensor*> maybeBroadcasted;
std::vector<THIndexTensor*> candidates;
// Extract the underlying Tensors for use in the expansion API call
for (const auto& indexer : indexers) {
maybeBroadcasted.emplace_back(THIndexTensor_(new)(LIBRARY_STATE_NOARGS));
// borrow the underlying Tensor from the indexer map
candidates.emplace_back(indexer->cdata);
}
// Broadcast/Expand indexing Tensors as necessary
try {
THIndexTensor_(expandNd)(LIBRARY_STATE maybeBroadcasted.data(), candidates.data(), maybeBroadcasted.size());
// Broadcast succeeded, place Broadcasted Tensors into output map by the index at
// which they occurred, transferring ownership to that map object
for (unsigned int i = 0; i < indexingDims.size(); ++i) {
THPPointer<THIndexTensor> owned(maybeBroadcasted[i]);
broadcasted[indexingDims[i]] = std::move(owned);
}
// Next, before doing any further work, we want to verify that all the indices
// are in bounds at each advanced index dimension. This occurs only on the CPU,
// as point gets on CUDA Tensors would be slow. CUDA out of bounds errors
// will trigger a device-side assert
#if !defined(THC_GENERIC_FILE)
ptrdiff_t nElement = THIndexTensor_(nElement)(LIBRARY_STATE broadcasted.begin()->second.get());
THLongStoragePtr viewer(THLongStorage_newWithSize(1));
THLongStorage_set(viewer.get(), 0, nElement);
for (auto& dimBroadcast : broadcasted) {
Py_ssize_t dim = dimBroadcast.first;
long sizeAtDim = THTensor_(size)(LIBRARY_STATE indexed, dim);
// Need to make contiguous to view as 1D :/
THPPointer<THIndexTensor> contig(THIndexTensor_(newContiguous)(LIBRARY_STATE dimBroadcast.second.get()));
// View as 1D + get1D makes me sad :(
THPPointer<THIndexTensor> flat(THIndexTensor_(newView)(LIBRARY_STATE contig.get(), viewer));
for (ptrdiff_t i = 0; i < THIndexTensor_(nElement)(LIBRARY_STATE flat.get()); ++i) {
long indexAtDim = THTensor_fastGet1d(flat.get(), i);
if (indexAtDim >= sizeAtDim) {
PyErr_Format(PyExc_IndexError, "index %lld from broadcast indexer is out of range "
"for dimension %lld (of size %lld)",
(long long)indexAtDim, (long long)dim, (long long)sizeAtDim);
// Clean up Indexers
for (auto& idx : indexers) {
THIndexTensor_(free)(LIBRARY_STATE idx->cdata);
Py_DECREF(idx);
}
return false;
}
}
}
#endif
} catch (std::exception& e) {
// Broadcasted failed, cleanup and return error. I'm not sure if there is a better
// way to do this where we don't have to manually clean up the memory
for (const auto& tensor : maybeBroadcasted) {
THIndexTensor_(free)(LIBRARY_STATE tensor);
}
PyErr_Format(PyExc_IndexError, "The advanced indexing objects could not be broadcast");
// Clean up Indexers
for (auto& idx : indexers) {
THIndexTensor_(free)(LIBRARY_STATE idx->cdata);
Py_DECREF(idx);
}
return false;
}
// Clean up Indexers
for (auto& idx : indexers) {
THIndexTensor_(free)(LIBRARY_STATE idx->cdata);
Py_DECREF(idx);
}
return true;
}
static inline long THPTensor_(_indexToOffset)(
THTensorPtr& indexed,
std::unordered_map<Py_ssize_t, THPPointer<THIndexTensor>>& broadcasted,
ptrdiff_t index)
{
// We need to translate an "index" into a linear offset within the Tensor indexed.
// We will perform the normal mod/divide loop, except in the case of an advance indexed
// dimension, we need to take special care to utilize the size and subset of indices
// specified by the Tensor at the advanced indexed dimension. We hereafter refer to
// this as the "broadcast" dimension, although in the case of a single indexer, the
// broadcast op is pretty much a no-op.
//
// For example, suppose we have a three-dimensional Tensor x of shape (5, 10, 15),
// and our indexing operation is x[:, (2, 4, 5), :].
//
// For Linear Index 32:
//
// dim = 2 (size = 15): 32 % 15 = 2; 32 / 15 = 2
// dim = 1 (size = 3): 2 % 3 = 2; 2 / 3 = 0
// dim = 0 (size = 5): 0 % 5 = 0; end
//
// So we have selected the index (0, 2, 2). Now for the strides calculation. For the
// non-broadcast dimensions, we simply do the index * the stride. But for the broadcast
// dimension we need to get the corresponding subset index (i.e., pick from (2, 4, 5))
// and use that before multiplying by the stride at that dimension.
//
// (assumes that x is contiguous)
//
// dim = 2 (stride = 1): 2 * stride = 2, offset = 2
// dim = 1 (stride = 15): (broadcast[2] = 5) * stride = 75, offset = 77
// dim = 0 (stride = 75): 0 * stride = 0, offset = 77
//
// So we can see how this works.
//
// The other complication occurs when we have more than one advanced indexer. Consider
// the case:
//
// x = torch.Tensor(3, 4, 6, 3)
// x.stride = (72, 18, 3, 1)
// x[:, [0, 1], [2, 3], :]
//
// Because the advanced indexers are broadcast and iterated as one, we need to apply
// the same index in each of the advanced indexing dimensions. When we reach an advanced
// indexing element, we look to see if the next dimension we will consider is also part
// of the advanced indexing. If it is, we maintain the index:
//
// For Linear Index 16:
//
// dim = 3 (size = 3): 16 % 3 = 1; 16 / 3 = 5
// dim = 2 (size = 2): 5 % 2 = 1; Do Not Update Index
// dim = 1 (size = 2): 5 % 2 = 1; 5 / 2 = 2
// dim = 0 (size = 3): 2 % 3 = 2; end
//
// Then for the offsets:
//
// dim = 3 (stride = 1): 1 * stride = 1, offset: 1
// dim = 2 (stride = 3): [2, 3][1] = 3 * stride = 9, offset = 10
// dim = 1 (stride = 18): [0, 1][1] = 1 * stride = 18, offset = 28
// dim = 0 (stride = 72): 2 * stride = 144, offset = 172
//
// Special care needs to be taken to handle advanced indexers at the beginning, end.
long offset = 0;
for (long i = THTensor_(nDimension)(LIBRARY_STATE indexed) - 1; i >= 0; --i) {
// Get size at dimension i, its the size of the indexed Tensor at that dimension if its
// not an advanced indexing dimension, otherwise its the size of the broadcast Tensor
ptrdiff_t sizeAtDim, indexAtDim, nextIndex;
long strideAtDim = THTensor_(stride)(LIBRARY_STATE indexed, i);
auto broadcast = broadcasted.find(i);
if (broadcast != broadcasted.end()) {
sizeAtDim = THIndexTensor_(nElement)(LIBRARY_STATE broadcast->second.get());
indexAtDim = THTensor_fastGet1d(broadcast->second.get(), index % sizeAtDim);
if (i > 0 && broadcasted.find(i - 1) != broadcasted.end()) {
nextIndex = index;
} else {
nextIndex = index / sizeAtDim;
}
} else {
sizeAtDim = THTensor_(size)(LIBRARY_STATE indexed, i);
indexAtDim = index % sizeAtDim;
nextIndex = index / sizeAtDim;
}
offset += indexAtDim * strideAtDim;
index = nextIndex;
}
// size at dim is a bad name, because its really the number of elements in the
// broadcast tensor, rather than the size of the indexed Tensor at that dim
return offset;
}
// Caller takes ownership of the returned IndexTensor
static THIndexTensor* THPTensor_(_calculateLinearIndices)(
THTensorPtr& indexed,
Py_ssize_t sequenceLength,
std::unordered_map<Py_ssize_t, THPPointer<THIndexTensor>>& broadcasted) {
// Get the number of indices to generate - this is the product of the size at each dimension,
// that is not part of the advanced indexing, multiplied by the nElement of one of the broadcast
// Tensors. For example:
//
// x = torch.Tensor(10)
// x[[0, 2, 4], ] --> no dims not part of indexing, size = 3
//
// x = torch.Tensor(5, 5)
// x[[0, 3, 3], [1]] --> no dims not part of indexing, size = 3
// x[:, [2, 3]] --> dim_0 not part of indexing, size = 5
// --> multiply by nElement of broadcast Tensor, nElement = 2
// --> total_size = 10
//
// x = torch.Tensor(5, 5, 5)
// x[[0, 1], :, :] --> dim_1, dim_2 not part of indexing, size = 5 * 5 = 25
// --> multiply by nElement of broadcast Tensor, nElement = 2
// --> total_size = 50
// TODO: should this be 1? what if there are no things to index? ????
ptrdiff_t indexingElements = THIndexTensor_(nElement)(LIBRARY_STATE broadcasted.begin()->second.get());
for (Py_ssize_t i = 0; i < THTensor_(nDimension)(LIBRARY_STATE indexed.get()); ++i) {
indexingElements *= broadcasted.find(i) != broadcasted.end() ?
1 : THTensor_(size)(LIBRARY_STATE indexed.get(), i);
}
// The broadcasted advanced indexing tensor might not be one-dimensional, but we are
// generating a vector of indices, so we need to view the indexer as 1D prior to getting
// the value for the particular dimension.
std::unordered_map<Py_ssize_t, THPPointer<THIndexTensor>> flattenedBroadcasters;
THLongStorage *indexerSize = THLongStorage_newWithSize(1);
// All broadcast Tensors have the same number of elements
ptrdiff_t dimIndexingElements = THIndexTensor_(nElement)(LIBRARY_STATE broadcasted.begin()->second.get());
THLongStorage_set(indexerSize, 0, dimIndexingElements);
for (auto& broadcast : broadcasted) {
THIndexTensor *contig = THIndexTensor_(newContiguous)(LIBRARY_STATE broadcast.second.get());
THPPointer<THIndexTensor> flat(THIndexTensor_(newView)(LIBRARY_STATE contig, indexerSize));
flattenedBroadcasters[broadcast.first] = std::move(flat);
THIndexTensor_(free)(LIBRARY_STATE contig);
}
THLongStorage_free(indexerSize);
#ifdef THC_GENERIC_FILE
// Call GPU kernel for index calculation
THCudaLongTensor *cudaIndices =
THCudaLongTensor_newWithSize1d(LIBRARY_STATE indexingElements);
long baseOffset = THTensor_(storageOffset)(LIBRARY_STATE indexed);
// Need to pass broadcast Tensors to API, pass NULL ptr for all empty
// (i.e. not-advanced indexed) dims
std::vector<THCudaLongTensor *> indexers(
THTensor_(nDimension)(LIBRARY_STATE indexed.get()), NULL);
for (int i = 0; i < THTensor_(nDimension)(LIBRARY_STATE indexed.get()); ++i) {
if (flattenedBroadcasters.count(i) > 0) {
indexers[i] = flattenedBroadcasters[i].get();
}
}
THTensor_(calculateAdvancedIndexingOffsets)(LIBRARY_STATE cudaIndices, indexed, baseOffset, indexers.data());
return cudaIndices;
#else
THIndexTensor *linearIndices = THIndexTensor_(newWithSize1d)(LIBRARY_STATE indexingElements);
long baseOffset = THTensor_(storageOffset)(LIBRARY_STATE indexed);
for (ptrdiff_t i = 0; i < indexingElements; ++i) {
long linearIdx = THPTensor_(_indexToOffset)(
indexed, flattenedBroadcasters, i);
THTensor_fastSet1d(linearIndices, i, baseOffset + linearIdx);
}
return linearIndices;
#endif
}
static bool THPTensor_(_advancedIndexCommonInit)(
PyObject *index,
THTensorPtr &indexed,
std::unordered_map<Py_ssize_t, THPPointer<THIndexTensor>>& broadcasted,
THIndexTensor **linearIndices,
THTensor **flattened) {
// Precondition: index is an object that specifies advanced indexing.
// For now, we only support the simple integer-array indexing strategy
// where there are ndim(self) indexing sequences/LongTensors that can be
// broadcasted and iterated as one
// Precondition: tresult points to the Tensor we are indexing, and is also where
// we will store the output Tensor
// First attempt to convert to Tensor indexers from the arbitrary
// python/tensor objects passed
Py_ssize_t sequenceLength;
if (!THPTensor_(_convertToTensorIndexers)(index, indexed, sequenceLength, broadcasted)) {
return false;
}
// At this point broadcasted should store our indexing Tensors.
// Our strategy is to view the indexed Tensor as a 1D Tensor, calculate
// the linear indices for each tuple of indexing elements, and then call
// indexSelect using those linear indices
*linearIndices = THPTensor_(_calculateLinearIndices)(indexed, sequenceLength, broadcasted);
*flattened = THTensor_(newWithStorage1d)(LIBRARY_STATE
THTensor_(storage)(LIBRARY_STATE indexed.get()),
0,
THStorage_(size)(LIBRARY_STATE
THTensor_(storage)(LIBRARY_STATE indexed.get())),
1);
return true;
}
// Should called, written in such a way that if any of the parameters are not
// initialized we still don't crash
static void THPTensor_(_advancedIndexCommonCleanup)(
THIndexTensor *linearIndices,
THTensor *flattened) {
if (linearIndices) THIndexTensor_(free)(LIBRARY_STATE linearIndices);
if (flattened) THTensor_(free)(LIBRARY_STATE flattened);
}
static bool THPTensor_(_advancedIndexGet)(PyObject *index, THTensorPtr &tresult)
{
std::unordered_map<Py_ssize_t, THPPointer<THIndexTensor>> broadcasted;
THIndexTensor *linearIndices = NULL;
THTensor *flattened = NULL;
bool success = THPTensor_(_advancedIndexCommonInit)(
index, tresult, broadcasted, &linearIndices, &flattened);
if (success) {
THTensor *result = THTensor_(new)(LIBRARY_STATE_NOARGS);
// Index Select makes a copy of the storage, thus it is enforcing NumPy semantics, which
// says that the array returned by advanced indexing is a copy, not a view
THTensor_(indexSelect)(LIBRARY_STATE result, flattened, 0, linearIndices);
// Finally, we need to calculate the appropriate shape of the output Tensor
// The size at each dimension is unmodified from the input Tensor, except where
// there are advanced indexers. In this case, the n dimensions containing adjacent
// advanced indexers are reshaped to be the size of the broadcast indexer.
//
// Example, x = torch.Tensor(5, 10, 15)
//
// x[[0, 2, 4], [2, 3, 4], [1, 1, 2]]
//
// Broadcast Advanced Indexer Size: 1D Tensor of Size 3
// Result Size: 1D Tensor of Size 3
//
// x[:, [2, 4, 5], :]
// Broadcast Advanced Indexer Size: 1D Tensor of Size 3
// Result Size: (5, 3, 15)
//
// x[:, [[0, 0], [1, 2]], [[1, 3], [2, 4]]]
// Broadcast Advanced Indexer Size: 2D Tensor (2, 2)
// Result Size: (5, 2, 2)
//
// x[:, [[1, 2, 3], [2, 3, 4]], :]
// Broadcast Advanced Indexer Size: 2D Tensor of Size (2, 3)
// Result Size: (5, 2, 3, 15)
// First, calculate the number of dimensions of the output shape. This is the
// number of non-advanced indexed dimensions + the number of dimensions in the
// broadcast Tensor
int baseDims = THTensor_(nDimension)(LIBRARY_STATE tresult.get()) - broadcasted.size();
// Fast path, if we have ndim advanced indexers, the output shape is simply the
// broadcast shape
if (baseDims == 0) {
auto iter = broadcasted.begin();
THTensor_(resizeNd)(LIBRARY_STATE result,
THIndexTensor_(nDimension)(LIBRARY_STATE iter->second.get()),
iter->second.get()->size,
NULL);
} else {
// We have at least one dimension that is not part of advanced indexing. This
// implementation is pretty much shit, there might be a better way of doing this...
THIndexTensor *broadcastShape = broadcasted.begin()->second.get();
int indexedDims = THIndexTensor_(nDimension)(LIBRARY_STATE broadcastShape);
THLongStorage *outputShape = THLongStorage_newWithSize(baseDims + indexedDims);
int baseDimPtr = 0;
int outputDimPtr = 0;
bool insertedSubspace = false;
while (outputDimPtr != baseDims + indexedDims) {
auto iter = broadcasted.find(baseDimPtr);
if (iter == broadcasted.end()) {
outputShape->data[outputDimPtr] = THTensor_(size)(LIBRARY_STATE tresult.get(), baseDimPtr);
++baseDimPtr;
++outputDimPtr;
} else if (!insertedSubspace) {
for (int dim = 0; dim < indexedDims; ++dim) {
outputShape->data[outputDimPtr] = THIndexTensor_(size)(LIBRARY_STATE iter->second.get(), dim);
++outputDimPtr;
}
insertedSubspace = true;
} else {
// ignore
++baseDimPtr;
}
}
THTensor_(resizeNd)(LIBRARY_STATE result,
baseDims + indexedDims,
outputShape->data,
NULL);
THLongStorage_free(outputShape);
}
// result ptr takes ownership of result tensor, and implicitly frees the
// indexed one
tresult = result;
}
THPTensor_(_advancedIndexCommonCleanup)(linearIndices, flattened);
return success;
}
static bool THPTensor_(_advancedIndexSet)(PyObject *index, THTensorPtr &dest, PyObject *src)
{
std::unordered_map<Py_ssize_t, THPPointer<THIndexTensor>> broadcasted;
THIndexTensor *linearIndices = NULL;
THTensor *flattened = NULL;
bool success = THPTensor_(_advancedIndexCommonInit)(
index, dest, broadcasted, &linearIndices, &flattened);
if (success) {
if (THPUtils_(checkReal)(src)) {
real v = THPUtils_(unpackReal)(src);
THTensor_(indexFill)(LIBRARY_STATE flattened, 0, linearIndices, v);
} else if (THPTensor_(Check)(src)) {
// Because we are doing an index copy, we need to make sure of two things:
// 1. the src Tensor is 1D and
// 2. the src is made contiguous before being flattened into a 1D view, if
// necessary
THTensor *contiguous = THTensor_(newContiguous)(LIBRARY_STATE ((THPTensor*)src)->cdata);
THTensor *cviewed = THTensor_(newWithStorage1d)(LIBRARY_STATE
THTensor_(storage)(LIBRARY_STATE contiguous),
THTensor_(storageOffset)(LIBRARY_STATE contiguous),
THTensor_(nElement)(LIBRARY_STATE contiguous),
1);
THTensor_(indexCopy)(LIBRARY_STATE flattened, 0, linearIndices, cviewed);
THTensor_(free)(LIBRARY_STATE contiguous);
THTensor_(free)(LIBRARY_STATE cviewed);
} else {
THPUtils_setError("can't assign %s to a " THPTensorStr " using a LongTensor "
"(only " THPTensorStr " or %s are supported)",
THPUtils_typename(src), THPUtils_typeTraits<real>::python_type_str);
success = false;
}
}
THPTensor_(_advancedIndexCommonCleanup)(linearIndices, flattened);
return success;
}
static bool THPTensor_(_advancedIndexAdd)(PyObject *index, THTensorPtr &dest, THTensorPtr &src) {
std::unordered_map<Py_ssize_t, THPPointer<THIndexTensor>> broadcasted;
THIndexTensor *linearIndices = NULL;
THTensor *flattened = NULL;
bool success = THPTensor_(_advancedIndexCommonInit)(
index, dest, broadcasted, &linearIndices, &flattened);
if (success) {
// Verify src tensor is contiguous before flattening
THTensor *contiguous = THTensor_(newContiguous)(LIBRARY_STATE src);
THTensor *cviewed = THTensor_(newWithStorage1d)(LIBRARY_STATE
THTensor_(storage)(LIBRARY_STATE contiguous),
THTensor_(storageOffset)(LIBRARY_STATE contiguous),
THTensor_(nElement)(LIBRARY_STATE contiguous),
1);
THTensor_(indexAdd)(LIBRARY_STATE flattened, 0, linearIndices, cviewed);
THTensor_(free)(LIBRARY_STATE contiguous);
THTensor_(free)(LIBRARY_STATE cviewed);
}
THPTensor_(_advancedIndexCommonCleanup)(linearIndices, flattened);
return success;
}
static bool THPTensor_(_advancedIndexSelect)(PyObject *index, THTensorPtr &dest, THTensorPtr &src) {
std::unordered_map<Py_ssize_t, THPPointer<THIndexTensor>> broadcasted;
THIndexTensor *linearIndices = NULL;
THTensor *flattened = NULL;
bool success = THPTensor_(_advancedIndexCommonInit)(
index, src, broadcasted, &linearIndices, &flattened);
if (success) {
THTensor_(indexSelect)(LIBRARY_STATE dest, flattened, 0, linearIndices);
}
THPTensor_(_advancedIndexCommonCleanup)(linearIndices, flattened);
return success;
}
// Needed for autograd to support twice differentiable indexing
static PyObject* THPTensor_(advancedIndexAdd)(THPTensor *self, PyObject *args) {
HANDLE_TH_ERRORS
THPUtils_assert(PyTuple_GET_SIZE(args) == 2, "advancedIndexAdd takes exactly two "
"arguments (%d given)", (int) PyTuple_GET_SIZE(args));
THPUtils_assert(THPTensor_(_checkAdvancedIndexing)(self, PyTuple_GET_ITEM(args, 0)),
"first argument must be an indexer that triggers advanced indexing");
THPUtils_assert(THPTensor_(Check)(PyTuple_GET_ITEM(args, 1)), "Second argument "
"must be a Tensor");
THTensorPtr gradOutput(THTensor_(newWithTensor)(
LIBRARY_STATE ((THPTensor *)PyTuple_GET_ITEM(args, 1))->cdata));
THTensorPtr dest(THTensor_(newWithTensor)(LIBRARY_STATE self->cdata));
bool success = THPTensor_(_advancedIndexAdd)(PyTuple_GET_ITEM(args, 0), dest, gradOutput);
if (!success) {
return NULL;
}
Py_INCREF(self);
return (PyObject *)self;
END_HANDLE_TH_ERRORS
}
// Needed for autograd to support backwards passes when there are overlapping
// indices
static PyObject* THPTensor_(advancedIndexSelect)(THPTensor *self, PyObject *args) {
HANDLE_TH_ERRORS
THPUtils_assert(PyTuple_GET_SIZE(args) == 1, "advancedIndexSelect takes exactly one "
"argument (%d given)", (int) PyTuple_GET_SIZE(args));
THPUtils_assert(THPTensor_(_checkAdvancedIndexing)(self, PyTuple_GET_ITEM(args, 0)),
"first argument must be an indexer that triggers advanced indexing");
THTensorPtr dest(THTensor_(new)(LIBRARY_STATE_NOARGS));
THTensorPtr src(THTensor_(newWithTensor)(LIBRARY_STATE self->cdata));
bool success = THPTensor_(_advancedIndexSelect)(PyTuple_GET_ITEM(args, 0), dest, src);
if (!success) {
return NULL;
}
return THPTensor_(New)(dest.release());
END_HANDLE_TH_ERRORS
}
#endif // TH_REAL_IS_HALF
// Handles indexing into a Tensor given a tuple, ellipses, sequence, etc. index
static bool THPTensor_(_index)(THPTensor *self, PyObject *index,
THTensorPtr &tresult, THStorage * &sresult, long &storage_offset)
{
// As a base case, we create a new Tensor that is a copy of the Tensor
// we are indexing
tresult = THTensor_(newWithTensor)(LIBRARY_STATE self->cdata);
sresult = NULL;
int indexed_dim = 0;
if(PyTuple_Check(index)) {
// num_index_dim is the number of indices in the tuple, num_effective_index
// is the number of non-None, non-ellipses indices
long num_index_dim = (long)PyTuple_Size(index);
long num_effective_index = num_index_dim;
long num_tensor_dim = THTensor_(nDimension)(LIBRARY_STATE self->cdata);
long ellipsis_idx = -1;
for (int i = 0; i < num_index_dim; i++) {
PyObject *dimidx = PyTuple_GET_ITEM(index, i);
if (dimidx == Py_Ellipsis) {
if (ellipsis_idx != -1) throw std::runtime_error("ellipsis can be used at most once");
ellipsis_idx = i;
num_effective_index--;
}
if (dimidx == Py_None) {
num_effective_index--;
}
}
if (num_effective_index > num_tensor_dim) {
PyErr_Format(PyExc_IndexError,
"trying to index %ld dimensions of a %ld dimensional tensor",
num_effective_index, num_tensor_dim);
return false;
}
// Loop through the indices and perform the indiviudal indexing at each dim
bool valid = true;
for (int dim = 0; dim < num_index_dim; dim++) {
if (dim == ellipsis_idx) {
// tresult can be NULL if ellipsis is the last item
if (tresult) indexed_dim = tresult->nDimension - (num_index_dim - dim - 1);
continue;
}
PyObject *dimidx = PyTuple_GET_ITEM(index, dim);
valid = THPTensor_(_indexOnce)(dimidx, indexed_dim, tresult, sresult, storage_offset);
if (!valid) {
tresult = NULL;
// overwrite this, so the message mentions the incorrect object
index = dimidx;
break;
}
}
if (valid) return true;
} else if (index == Py_Ellipsis) {
// The result of indexing with an ellipsis only is just the entire existing
// Tensor
return true;
} else {
// index is a scalar, perform the indexing once on the 0th-dimension
if (THPTensor_(_indexOnce)(index, indexed_dim, tresult, sresult, storage_offset))
return true;
}
PyErr_Format(PyExc_TypeError, "indexing a tensor with an object of type %s. "
"The only supported types are integers, slices"
#ifdef WITH_NUMPY
", numpy scalars and "
#endif
#ifndef THC_GENERIC_FILE
"torch.LongTensor or torch.ByteTensor as the only argument.",
#else
"torch.cuda.LongTensor or torch.cuda.ByteTensor as the only argument.",
#endif
THPUtils_typename(index));
return false;
}
#undef IS_SCALAR
#undef UNPACK_SCALAR
template<bool force_tensor>
static PyObject * THPTensor_(getValue)(THPTensor *self, PyObject *index)
{
HANDLE_TH_ERRORS
#ifndef TH_REAL_IS_HALF
#if defined(THC_GENERIC_FILE)
THCPByteTensor *mask = THCPByteTensor_Check(index) ? (THCPByteTensor*)index : NULL;
THCPAutoGPU __gpu_guard(NULL, (PyObject*)self);
#elif defined(THD_GENERIC_FILE)
THDPByteTensor *mask = THDPByteTensor_Check(index) ? (THDPByteTensor*)index : NULL;
#else
THPByteTensor *mask = THPByteTensor_Check(index) ? (THPByteTensor*)index : NULL;
#endif
if (mask) {
THTensorPtr t(THTensor_(new)(LIBRARY_STATE_NOARGS));
THTensor_(maskedSelect)(LIBRARY_STATE t.get(), self->cdata, mask->cdata);
return THPTensor_(New)(t.release());
}
if (THPIndexTensor_Check(index)) {
THIndexTensor *index_t = ((THPIndexTensor*)index)->cdata;
THTensorPtr index_result(THTensor_(new)(LIBRARY_STATE_NOARGS));
THTensor_(indexSelect)(LIBRARY_STATE index_result.get(), self->cdata, 0, index_t);
return THPTensor_(New)(index_result.release());
}
#endif
THTensorPtr tresult;
THStorage *sresult;
long storage_offset;
// Check and see if the indexing object triggers advanced indexing semantics
#ifndef TH_REAL_IS_HALF
if (THPTensor_(_checkAdvancedIndexing)(self, index)) {
tresult = THTensor_(newWithTensor)(LIBRARY_STATE self->cdata);
if (!THPTensor_(_advancedIndexGet)(index, tresult)) {
return NULL;
}
// TODO: needed?
return THPTensor_(New)(tresult.release());
}
#endif // TH_REAL_IS_HALF
if (!THPTensor_(_index)(self, index, tresult, sresult, storage_offset))
return NULL;
if (tresult)
return THPTensor_(New)(tresult.release());
if (sresult) {
if (force_tensor) {
return THPTensor_(New)(THTensor_(newWithStorage1d)(LIBRARY_STATE sresult, storage_offset, 1, -1));
} else {
return THPUtils_(newReal)(THStorage_(get)(LIBRARY_STATE sresult, storage_offset));
}
}
THPUtils_setError("An unknown error has occurred when indexing a tensor "
"in THPTensor_(getValue). Please report this in a github issue at: "
"https://github.com/pytorch/pytorch");
return NULL;
END_HANDLE_TH_ERRORS
}
template<bool force_tensor>
static int THPTensor_(setValue)(THPTensor *self, PyObject *index, PyObject *value)
{
HANDLE_TH_ERRORS
#ifndef TH_REAL_IS_HALF
#if defined(THC_GENERIC_FILE)
THCPByteTensor *mask = THCPByteTensor_Check(index) ? (THCPByteTensor*)index : NULL;
THCPAutoGPU __gpu_guard(NULL, (PyObject*)self);
#elif defined(THD_GENERIC_FILE)
THDPByteTensor *mask = THDPByteTensor_Check(index) ? (THDPByteTensor*)index : NULL;
#else
THPByteTensor *mask = THPByteTensor_Check(index) ? (THPByteTensor*)index : NULL;
#endif
if (mask) {
if (THPUtils_(checkReal)(value)) {
real v = THPUtils_(unpackReal)(value);
THTensor_(maskedFill)(LIBRARY_STATE self->cdata, mask->cdata, v);
} else if (THPTensor_(Check)(value)) {
THTensor_(maskedCopy)(LIBRARY_STATE self->cdata, mask->cdata, ((THPTensor*)value)->cdata);
} else {
THPUtils_setError("can't assign %s to a " THPTensorStr " using a mask "
"(only " THPTensorStr " or %s are supported)",
THPUtils_typename(value), THPUtils_typeTraits<real>::python_type_str);
}
return 0;
}
if (THPIndexTensor_Check(index)) {
THIndexTensor *index_t = ((THPIndexTensor*)index)->cdata;
if (THPUtils_(checkReal)(value)) {
real v = THPUtils_(unpackReal)(value);
THTensor_(indexFill)(LIBRARY_STATE self->cdata, 0, index_t, v);
} else if (THPTensor_(Check)(value)) {
THTensor_(indexCopy)(LIBRARY_STATE self->cdata, 0, index_t, ((THPTensor*)value)->cdata);
} else {
THPUtils_setError("can't assign %s to a " THPTensorStr " using a LongTensor "
"(only " THPTensorStr " or %s are supported)",
THPUtils_typename(value), THPUtils_typeTraits<real>::python_type_str);
}
return 0;
}
#endif
THTensorPtr tresult;
THStorage *sresult;
long storage_offset;
// Check and see if the indexing object triggers advanced indexing semantics
#ifndef TH_REAL_IS_HALF
if (THPTensor_(_checkAdvancedIndexing)(self, index)) {
tresult = THTensor_(newWithTensor)(LIBRARY_STATE self->cdata);
if (!THPTensor_(_advancedIndexSet)(index, tresult, value)) {
return -1;
}
return 0;
}
#endif // TH_REAL_IS_HALF
if (!THPTensor_(_index)(self, index, tresult, sresult, storage_offset))
return -1;
if (sresult) {
if (!force_tensor) {
if (!THPUtils_(checkReal)(value)) {
THPUtils_setError("can't assign a %s to a scalar value of type %s",
THPUtils_typename(value), THPUtils_typeTraits<real>::python_type_str);
return -1;
}
THStorage_(set)(LIBRARY_STATE sresult, storage_offset, THPUtils_(unpackReal)(value));
return 0;
} else {
tresult = THTensor_(newWithStorage1d)(LIBRARY_STATE sresult, storage_offset, 1, -1);
}
}
if (tresult) {
if (THPUtils_(checkReal)(value)) {
#ifndef TH_REAL_IS_HALF
THTensor_(fill)(LIBRARY_STATE tresult.get(), THPUtils_(unpackReal)(value));
#else
throw std::runtime_error("torch.HalfTensors don't support scalar assignments");
#endif
} else {
// TODO: try to do this without creating a temporary object
THPTensorPtr tmp((THPTensor*)THPTensor_(New)(tresult.release()));
if (!tmp)
return -1;
if (!THPCopy(THTensor_(copy_functions), (PyObject*)tmp.get(), value, false, false)) {
return -1;
}
}
return 0;
}
THPUtils_setError("An unknown error has occurred when indexing a tensor "
"in THPTensor_(setValue). Please report this in a github issue at: "
"https://github.com/pytorch/pytorch");
return -1;
END_HANDLE_TH_ERRORS_RET(-1)
}
#undef THIndexTensor
#undef THIndexTensor_
#undef THPIndexTensor
#undef THPIndexTensor_Check
Py_ssize_t THPTensor_(length)(THPTensor *self)
{
if (self->cdata->nDimension == 0)
return 0;
return self->cdata->size[0];
}
#include "TensorMethods.cpp"
static PyMappingMethods THPTensor_(mappingmethods) = {
(lenfunc)THPTensor_(length),
(binaryfunc)THPTensor_(getValue)<false>,
(objobjargproc)THPTensor_(setValue)<false>
};
// TODO: implement equality
PyTypeObject THPTensorType = {
PyVarObject_HEAD_INIT(NULL, 0)
"torch._C." THPTensorBaseStr, /* tp_name */
sizeof(THPTensor), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)THPTensor_(dealloc), /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
&THPTensor_(mappingmethods), /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
NULL, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* will be assigned in init */ /* tp_methods */
0, /* will be assigned in init */ /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
THPTensor_(pynew), /* tp_new */
};
static struct PyMemberDef THPTensor_(members)[] = {
{(char*)"_cdata", T_ULONGLONG, offsetof(THPTensor, cdata), READONLY, NULL},
{NULL}
};
typedef struct {
PyObject_HEAD
} THPTensorStateless;
PyTypeObject THPTensorStatelessType = {
PyVarObject_HEAD_INIT(NULL, 0)
"torch._C." THPTensorBaseStr ".stateless", /* tp_name */
sizeof(THPTensorStateless), /* tp_basicsize */
0, /* tp_itemsize */
0, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved / tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
NULL, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
THPTensor_stateless_(methods), /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
0, /* tp_free */
0, /* tp_is_gc */
0, /* tp_bases */
0, /* tp_mro */
0, /* tp_cache */
0, /* tp_subclasses */
0, /* tp_weaklist */
};
#if !defined(TH_REAL_IS_HALF) && !defined(THD_GENERIC_FILE)
#include "SparseTensor.cpp"
#endif
#ifndef THD_GENERIC_FILE
void THPTensor_(initCopyMethods)()
{
auto& h = THTensor_(copy_functions);
// copy from same type
THPInsertTensorCopyFunction(h, &THTensor_(copy));
// copy from CPU types
THPInsertTensorCopyFunction(h, &THTensor_(copyByte));
THPInsertTensorCopyFunction(h, &THTensor_(copyChar));
THPInsertTensorCopyFunction(h, &THTensor_(copyShort));
THPInsertTensorCopyFunction(h, &THTensor_(copyInt));
THPInsertTensorCopyFunction(h, &THTensor_(copyLong));
THPInsertTensorCopyFunction(h, &THTensor_(copyFloat));
THPInsertTensorCopyFunction(h, &THTensor_(copyHalf));
THPInsertTensorCopyFunction(h, &THTensor_(copyDouble));
#ifdef THC_GENERIC_FILE
// copy from GPU types
THPInsertTensorCopyFunction(h, &THTensor_(copyCudaByte));
THPInsertTensorCopyFunction(h, &THTensor_(copyCudaChar));
THPInsertTensorCopyFunction(h, &THTensor_(copyCudaShort));
THPInsertTensorCopyFunction(h, &THTensor_(copyCudaInt));
THPInsertTensorCopyFunction(h, &THTensor_(copyCudaLong));
THPInsertTensorCopyFunction(h, &THTensor_(copyCudaFloat));
THPInsertTensorCopyFunction(h, &THTensor_(copyCudaDouble));
#ifdef CUDA_HALF_TENSOR
THPInsertTensorCopyFunction(h, &THTensor_(copyCudaHalf));
#endif
THPInsertTensorCopyFunction(h, &THCTensor_(copyAsyncCPU), true);
// add CPU <- GPU copies to base type
#define THCpuTensor_(name) TH_CONCAT_4(TH, Real, Tensor_, name)
extern THPCopyList THCpuTensor_(copy_functions);
auto& b = THCpuTensor_(copy_functions);
THPInsertTensorCopyFunction(b, &THCpuTensor_(copyCudaByte));
THPInsertTensorCopyFunction(b, &THCpuTensor_(copyCudaChar));
THPInsertTensorCopyFunction(b, &THCpuTensor_(copyCudaShort));
THPInsertTensorCopyFunction(b, &THCpuTensor_(copyCudaInt));
THPInsertTensorCopyFunction(b, &THCpuTensor_(copyCudaLong));
THPInsertTensorCopyFunction(b, &THCpuTensor_(copyCudaFloat));
THPInsertTensorCopyFunction(b, &THCpuTensor_(copyCudaDouble));
#ifdef CUDA_HALF_TENSOR
THPInsertTensorCopyFunction(b, &THCpuTensor_(copyCudaHalf));
#endif
THPInsertTensorCopyFunction(b, &THCpuTensor_(copyAsyncCuda), true);
#undef THCpuTensor_
#endif
}
#else
void THPTensor_(initCopyMethods)()
{
// TODO: cross type copies
auto& h = THTensor_(copy_functions);
THPInsertCopyFunction(h, &THDTensor_(copy));
#define THCpuTensor_(name) TH_CONCAT_4(TH, Real, Tensor_, name)
#define THCpuTensor TH_CONCAT_3(TH, Real, Tensor)
#define THPCpuTensorType TH_CONCAT_3(THP, Real, TensorType)
extern THPCopyList THCpuTensor_(copy_functions);
auto& b = THCpuTensor_(copy_functions);
THDPInsertCopyFunctionFromMaster(h, &THDTensor_(copyFromMaster), &THPCpuTensorType);
THDPInsertCopyFunctionFromWorker(b, THDTensor_(copyFromWorker));
#undef THCpuTensor
#undef THCpuTensor_
#undef THPCpuTensorType
}
#endif // !defined(THD_GENERIC_FILE)
bool THPTensor_(init)(PyObject *module)
{
#if !defined(THC_GENERIC_FILE) && !defined(TH_REAL_IS_HALF)
THVector_(vectorDispatchInit)();
#endif
THPTensorType.tp_methods = THPTensor_(methods);
THPTensorType.tp_members = THPTensor_(members);
if (PyType_Ready(&THPTensorType) < 0)
return false;
THPTensorStatelessType.tp_new = PyType_GenericNew;
if (PyType_Ready(&THPTensorStatelessType) < 0)
return false;
PyModule_AddObject(module, THPTensorBaseStr, (PyObject *)&THPTensorType);
THPTensor_(initCopyMethods)();
return true;
}
bool THPTensor_(postInit)(PyObject *module)
{
THPTensorClass = PyObject_GetAttrString(module,(char*)TH_CONCAT_STRING_2(Real,Tensor));
if (!THPTensorClass) return false;
bool is_cuda = false;
#ifdef THC_GENERIC_FILE
is_cuda = true;
#endif
const char *type_name = TH_CONCAT_STRING_2(Real,);
torch::registerPyTypeObject((PyTypeObject*)THPTensorClass, type_name, is_cuda, false);
return true;
}
#undef NUMPY_TYPE_ENUM
#endif