| #ifndef THC_TENSOR_INFO_INC |
| #define THC_TENSOR_INFO_INC |
| |
| #include <cuda.h> |
| #include <assert.h> |
| #include "THCGeneral.h" |
| #include "THCTensor.h" |
| |
| // Maximum number of dimensions allowed for cutorch |
| #define MAX_CUTORCH_DIMS 25 |
| |
| // Warning string for tensor arguments that are too large or have too |
| // many dimensions |
#define CUTORCH_STR(X) #X
// Double expansion so the *value* of MAX_CUTORCH_DIMS is stringified,
// not its name
#define CUTORCH_XSTR(X) CUTORCH_STR(X)
#define CUTORCH_DIM_WARNING "tensor too large or too many (>" \
  CUTORCH_XSTR(MAX_CUTORCH_DIMS) ") dimensions"
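// (With MAX_CUTORCH_DIMS == 25 this expands to
// "tensor too large or too many (>25) dimensions".)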
| |
| // CUDA kernel argument that defines tensor layout |
| template <typename T, typename IndexType> |
| struct TensorInfo { |
| TensorInfo(T* p, |
| int dim, |
| IndexType sz[MAX_CUTORCH_DIMS], |
| IndexType st[MAX_CUTORCH_DIMS]); |
| |
| // Set the size of the given dimension to 1, as if it were a |
| // reduction dim (allows you to calculate offsets of the reduction |
| // slice) |
| void reduceDim(int dim); |
| |
| // Collapses all runs of successive dimensions if the size/strides |
| // match up within the run and there are no holes between the |
| // dimensions. |
| // If excludeDim is set (not -1), then excludeDim will not be |
| // collapsed with any other dimension. |
  // Returns the new dimension index that excludeDim maps to, since
  // collapsing can reduce the number of dimensions and renumber the
  // ones that remain.
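  //
  // Worked example (values follow directly from the rules above): a
  // contiguous tensor with sizes {4, 3} and strides {3, 1} collapses to
  // dims == 1, sizes {12}, strides {1}, because strides[0] ==
  // sizes[1] * strides[1]. A padded view with sizes {4, 3} and strides
  // {6, 1} stays two-dimensional, since 6 != 3 * 1 (there is a hole
  // between the rows).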
| int collapseDims(int excludeDim = -1); |
| |
  // Contiguous tensors of more than one dimension are collapsed down
  // to one dimension by collapseDims(); after collapsing, contiguity
  // reduces to this single-dimension, unit-stride check.
| __host__ __device__ inline bool isContiguous() const { |
| return (dims == 1 && strides[0] == 1); |
| } |
| |
| T* data; |
| IndexType sizes[MAX_CUTORCH_DIMS]; |
| IndexType strides[MAX_CUTORCH_DIMS]; |
| int dims; |
| }; |
| |
| template <typename T, typename IndexType> |
| TensorInfo<T, IndexType>::TensorInfo(T* p, |
| int dim, |
| IndexType sz[MAX_CUTORCH_DIMS], |
| IndexType st[MAX_CUTORCH_DIMS]) { |
| data = p; |
| dims = dim; |
  assert(dims > 0 && dims <= MAX_CUTORCH_DIMS);
| |
| for (int i = 0; i < dim; ++i) { |
| sizes[i] = sz[i]; |
| strides[i] = st[i]; |
| } |
| } |
| |
| template <typename T, typename IndexType> |
| void |
| TensorInfo<T, IndexType>::reduceDim(int dim) { |
| assert(dim < dims && dim >= 0); |
| sizes[dim] = 1; |
| } |
| |
| template <typename T, typename IndexType> |
| int |
| TensorInfo<T, IndexType>::collapseDims(int excludeDim) { |
| // Find the innermost dimension not of size 1, since dimensions of size 1 are |
| // collapsible. |
| int firstNonOneDim = -1; |
| |
| for (int i = dims - 1; i >= 0; --i) { |
| if (i == excludeDim) { |
| // We cannot collapse this dimension, even if it is size 1 |
| firstNonOneDim = i; |
| break; |
| } |
| |
| if (sizes[i] != 1) { |
| firstNonOneDim = i; |
| break; |
| } |
| } |
| |
| // Special case: if all dimensions are of size 1, then this is a |
| // single-point tensor that we still have to operate on. Reduce to a |
| // single point. |
| if (firstNonOneDim == -1) { |
| assert(excludeDim == -1); |
| |
| dims = 1; |
| sizes[0] = 1; |
| strides[0] = 1; |
| |
| // Everything effectively got collapsed into this dimension |
| return 0; |
| } |
| |
| // Count the number of successive dimensions that can be collapsed, from |
| // innermost to outermost. |
| int numCollapsed = 0; |
| |
  // Account for the innermost size-1 dims skipped over above
| numCollapsed += dims - 1 - firstNonOneDim; |
| |
  // We perform one pass through to determine how many dimensions we
  // can collapse, before calculating the actual size of the collapsed
  // dimensions.
  // sizeInner/strideInner track the accumulated size and the stride of
  // the innermost dimension of the run currently being collapsed.
| long sizeInner = sizes[firstNonOneDim]; |
| long strideInner = strides[firstNonOneDim]; |
| |
| for (int i = firstNonOneDim - 1; i >= 0; --i) { |
| long sizeOuter = sizes[i]; |
| long strideOuter = strides[i]; |
| |
    // Don't collapse this dimension if we want to exclude it from
    // collapsing.
    // Since this code attempts to collapse the outer dimension (i) into
    // the inner dimension (i + 1), the collapse is only allowed if
    // neither of the two is the excluded dimension.
| if ((excludeDim != i) && (excludeDim != i + 1)) { |
| // The next outermost dimension can be skipped if size 1 |
| if (sizeOuter == 1) { |
| ++numCollapsed; |
| continue; |
| } |
| |
| // If the next outermost dimension is contiguous with the |
| // previous non-collapsed one, collapse it |
| if (strideOuter == strideInner * sizeInner) { |
| ++numCollapsed; |
| |
        // Accumulate the size of the run collapsed so far
| sizeInner = sizeInner * sizeOuter; |
| continue; |
| } |
| } |
| |
| // Otherwise, this new outer dimension at `i` cannot be collapsed |
| // because it is excluded from collapsing, or it is not contiguous |
| // with the previous inner dimension. |
| sizeInner = sizeOuter; |
| strideInner = strideOuter; |
| } |
| |
| // This will be our new size/stride and dimension. |
| IndexType newSizes[MAX_CUTORCH_DIMS]; |
| IndexType newStrides[MAX_CUTORCH_DIMS]; |
| |
| assert(numCollapsed < dims); |
| int newDims = dims - numCollapsed; |
| |
  // Index that the excluded dimension maps to after collapsing (stays
  // -1 when there is no excluded dimension).
| int returnDim = -1; |
| |
| // We perform a second pass through the dimensions to actually |
| // calculate the size of the collapsed dimensions. |
| int collapsedIndex = dims - numCollapsed - 1; |
| newSizes[collapsedIndex] = sizes[firstNonOneDim]; |
| newStrides[collapsedIndex] = strides[firstNonOneDim]; |
| |
| if (firstNonOneDim == excludeDim) { |
| returnDim = collapsedIndex; |
| } |
| |
| for (int i = firstNonOneDim - 1; i >= 0; --i) { |
| IndexType sizeOuter = sizes[i]; |
| IndexType strideOuter = strides[i]; |
| |
| if ((excludeDim != i) && (excludeDim != i + 1)) { |
| if (sizeOuter == 1) { |
| // skip |
| continue; |
| } |
| |
| if (strideOuter == newSizes[collapsedIndex] * newStrides[collapsedIndex]) { |
| // collapse |
| newSizes[collapsedIndex] *= sizeOuter; |
| continue; |
| } |
| } |
| |
| // Otherwise, strides don't match, or dim `i` is excluded from |
| // collapsing. |
| --collapsedIndex; |
| assert(collapsedIndex >= 0); |
| assert(collapsedIndex < newDims); |
| newSizes[collapsedIndex] = sizeOuter; |
| newStrides[collapsedIndex] = strideOuter; |
| |
| if (excludeDim == i) { |
| returnDim = collapsedIndex; |
| } |
| } |
| |
| // We must have filled all the dimensions we're looking for |
| assert(collapsedIndex == 0); |
| assert((excludeDim == -1) || (returnDim != -1)); |
| |
| dims = newDims; |
| |
| for (int i = 0; i < dims; ++i) { |
| sizes[i] = newSizes[i]; |
| strides[i] = newStrides[i]; |
| } |
| |
| // After collapsing, the original `excludeDim` may have been |
| // renumbered to this new `returnDim`, since some dimensions could |
| // have been collapsed. |
| return returnDim; |
| } |
| |
// Translates a linear index, as used by the apply kernels, into a T* offset;
// specialized on `Dims` to reduce nvcc compilation time
| template <typename T, typename IndexType, int Dims> |
| struct IndexToOffset { |
| static __host__ __device__ IndexType get( |
| IndexType linearId, |
| const TensorInfo<T, IndexType>& info) { |
| IndexType offset = 0; |
| |
| // Use static dims |
| for (int i = Dims - 1; i >= 0; --i) { |
| IndexType curDimIndex = linearId % info.sizes[i]; |
| IndexType curDimOffset = curDimIndex * info.strides[i]; |
| offset += curDimOffset; |
| |
| if (i > 0) { |
| linearId /= info.sizes[i]; |
| } |
| } |
| |
| return offset; |
| } |
| }; |
| |
| // For contiguous tensors, the offset = index |
| template <typename T, typename IndexType> |
| struct IndexToOffset<T, IndexType, -2> { |
| static inline __host__ __device__ IndexType |
| get(IndexType linearId, const TensorInfo<T, IndexType>& info) { |
| return linearId; |
| } |
| }; |
| |
| template <typename T, typename IndexType> |
| struct IndexToOffset<T, IndexType, -1> { |
| static inline __host__ __device__ IndexType get( |
| IndexType linearId, |
| const TensorInfo<T, IndexType>& info) { |
| |
| IndexType offset = 0; |
| |
| // Use dynamic dims |
| for (int i = info.dims - 1; i >= 0; --i) { |
| IndexType curDimIndex = linearId % info.sizes[i]; |
| IndexType curDimOffset = curDimIndex * info.strides[i]; |
| offset += curDimOffset; |
| |
| linearId /= info.sizes[i]; |
| } |
| |
| return offset; |
| } |
| }; |
| |
| #endif // THC_TENSOR_INFO_INC |