| #include "dnnlowp_partition.h" | 
 |  | 
 | #include "caffe2/core/logging.h" | 
 |  | 
 | namespace caffe2 { | 
 |  | 
 | static size_t GetWorkPerThread_(size_t work, int nthreads, int work_align) { | 
 |   return ((work + work_align - 1) / work_align + nthreads - 1) / nthreads * | 
 |       work_align; | 
 | } | 
 |  | 
 | std::pair<size_t, size_t> | 
 | Get1DPartition(size_t work, int nthreads, int tid, int work_align /*=1*/) { | 
 |   size_t work_per_thread = GetWorkPerThread_(work, nthreads, work_align); | 
 |   size_t work_begin = std::min(tid * work_per_thread, work); | 
 |   size_t work_end = std::min(work_begin + work_per_thread, work); | 
 |   return {work_begin, work_end}; | 
 | } | 
 |  | 
 | void Get1DPartitionOf2D( | 
 |     int m, | 
 |     int n, | 
 |     int nthreads, | 
 |     int tid, | 
 |     int* m_begin, | 
 |     int* m_end, | 
 |     int* n_begin, | 
 |     int* n_end, | 
 |     int n_align /*=1*/) { | 
 |   if (m >= nthreads || m == 0) { | 
 |     // When m >= nthreads, just parallelize over m. | 
 |     std::tie(*m_begin, *m_end) = Get1DPartition(m, nthreads, tid); | 
 |     *n_begin = 0; | 
 |     *n_end = n; | 
 |   } else { | 
 |     // Otherwise, each row is parallelized by multiple threads. | 
 |     // nthreads_per_row is floor(nthreads / m). If we use ceil, some rows won't | 
 |     // be handled by any thread. | 
 |     int nthreads_per_row = nthreads / m; | 
 |     *m_begin = std::max(std::min(tid / nthreads_per_row, m - 1), 0); | 
 |     *m_end = std::min(*m_begin + 1, m); | 
 |  | 
 |     int tid_of_m_begin = std::min(*m_begin * nthreads_per_row, nthreads); | 
 |     int tid_of_m_end = std::min( | 
 |         (*m_end == m) ? nthreads : (tid_of_m_begin + nthreads_per_row), | 
 |         nthreads); | 
 |     int nthreads_within_row = tid_of_m_end - tid_of_m_begin; | 
 |     int tid_within_row = tid - tid_of_m_begin; | 
 |     CAFFE_ENFORCE_GE(tid_within_row, 0); | 
 |     CAFFE_ENFORCE_LT(tid_within_row, nthreads_within_row); | 
 |  | 
 |     std::tie(*n_begin, *n_end) = | 
 |         Get1DPartition(n, nthreads_within_row, tid_within_row, n_align); | 
 |   } | 
 | } | 
 |  | 
 | } // namespace caffe2 |