Enable QS8 4x8 dot product GEMM AArch32 microkernel for little cores
PiperOrigin-RevId: 423980261
diff --git a/src/init.c b/src/init.c
index bc1ea37..b5bc0e4 100644
--- a/src/init.c
+++ b/src/init.c
@@ -262,6 +262,43 @@
break;
}
}
+ #if XNN_MAX_UARCH_TYPES > 1
+ {
+ /* Choose QS8 micro-kernels for little cores (slots 1..XNN_MAX_UARCH_TYPES-1) according to the QS8 micro-kernel specification for the big core */
+ const uint32_t mr = xnn_params.qs8.gemm.mr;  /* must come from qs8 (not qc8): the kernels installed below go into xnn_params.qs8.gemm */
+ const uint32_t nr = xnn_params.qs8.gemm.nr;
+ const uint32_t log2_kr = xnn_params.qs8.gemm.log2_kr;
+ for (size_t i = 1; i < XNN_MAX_UARCH_TYPES; i++) {
+ const struct cpuinfo_uarch_info* uarch_info = cpuinfo_get_uarch(i);
+ if (uarch_info == NULL) {
+ /* No more microarchitectures in the system */
+ break;
+ }
+ /* Override slot i only when the QS8 tile (mr, nr, log2_kr) matches the kernels specialized for this core; otherwise leave it unchanged */
+ switch (uarch_info->uarch) {
+ case cpuinfo_uarch_cortex_a53:
+ case cpuinfo_uarch_cortex_a55r0:
+ if (mr == 4 && nr == 8 && log2_kr == 0) {
+ xnn_params.qs8.gemm.minmax.gemm.function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64;
+ xnn_params.qs8.gemm.minmax.igemm.function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64;
+ xnn_params.qs8.gemm.minmax.gemm1.function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane;
+ xnn_params.qs8.gemm.minmax.igemm1.function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane;
+ }
+ break;
+ case cpuinfo_uarch_cortex_a55:
+ if (mr == 4 && nr == 8 && log2_kr == 2 && cpuinfo_has_arm_neon_dot()) {
+ xnn_params.qs8.gemm.minmax.gemm.function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55;
+ xnn_params.qs8.gemm.minmax.igemm.function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64;
+ xnn_params.qs8.gemm.minmax.gemm1.function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neondot;
+ xnn_params.qs8.gemm.minmax.igemm1.function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ #endif // XNN_MAX_UARCH_TYPES > 1
#else // XNN_ENABLE_ASSEMBLY
if (!XNN_PLATFORM_IOS && cpuinfo_has_arm_neon_dot()) {
xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neondot);