Benchmarks for F32 GEMM Relaxed SIMD microkernels
PiperOrigin-RevId: 438760194
diff --git a/bench/f32-gemm-e2e.cc b/bench/f32-gemm-e2e.cc
index b347ce0..8038a51 100644
--- a/bench/f32-gemm-e2e.cc
+++ b/bench/f32-gemm-e2e.cc
@@ -1166,6 +1166,256 @@
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMRELAXEDSIMD
+ static void f32_gemm_3x8__wasmrelaxedsimd_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 3x8 wasmrelaxedsimd loadsplat kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,  // 3x8 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat,  // 3x8 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,  // 1x8 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,  // 1x8 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 3 /* mr */, 8 /* nr */);
+ }
+ static void f32_gemm_4x8__wasmrelaxedsimd_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 4x8 wasmrelaxedsimd loadsplat kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,  // 4x8 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat,  // 4x8 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,  // 1x8 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,  // 1x8 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 4 /* mr */, 8 /* nr */);
+ }
+ static void f32_gemm_5x8__wasmrelaxedsimd_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 5x8 wasmrelaxedsimd loadsplat kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,  // 5x8 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat,  // 5x8 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,  // 1x8 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,  // 1x8 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 5 /* mr */, 8 /* nr */);
+ }
+ static void f32_gemm_6x8__wasmrelaxedsimd_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 6x8 wasmrelaxedsimd loadsplat kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,  // 6x8 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat,  // 6x8 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,  // 1x8 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_loadsplat,  // 1x8 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 6 /* mr */, 8 /* nr */);
+ }
+ static void f32_gemm_3x8__wasmrelaxedsimd_fma_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 3x8 wasmrelaxedsimd fma_loadsplat kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat,  // 3x8 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat,  // 3x8 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat,  // 1x8 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat,  // 1x8 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 3 /* mr */, 8 /* nr */);
+ }
+ static void f32_gemm_4x8__wasmrelaxedsimd_fma_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 4x8 wasmrelaxedsimd fma_loadsplat kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat,  // 4x8 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat,  // 4x8 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat,  // 1x8 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat,  // 1x8 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 4 /* mr */, 8 /* nr */);
+ }
+ static void f32_gemm_5x8__wasmrelaxedsimd_fma_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 5x8 wasmrelaxedsimd fma_loadsplat kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,  // 5x8 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat,  // 5x8 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat,  // 1x8 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat,  // 1x8 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 5 /* mr */, 8 /* nr */);
+ }
+ static void f32_gemm_6x8__wasmrelaxedsimd_fma_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 6x8 wasmrelaxedsimd fma_loadsplat kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,  // 6x8 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat,  // 6x8 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat,  // 1x8 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat,  // 1x8 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 6 /* mr */, 8 /* nr */);
+ }
+ static void f32_gemm_3x8__wasmrelaxedsimd_splat(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 3x8 wasmrelaxedsimd splat kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,  // 3x8 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat,  // 3x8 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat,  // 1x8 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat,  // 1x8 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 3 /* mr */, 8 /* nr */);
+ }
+ static void f32_gemm_4x8__wasmrelaxedsimd_splat(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 4x8 wasmrelaxedsimd splat kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat,  // 4x8 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat,  // 4x8 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat,  // 1x8 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat,  // 1x8 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 4 /* mr */, 8 /* nr */);
+ }
+ static void f32_gemm_5x8__wasmrelaxedsimd_splat(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 5x8 wasmrelaxedsimd splat kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,  // 5x8 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat,  // 5x8 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat,  // 1x8 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat,  // 1x8 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 5 /* mr */, 8 /* nr */);
+ }
+ static void f32_gemm_6x8__wasmrelaxedsimd_splat(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 6x8 wasmrelaxedsimd splat kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,  // 6x8 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat,  // 6x8 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat,  // 1x8 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_splat,  // 1x8 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 6 /* mr */, 8 /* nr */);
+ }
+ static void f32_gemm_3x8__wasmrelaxedsimd_fma_splat(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 3x8 wasmrelaxedsimd fma_splat kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat,  // 3x8 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat,  // 3x8 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat,  // 1x8 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat,  // 1x8 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 3 /* mr */, 8 /* nr */);
+ }
+ static void f32_gemm_4x8__wasmrelaxedsimd_fma_splat(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 4x8 wasmrelaxedsimd fma_splat kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat,  // 4x8 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat,  // 4x8 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat,  // 1x8 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat,  // 1x8 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 4 /* mr */, 8 /* nr */);
+ }
+ static void f32_gemm_5x8__wasmrelaxedsimd_fma_splat(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 5x8 wasmrelaxedsimd fma_splat kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,  // 5x8 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat,  // 5x8 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat,  // 1x8 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat,  // 1x8 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 5 /* mr */, 8 /* nr */);
+ }
+ static void f32_gemm_6x8__wasmrelaxedsimd_fma_splat(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 6x8 wasmrelaxedsimd fma_splat kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,  // 6x8 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat,  // 6x8 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat,  // 1x8 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8__wasmrelaxedsimd_fma_splat,  // 1x8 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 6 /* mr */, 8 /* nr */);
+ }
+ static void f32_gemm_3x8s4__wasmrelaxedsimd(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 3x8s4 wasmrelaxedsimd kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,  // 3x8s4 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd,  // 3x8s4 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8s4__wasmrelaxedsimd,  // 1x8s4 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd,  // 1x8s4 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 3 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
+ }
+ static void f32_gemm_4x8s4__wasmrelaxedsimd(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 4x8s4 wasmrelaxedsimd kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,  // 4x8s4 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd,  // 4x8s4 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8s4__wasmrelaxedsimd,  // 1x8s4 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd,  // 1x8s4 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 4 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
+ }
+ static void f32_gemm_5x8s4__wasmrelaxedsimd(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 5x8s4 wasmrelaxedsimd kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,  // 5x8s4 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd,  // 5x8s4 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8s4__wasmrelaxedsimd,  // 1x8s4 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd,  // 1x8s4 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 5 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
+ }
+ static void f32_gemm_6x8s4__wasmrelaxedsimd(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 6x8s4 wasmrelaxedsimd kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd,  // 6x8s4 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd,  // 6x8s4 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8s4__wasmrelaxedsimd,  // 1x8s4 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd,  // 1x8s4 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 6 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
+ }
+ static void f32_gemm_3x8s4__wasmrelaxedsimd_fma(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 3x8s4 wasmrelaxedsimd_fma kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,  // 3x8s4 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma,  // 3x8s4 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,  // 1x8s4 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,  // 1x8s4 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 3 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
+ }
+ static void f32_gemm_4x8s4__wasmrelaxedsimd_fma(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 4x8s4 wasmrelaxedsimd_fma kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,  // 4x8s4 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma,  // 4x8s4 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,  // 1x8s4 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,  // 1x8s4 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 4 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
+ }
+ static void f32_gemm_5x8s4__wasmrelaxedsimd_fma(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 5x8s4 wasmrelaxedsimd_fma kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,  // 5x8s4 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma,  // 5x8s4 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,  // 1x8s4 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,  // 1x8s4 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 5 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
+ }
+ static void f32_gemm_6x8s4__wasmrelaxedsimd_fma(benchmark::State& state, models::ExecutionPlanFactory model) {  // End-to-end GEMM benchmark wrapper for the 6x8s4 wasmrelaxedsimd_fma kernels.
+ GEMMEnd2EndBenchmark(state, model,
+ xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma,  // 6x8s4 GEMM ukernel
+ xnn_f32_igemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma,  // 6x8s4 IGEMM ukernel
+ xnn_f32_gemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,  // 1x8s4 GEMM ukernel; how it is used is up to GEMMEnd2EndBenchmark
+ xnn_f32_igemm_minmax_ukernel_1x8s4__wasmrelaxedsimd_fma,  // 1x8s4 IGEMM ukernel, passed alongside
+ xnn_init_f32_minmax_wasmsimd_params,  // minmax params initializer (wasmsimd variant)
+ 6 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
+ }
+
+ BENCHMARK_FP32_END2END(f32_gemm_3x8__wasmrelaxedsimd_loadsplat);  // loadsplat wrappers, 3x8 through 6x8; the macro is defined outside this patch
+ BENCHMARK_FP32_END2END(f32_gemm_4x8__wasmrelaxedsimd_loadsplat);
+ BENCHMARK_FP32_END2END(f32_gemm_5x8__wasmrelaxedsimd_loadsplat);
+ BENCHMARK_FP32_END2END(f32_gemm_6x8__wasmrelaxedsimd_loadsplat);
+
+ BENCHMARK_FP32_END2END(f32_gemm_3x8__wasmrelaxedsimd_fma_loadsplat);  // fma_loadsplat wrappers, 3x8 through 6x8
+ BENCHMARK_FP32_END2END(f32_gemm_4x8__wasmrelaxedsimd_fma_loadsplat);
+ BENCHMARK_FP32_END2END(f32_gemm_5x8__wasmrelaxedsimd_fma_loadsplat);
+ BENCHMARK_FP32_END2END(f32_gemm_6x8__wasmrelaxedsimd_fma_loadsplat);
+
+ BENCHMARK_FP32_END2END(f32_gemm_3x8__wasmrelaxedsimd_splat);  // splat wrappers, 3x8 through 6x8
+ BENCHMARK_FP32_END2END(f32_gemm_4x8__wasmrelaxedsimd_splat);
+ BENCHMARK_FP32_END2END(f32_gemm_5x8__wasmrelaxedsimd_splat);
+ BENCHMARK_FP32_END2END(f32_gemm_6x8__wasmrelaxedsimd_splat);
+
+ BENCHMARK_FP32_END2END(f32_gemm_3x8__wasmrelaxedsimd_fma_splat);  // fma_splat wrappers, 3x8 through 6x8
+ BENCHMARK_FP32_END2END(f32_gemm_4x8__wasmrelaxedsimd_fma_splat);
+ BENCHMARK_FP32_END2END(f32_gemm_5x8__wasmrelaxedsimd_fma_splat);
+ BENCHMARK_FP32_END2END(f32_gemm_6x8__wasmrelaxedsimd_fma_splat);
+
+ BENCHMARK_FP32_END2END(f32_gemm_3x8s4__wasmrelaxedsimd);  // s4 wrappers, 3x8s4 through 6x8s4
+ BENCHMARK_FP32_END2END(f32_gemm_4x8s4__wasmrelaxedsimd);
+ BENCHMARK_FP32_END2END(f32_gemm_5x8s4__wasmrelaxedsimd);
+ BENCHMARK_FP32_END2END(f32_gemm_6x8s4__wasmrelaxedsimd);
+
+ BENCHMARK_FP32_END2END(f32_gemm_3x8s4__wasmrelaxedsimd_fma);  // s4 fma wrappers, 3x8s4 through 6x8s4
+ BENCHMARK_FP32_END2END(f32_gemm_4x8s4__wasmrelaxedsimd_fma);
+ BENCHMARK_FP32_END2END(f32_gemm_5x8s4__wasmrelaxedsimd_fma);
+ BENCHMARK_FP32_END2END(f32_gemm_6x8s4__wasmrelaxedsimd_fma);
+#endif // XNN_ARCH_WASMRELAXEDSIMD
+
+
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
static void f32_gemm_3x8__wasmsimd_arm_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {
GEMMEnd2EndBenchmark(state, model,
@@ -1173,7 +1423,7 @@
xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat,
xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat,
xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
3 /* mr */, 8 /* nr */);
}
static void f32_gemm_4x8__wasmsimd_arm_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1182,7 +1432,7 @@
xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat,
xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
4 /* mr */, 8 /* nr */);
}
static void f32_gemm_5x8__wasmsimd_arm_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1191,7 +1441,7 @@
xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat,
xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
5 /* mr */, 8 /* nr */);
}
static void f32_gemm_6x8__wasmsimd_arm_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1200,7 +1450,7 @@
xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat,
xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
6 /* mr */, 8 /* nr */);
}
static void f32_gemm_3x8__wasmsimd_x86_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1209,7 +1459,7 @@
xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat,
xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
3 /* mr */, 8 /* nr */);
}
static void f32_gemm_4x8__wasmsimd_x86_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1218,7 +1468,7 @@
xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
4 /* mr */, 8 /* nr */);
}
static void f32_gemm_5x8__wasmsimd_x86_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1227,7 +1477,7 @@
xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
5 /* mr */, 8 /* nr */);
}
static void f32_gemm_6x8__wasmsimd_x86_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1236,7 +1486,7 @@
xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat,
xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
6 /* mr */, 8 /* nr */);
}
static void f32_gemm_3x8__wasmsimd_arm_splat(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1245,7 +1495,7 @@
xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat,
xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat,
xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
3 /* mr */, 8 /* nr */);
}
static void f32_gemm_4x8__wasmsimd_arm_splat(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1254,7 +1504,7 @@
xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat,
xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
4 /* mr */, 8 /* nr */);
}
static void f32_gemm_5x8__wasmsimd_arm_splat(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1263,7 +1513,7 @@
xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat,
xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
5 /* mr */, 8 /* nr */);
}
static void f32_gemm_6x8__wasmsimd_arm_splat(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1272,7 +1522,7 @@
xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat,
xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat,
xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
6 /* mr */, 8 /* nr */);
}
static void f32_gemm_3x8__wasmsimd_x86_splat(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1281,7 +1531,7 @@
xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat,
xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
3 /* mr */, 8 /* nr */);
}
static void f32_gemm_4x8__wasmsimd_x86_splat(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1290,7 +1540,7 @@
xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat,
xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat,
xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
4 /* mr */, 8 /* nr */);
}
static void f32_gemm_5x8__wasmsimd_x86_splat(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1299,7 +1549,7 @@
xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat,
xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat,
xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
5 /* mr */, 8 /* nr */);
}
static void f32_gemm_6x8__wasmsimd_x86_splat(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1308,7 +1558,7 @@
xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat,
xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat,
xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
6 /* mr */, 8 /* nr */);
}
static void f32_gemm_3x8s4__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1317,7 +1567,7 @@
xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm,
xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_arm,
xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
3 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
}
static void f32_gemm_4x8s4__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1326,7 +1576,7 @@
xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm,
xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_arm,
xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
4 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
}
static void f32_gemm_5x8s4__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1335,7 +1585,7 @@
xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm,
xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_arm,
xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
5 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
}
static void f32_gemm_6x8s4__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1344,7 +1594,7 @@
xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_arm,
xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
6 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
}
static void f32_gemm_3x8s4__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1353,7 +1603,7 @@
xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_x86,
xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
3 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
}
static void f32_gemm_4x8s4__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1362,7 +1612,7 @@
xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_x86,
xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
4 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
}
static void f32_gemm_5x8s4__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1371,7 +1621,7 @@
xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86,
xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_x86,
xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
5 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
}
static void f32_gemm_6x8s4__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
@@ -1380,7 +1630,7 @@
xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_x86,
xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86,
- xnn_init_f32_minmax_scalar_params,
+ xnn_init_f32_minmax_wasmsimd_params,
6 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
}
diff --git a/bench/f32-gemm.cc b/bench/f32-gemm.cc
index b46a071..c401663 100644
--- a/bench/f32-gemm.cc
+++ b/bench/f32-gemm.cc
@@ -1118,142 +1118,277 @@
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+#if XNN_ARCH_WASMRELAXEDSIMD
+ static void f32_gemm_3x8__wasmrelaxedsimd_loadsplat(benchmark::State& state, const char* net) {  // Benchmarks the 3x8 wasmrelaxedsimd loadsplat GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_loadsplat, 3, 8, 1, 1,  // 3, 8, 1, 1: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_4x8__wasmrelaxedsimd_loadsplat(benchmark::State& state, const char* net) {  // Benchmarks the 4x8 wasmrelaxedsimd loadsplat GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_loadsplat, 4, 8, 1, 1,  // 4, 8, 1, 1: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_5x8__wasmrelaxedsimd_loadsplat(benchmark::State& state, const char* net) {  // Benchmarks the 5x8 wasmrelaxedsimd loadsplat GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_loadsplat, 5, 8, 1, 1,  // 5, 8, 1, 1: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_6x8__wasmrelaxedsimd_loadsplat(benchmark::State& state, const char* net) {  // Benchmarks the 6x8 wasmrelaxedsimd loadsplat GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat, 6, 8, 1, 1,  // 6, 8, 1, 1: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_3x8__wasmrelaxedsimd_fma_loadsplat(benchmark::State& state, const char* net) {  // Benchmarks the 3x8 wasmrelaxedsimd fma_loadsplat GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat, 3, 8, 1, 1,  // 3, 8, 1, 1: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_4x8__wasmrelaxedsimd_fma_loadsplat(benchmark::State& state, const char* net) {  // Benchmarks the 4x8 wasmrelaxedsimd fma_loadsplat GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat, 4, 8, 1, 1,  // 4, 8, 1, 1: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_5x8__wasmrelaxedsimd_fma_loadsplat(benchmark::State& state, const char* net) {  // Benchmarks the 5x8 wasmrelaxedsimd fma_loadsplat GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat, 5, 8, 1, 1,  // 5, 8, 1, 1: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_6x8__wasmrelaxedsimd_fma_loadsplat(benchmark::State& state, const char* net) {  // Benchmarks the 6x8 wasmrelaxedsimd fma_loadsplat GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat, 6, 8, 1, 1,  // 6, 8, 1, 1: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_3x8__wasmrelaxedsimd_splat(benchmark::State& state, const char* net) {  // Benchmarks the 3x8 wasmrelaxedsimd splat GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_splat, 3, 8, 1, 1,  // 3, 8, 1, 1: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_4x8__wasmrelaxedsimd_splat(benchmark::State& state, const char* net) {  // Benchmarks the 4x8 wasmrelaxedsimd splat GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_splat, 4, 8, 1, 1,  // 4, 8, 1, 1: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_5x8__wasmrelaxedsimd_splat(benchmark::State& state, const char* net) {  // Benchmarks the 5x8 wasmrelaxedsimd splat GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_splat, 5, 8, 1, 1,  // 5, 8, 1, 1: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_6x8__wasmrelaxedsimd_splat(benchmark::State& state, const char* net) {  // Benchmarks the 6x8 wasmrelaxedsimd splat GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_splat, 6, 8, 1, 1,  // 6, 8, 1, 1: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_3x8__wasmrelaxedsimd_fma_splat(benchmark::State& state, const char* net) {  // Benchmarks the 3x8 wasmrelaxedsimd fma_splat GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmrelaxedsimd_fma_splat, 3, 8, 1, 1,  // 3, 8, 1, 1: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_4x8__wasmrelaxedsimd_fma_splat(benchmark::State& state, const char* net) {  // Benchmarks the 4x8 wasmrelaxedsimd fma_splat GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmrelaxedsimd_fma_splat, 4, 8, 1, 1,  // 4, 8, 1, 1: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_5x8__wasmrelaxedsimd_fma_splat(benchmark::State& state, const char* net) {  // Benchmarks the 5x8 wasmrelaxedsimd fma_splat GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmrelaxedsimd_fma_splat, 5, 8, 1, 1,  // 5, 8, 1, 1: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_6x8__wasmrelaxedsimd_fma_splat(benchmark::State& state, const char* net) {  // Benchmarks the 6x8 wasmrelaxedsimd fma_splat GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_splat, 6, 8, 1, 1,  // 6, 8, 1, 1: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_3x8s4__wasmrelaxedsimd(benchmark::State& state, const char* net) {  // Benchmarks the 3x8s4 wasmrelaxedsimd GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8s4__wasmrelaxedsimd, 3, 8, 1, 4,  // 3, 8, 1, 4: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_4x8s4__wasmrelaxedsimd(benchmark::State& state, const char* net) {  // Benchmarks the 4x8s4 wasmrelaxedsimd GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__wasmrelaxedsimd, 4, 8, 1, 4,  // 4, 8, 1, 4: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_5x8s4__wasmrelaxedsimd(benchmark::State& state, const char* net) {  // Benchmarks the 5x8s4 wasmrelaxedsimd GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd, 5, 8, 1, 4,  // 5, 8, 1, 4: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_6x8s4__wasmrelaxedsimd(benchmark::State& state, const char* net) {  // Benchmarks the 6x8s4 wasmrelaxedsimd GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd, 6, 8, 1, 4,  // 6, 8, 1, 4: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_3x8s4__wasmrelaxedsimd_fma(benchmark::State& state, const char* net) {  // Benchmarks the 3x8s4 wasmrelaxedsimd_fma GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8s4__wasmrelaxedsimd_fma, 3, 8, 1, 4,  // 3, 8, 1, 4: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_4x8s4__wasmrelaxedsimd_fma(benchmark::State& state, const char* net) {  // Benchmarks the 4x8s4 wasmrelaxedsimd_fma GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__wasmrelaxedsimd_fma, 4, 8, 1, 4,  // 4, 8, 1, 4: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_5x8s4__wasmrelaxedsimd_fma(benchmark::State& state, const char* net) {  // Benchmarks the 5x8s4 wasmrelaxedsimd_fma GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma, 5, 8, 1, 4,  // 5, 8, 1, 4: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+ static void f32_gemm_6x8s4__wasmrelaxedsimd_fma(benchmark::State& state, const char* net) {  // Benchmarks the 6x8s4 wasmrelaxedsimd_fma GEMM ukernel; `net` is not read in this body.
+ GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma, 6, 8, 1, 4,  // 6, 8, 1, 4: presumably mr, nr, kr, sr -- confirm against GEMMBenchmark
+ xnn_init_f32_minmax_wasmsimd_params);
+ }
+
+ BENCHMARK_GEMM(f32_gemm_3x8__wasmrelaxedsimd_loadsplat)  // loadsplat wrappers, 3x8 through 6x8; the macro is defined outside this patch
+ BENCHMARK_GEMM(f32_gemm_4x8__wasmrelaxedsimd_loadsplat)
+ BENCHMARK_GEMM(f32_gemm_5x8__wasmrelaxedsimd_loadsplat)
+ BENCHMARK_GEMM(f32_gemm_6x8__wasmrelaxedsimd_loadsplat)
+
+ BENCHMARK_GEMM(f32_gemm_3x8__wasmrelaxedsimd_fma_loadsplat)  // fma_loadsplat wrappers, 3x8 through 6x8
+ BENCHMARK_GEMM(f32_gemm_4x8__wasmrelaxedsimd_fma_loadsplat)
+ BENCHMARK_GEMM(f32_gemm_5x8__wasmrelaxedsimd_fma_loadsplat)
+ BENCHMARK_GEMM(f32_gemm_6x8__wasmrelaxedsimd_fma_loadsplat)
+
+ BENCHMARK_GEMM(f32_gemm_3x8__wasmrelaxedsimd_splat)  // splat wrappers, 3x8 through 6x8
+ BENCHMARK_GEMM(f32_gemm_4x8__wasmrelaxedsimd_splat)
+ BENCHMARK_GEMM(f32_gemm_5x8__wasmrelaxedsimd_splat)
+ BENCHMARK_GEMM(f32_gemm_6x8__wasmrelaxedsimd_splat)
+
+ BENCHMARK_GEMM(f32_gemm_3x8__wasmrelaxedsimd_fma_splat)  // fma_splat wrappers, 3x8 through 6x8
+ BENCHMARK_GEMM(f32_gemm_4x8__wasmrelaxedsimd_fma_splat)
+ BENCHMARK_GEMM(f32_gemm_5x8__wasmrelaxedsimd_fma_splat)
+ BENCHMARK_GEMM(f32_gemm_6x8__wasmrelaxedsimd_fma_splat)
+
+ BENCHMARK_GEMM(f32_gemm_3x8s4__wasmrelaxedsimd)  // s4 wrappers, 3x8s4 through 6x8s4
+ BENCHMARK_GEMM(f32_gemm_4x8s4__wasmrelaxedsimd)
+ BENCHMARK_GEMM(f32_gemm_5x8s4__wasmrelaxedsimd)
+ BENCHMARK_GEMM(f32_gemm_6x8s4__wasmrelaxedsimd)
+
+ BENCHMARK_GEMM(f32_gemm_3x8s4__wasmrelaxedsimd_fma)  // s4 fma wrappers, 3x8s4 through 6x8s4
+ BENCHMARK_GEMM(f32_gemm_4x8s4__wasmrelaxedsimd_fma)
+ BENCHMARK_GEMM(f32_gemm_5x8s4__wasmrelaxedsimd_fma)
+ BENCHMARK_GEMM(f32_gemm_6x8s4__wasmrelaxedsimd_fma)
+#endif // XNN_ARCH_WASMRELAXEDSIMD
+
+
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
static void f32_gemm_3x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, 3, 8, 1, 1,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_4x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, 4, 8, 1, 1,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_5x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, 5, 8, 1, 1,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_6x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, 6, 8, 1, 1,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_3x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, 3, 8, 1, 1,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_4x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, 4, 8, 1, 1,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_5x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, 5, 8, 1, 1,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_6x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, 6, 8, 1, 1,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_3x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat, 3, 8, 1, 1,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_4x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_splat, 4, 8, 1, 1,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_5x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat, 5, 8, 1, 1,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_6x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat, 6, 8, 1, 1,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_3x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat, 3, 8, 1, 1,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_4x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_splat, 4, 8, 1, 1,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_5x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat, 5, 8, 1, 1,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_6x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat, 6, 8, 1, 1,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_3x8s4__wasmsimd_arm(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm, 3, 8, 1, 4,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_4x8s4__wasmsimd_arm(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_arm, 4, 8, 1, 4,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_5x8s4__wasmsimd_arm(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm, 5, 8, 1, 4,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_6x8s4__wasmsimd_arm(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm, 6, 8, 1, 4,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_3x8s4__wasmsimd_x86(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86, 3, 8, 1, 4,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_4x8s4__wasmsimd_x86(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_x86, 4, 8, 1, 4,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_5x8s4__wasmsimd_x86(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86, 5, 8, 1, 4,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_gemm_6x8s4__wasmsimd_x86(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86, 6, 8, 1, 4,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_ppmm_4x8_unipass__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_x32_packx_ukernel_4x__wasmsimd, 4, 8,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_ppmm_4x8_unipass__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
PPMM1PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_x32_packx_ukernel_4x__wasmsimd, 4, 8,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_ppmm_4x8_twopass__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_x32_packx_ukernel_4x__wasmsimd, 4, 8,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
static void f32_ppmm_4x8_twopass__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
PPMM2PBenchmark(state, xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_x32_packx_ukernel_4x__wasmsimd, 4, 8,
- xnn_init_f32_minmax_scalar_params);
+ xnn_init_f32_minmax_wasmsimd_params);
}
BENCHMARK_GEMM(f32_gemm_3x8__wasmsimd_arm_loadsplat)
BENCHMARK_GEMM(f32_gemm_4x8__wasmsimd_arm_loadsplat)
BENCHMARK_GEMM(f32_gemm_5x8__wasmsimd_arm_loadsplat)
BENCHMARK_GEMM(f32_gemm_6x8__wasmsimd_arm_loadsplat)
+
BENCHMARK_GEMM(f32_gemm_3x8__wasmsimd_x86_loadsplat)
BENCHMARK_GEMM(f32_gemm_4x8__wasmsimd_x86_loadsplat)
BENCHMARK_GEMM(f32_gemm_5x8__wasmsimd_x86_loadsplat)
BENCHMARK_GEMM(f32_gemm_6x8__wasmsimd_x86_loadsplat)
+
BENCHMARK_GEMM(f32_gemm_3x8__wasmsimd_arm_splat)
BENCHMARK_GEMM(f32_gemm_4x8__wasmsimd_arm_splat)
BENCHMARK_GEMM(f32_gemm_5x8__wasmsimd_arm_splat)
BENCHMARK_GEMM(f32_gemm_6x8__wasmsimd_arm_splat)
+
BENCHMARK_GEMM(f32_gemm_3x8__wasmsimd_x86_splat)
BENCHMARK_GEMM(f32_gemm_4x8__wasmsimd_x86_splat)
BENCHMARK_GEMM(f32_gemm_5x8__wasmsimd_x86_splat)
BENCHMARK_GEMM(f32_gemm_6x8__wasmsimd_x86_splat)
+
BENCHMARK_GEMM(f32_gemm_3x8s4__wasmsimd_arm)
BENCHMARK_GEMM(f32_gemm_4x8s4__wasmsimd_arm)
BENCHMARK_GEMM(f32_gemm_5x8s4__wasmsimd_arm)
BENCHMARK_GEMM(f32_gemm_6x8s4__wasmsimd_arm)
+
BENCHMARK_GEMM(f32_gemm_3x8s4__wasmsimd_x86)
BENCHMARK_GEMM(f32_gemm_4x8s4__wasmsimd_x86)
BENCHMARK_GEMM(f32_gemm_5x8s4__wasmsimd_x86)