add to_fp16/from_fp16 on aarch64
Nothing too tricky here: add fcvtn/fcvtl to the Assembler and use them to implement both ops.
Change-Id: I48e51c301e53efc63fc92c378fe45a0e5a2df7e6
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/340520
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
diff --git a/src/core/SkVM.cpp b/src/core/SkVM.cpp
index fed90bb..9bb8867e 100644
--- a/src/core/SkVM.cpp
+++ b/src/core/SkVM.cpp
@@ -2248,6 +2248,9 @@
void Assembler::frintp4s(V d, V n) { this->op(0b0'1'0'01110'1'0'10000'1100'0'10, n,d); }
void Assembler::frintm4s(V d, V n) { this->op(0b0'1'0'01110'0'0'10000'1100'1'10, n,d); }
+ void Assembler::fcvtn(V d, V n) { this->op(0b0'0'0'01110'0'0'10000'10110'10, n,d); }  // FCVTN: narrow 4x f32 in n to 4x f16 in the low half of d
+ void Assembler::fcvtl(V d, V n) { this->op(0b0'0'0'01110'0'0'10000'10111'10, n,d); }  // FCVTL: widen 4x f16 in the low half of n to 4x f32 in d
+
void Assembler::xtns2h(V d, V n) { this->op(0b0'0'0'01110'01'10000'10010'10, n,d); }
void Assembler::xtnh2b(V d, V n) { this->op(0b0'0'0'01110'00'10000'10010'10, n,d); }
@@ -3723,8 +3726,6 @@
#elif defined(__aarch64__)
case Op::store128:
case Op::load128:
- case Op::to_fp16:
- case Op::from_fp16:
return false; // TODO
case Op::assert_true: {
@@ -3918,6 +3919,16 @@
case Op::round: a->fcvtns4s(dst(), r(x)); break;
case Op::ceil: a->frintp4s(dst(), r(x)); break;
case Op::floor: a->frintm4s(dst(), r(x)); break;
+
+ case Op::to_fp16:
+ a->fcvtn (dst(x), r(x)); // 4x f32 -> 4x f16 in bottom four lanes
+ a->uxtlh2s(dst(), dst()); // expand to 4x f16 in even 16-bit lanes
+ break;
+
+ case Op::from_fp16:
+ a->xtns2h(dst(x), r(x)); // pack even 16-bit lanes into bottom four lanes
+ a->fcvtl (dst(), dst()); // 4x f16 -> 4x f32
+ break;
#endif
}
diff --git a/src/core/SkVM.h b/src/core/SkVM.h
index 1dbe956..1e5e5e5 100644
--- a/src/core/SkVM.h
+++ b/src/core/SkVM.h
@@ -315,6 +315,8 @@
fcvtns4s, // round float -> int (nearest even)
frintp4s, // round float -> int as float, toward plus infinity (ceil)
frintm4s, // round float -> int as float, toward minus infinity (floor)
+ fcvtn, // f32 -> f16 in low half
+ fcvtl, // f16 in low half -> f32
xtns2h, // u32 -> u16
xtnh2b, // u16 -> u8
uxtlb2h, // u8 -> u16 (TODO: this is a special case of ushll.8h)
diff --git a/tests/SkVMTest.cpp b/tests/SkVMTest.cpp
index 212e73d..d909010 100644
--- a/tests/SkVMTest.cpp
+++ b/tests/SkVMTest.cpp
@@ -1774,12 +1774,16 @@
a.fcvtns4s(A::v4, A::v3);
a.frintp4s(A::v4, A::v3);
a.frintm4s(A::v4, A::v3);
+ a.fcvtn (A::v4, A::v3);
+ a.fcvtl (A::v4, A::v3);
},{
0x64,0xd8,0x21,0x4e,
0x64,0xb8,0xa1,0x4e,
0x64,0xa8,0x21,0x4e,
0x64,0x88,0xa1,0x4e,
0x64,0x98,0x21,0x4e,
+ 0x64,0x68,0x21,0x0e,
+ 0x64,0x78,0x21,0x0e,
});
test_asm(r, [&](A& a) {