blob: b646ffe542655e94db525a8cd868c71a24ce09b2 [file] [log] [blame]
A8 over A8
3 registers, 15 instructions:
r0 = load8 arg(0)
r1 = splat 3B808081 (0.0039215689)
r0 = to_f32 r0
r0 = mul_f32 r1 r0
r2 = load8 arg(1)
r2 = to_f32 r2
r2 = mul_f32 r1 r2
r1 = splat 3F800000 (1)
r1 = sub_f32 r1 r0
r1 = mad_f32 r2 r1 r0
r2 = splat 437F0000 (255)
r0 = splat 3F000000 (0.5)
r0 = mad_f32 r1 r2 r0
r0 = to_i32 r0
store8 arg(1) r0
A8 over G8
4 registers, 21 instructions:
r0 = load8 arg(0)
r1 = splat 3B808081 (0.0039215689)
r0 = to_f32 r0
r0 = mul_f32 r1 r0
r2 = load8 arg(1)
r2 = to_f32 r2
r2 = mul_f32 r1 r2
r1 = splat 3F800000 (1)
r1 = sub_f32 r1 r0
r1 = mul_f32 r2 r1
r2 = splat 3E59B3D0 (0.21259999)
r0 = splat 3F371759 (0.71520001)
r3 = splat 3D93DD98 (0.0722)
r3 = mul_f32 r1 r3
r3 = mad_f32 r1 r0 r3
r3 = mad_f32 r1 r2 r3
r2 = splat 437F0000 (255)
r1 = splat 3F000000 (0.5)
r1 = mad_f32 r3 r2 r1
r1 = to_i32 r1
store8 arg(1) r1
A8 over RGBA_8888
6 registers, 37 instructions:
r0 = load8 arg(0)
r1 = splat 3B808081 (0.0039215689)
r0 = to_f32 r0
r0 = mul_f32 r1 r0
r2 = load32 arg(1)
r3 = extract r2 FF
r3 = to_f32 r3
r3 = mul_f32 r1 r3
r4 = extract r2 FF00
r4 = to_f32 r4
r4 = mul_f32 r1 r4
r5 = extract r2 FF0000
r5 = to_f32 r5
r5 = mul_f32 r1 r5
r2 = shr r2 24
r2 = to_f32 r2
r2 = mul_f32 r1 r2
r1 = splat 3F800000 (1)
r1 = sub_f32 r1 r0
r3 = mul_f32 r3 r1
r4 = mul_f32 r4 r1
r5 = mul_f32 r5 r1
r1 = mad_f32 r2 r1 r0
r2 = splat 437F0000 (255)
r0 = splat 3F000000 (0.5)
r3 = mad_f32 r3 r2 r0
r3 = to_i32 r3
r4 = mad_f32 r4 r2 r0
r4 = to_i32 r4
r5 = mad_f32 r5 r2 r0
r5 = to_i32 r5
r0 = mad_f32 r1 r2 r0
r0 = to_i32 r0
r4 = pack r3 r4 8
r0 = pack r5 r0 8
r0 = pack r4 r0 16
store32 arg(1) r0
G8 over A8
3 registers, 12 instructions:
r0 = splat 3B808081 (0.0039215689)
r1 = splat 3F800000 (1)
r2 = load8 arg(1)
r2 = to_f32 r2
r2 = mul_f32 r0 r2
r0 = sub_f32 r1 r1
r0 = mad_f32 r2 r0 r1
r2 = splat 437F0000 (255)
r1 = splat 3F000000 (0.5)
r1 = mad_f32 r0 r2 r1
r1 = to_i32 r1
store8 arg(1) r1
G8 over G8
4 registers, 21 instructions:
r0 = load8 arg(0)
r1 = splat 3B808081 (0.0039215689)
r0 = to_f32 r0
r0 = mul_f32 r1 r0
r2 = splat 3F800000 (1)
r3 = load8 arg(1)
r3 = to_f32 r3
r3 = mul_f32 r1 r3
r2 = sub_f32 r2 r2
r2 = mad_f32 r3 r2 r0
r3 = splat 3E59B3D0 (0.21259999)
r0 = splat 3F371759 (0.71520001)
r1 = splat 3D93DD98 (0.0722)
r1 = mul_f32 r2 r1
r1 = mad_f32 r2 r0 r1
r1 = mad_f32 r2 r3 r1
r3 = splat 437F0000 (255)
r2 = splat 3F000000 (0.5)
r2 = mad_f32 r1 r3 r2
r2 = to_i32 r2
store8 arg(1) r2
G8 over RGBA_8888
7 registers, 37 instructions:
r0 = load8 arg(0)
r1 = splat 3B808081 (0.0039215689)
r0 = to_f32 r0
r0 = mul_f32 r1 r0
r2 = splat 3F800000 (1)
r3 = load32 arg(1)
r4 = extract r3 FF
r4 = to_f32 r4
r4 = mul_f32 r1 r4
r5 = extract r3 FF00
r5 = to_f32 r5
r5 = mul_f32 r1 r5
r6 = extract r3 FF0000
r6 = to_f32 r6
r6 = mul_f32 r1 r6
r3 = shr r3 24
r3 = to_f32 r3
r3 = mul_f32 r1 r3
r1 = sub_f32 r2 r2
r4 = mad_f32 r4 r1 r0
r5 = mad_f32 r5 r1 r0
r6 = mad_f32 r6 r1 r0
r1 = mad_f32 r3 r1 r2
r3 = splat 437F0000 (255)
r2 = splat 3F000000 (0.5)
r4 = mad_f32 r4 r3 r2
r4 = to_i32 r4
r5 = mad_f32 r5 r3 r2
r5 = to_i32 r5
r6 = mad_f32 r6 r3 r2
r6 = to_i32 r6
r2 = mad_f32 r1 r3 r2
r2 = to_i32 r2
r5 = pack r4 r5 8
r2 = pack r6 r2 8
r2 = pack r5 r2 16
store32 arg(1) r2
RGBA_8888 over A8
3 registers, 16 instructions:
r0 = load32 arg(0)
r1 = splat 3B808081 (0.0039215689)
r0 = shr r0 24
r0 = to_f32 r0
r0 = mul_f32 r1 r0
r2 = load8 arg(1)
r2 = to_f32 r2
r2 = mul_f32 r1 r2
r1 = splat 3F800000 (1)
r1 = sub_f32 r1 r0
r1 = mad_f32 r2 r1 r0
r2 = splat 437F0000 (255)
r0 = splat 3F000000 (0.5)
r0 = mad_f32 r1 r2 r0
r0 = to_i32 r0
store8 arg(1) r0
RGBA_8888 over G8
6 registers, 33 instructions:
r0 = load32 arg(0)
r1 = extract r0 FF
r2 = splat 3B808081 (0.0039215689)
r1 = to_f32 r1
r1 = mul_f32 r2 r1
r3 = extract r0 FF00
r3 = to_f32 r3
r3 = mul_f32 r2 r3
r4 = extract r0 FF0000
r4 = to_f32 r4
r4 = mul_f32 r2 r4
r0 = shr r0 24
r0 = to_f32 r0
r0 = mul_f32 r2 r0
r5 = load8 arg(1)
r5 = to_f32 r5
r5 = mul_f32 r2 r5
r2 = splat 3F800000 (1)
r2 = sub_f32 r2 r0
r1 = mad_f32 r5 r2 r1
r3 = mad_f32 r5 r2 r3
r2 = mad_f32 r5 r2 r4
r5 = splat 3E59B3D0 (0.21259999)
r4 = splat 3F371759 (0.71520001)
r0 = splat 3D93DD98 (0.0722)
r0 = mul_f32 r2 r0
r0 = mad_f32 r3 r4 r0
r0 = mad_f32 r1 r5 r0
r5 = splat 437F0000 (255)
r1 = splat 3F000000 (0.5)
r1 = mad_f32 r0 r5 r1
r1 = to_i32 r1
store8 arg(1) r1
RGBA_8888 over RGBA_8888
9 registers, 47 instructions:
r0 = load32 arg(0)
r1 = extract r0 FF
r2 = splat 3B808081 (0.0039215689)
r1 = to_f32 r1
r1 = mul_f32 r2 r1
r3 = extract r0 FF00
r3 = to_f32 r3
r3 = mul_f32 r2 r3
r4 = extract r0 FF0000
r4 = to_f32 r4
r4 = mul_f32 r2 r4
r0 = shr r0 24
r0 = to_f32 r0
r0 = mul_f32 r2 r0
r5 = load32 arg(1)
r6 = extract r5 FF
r6 = to_f32 r6
r6 = mul_f32 r2 r6
r7 = extract r5 FF00
r7 = to_f32 r7
r7 = mul_f32 r2 r7
r8 = extract r5 FF0000
r8 = to_f32 r8
r8 = mul_f32 r2 r8
r5 = shr r5 24
r5 = to_f32 r5
r5 = mul_f32 r2 r5
r2 = splat 3F800000 (1)
r2 = sub_f32 r2 r0
r6 = mad_f32 r6 r2 r1
r7 = mad_f32 r7 r2 r3
r8 = mad_f32 r8 r2 r4
r2 = mad_f32 r5 r2 r0
r5 = splat 437F0000 (255)
r0 = splat 3F000000 (0.5)
r6 = mad_f32 r6 r5 r0
r6 = to_i32 r6
r7 = mad_f32 r7 r5 r0
r7 = to_i32 r7
r8 = mad_f32 r8 r5 r0
r8 = to_i32 r8
r0 = mad_f32 r2 r5 r0
r0 = to_i32 r0
r7 = pack r6 r7 8
r0 = pack r8 r0 8
r0 = pack r7 r0 16
store32 arg(1) r0
I32 8888 over 8888
9 registers, 24 instructions:
r0 = load32 arg(0)
r1 = extract r0 FF
r2 = extract r0 FF00
r3 = extract r0 FF0000
r0 = shr r0 24
r4 = load32 arg(1)
r5 = extract r4 FF
r6 = extract r4 FF00
r7 = extract r4 FF0000
r4 = shr r4 24
r8 = splat FF (3.5733111e-43)
r8 = sub_i32 r8 r0
r5 = mul_unorm8 r5 r8
r5 = add_i32 r1 r5
r6 = mul_unorm8 r6 r8
r6 = add_i32 r2 r6
r7 = mul_unorm8 r7 r8
r7 = add_i32 r3 r7
r8 = mul_unorm8 r4 r8
r8 = add_i32 r0 r8
r6 = pack r5 r6 8
r8 = pack r7 r8 8
r8 = pack r6 r8 16
store32 arg(1) r8
I32 (SWAR) 8888 over 8888
6 registers, 20 instructions:
r0 = load32 arg(0)
r1 = extract r0 FF00FF
r0 = extract r0 FF00FF00
r2 = load32 arg(1)
r3 = extract r2 FF00FF
r2 = extract r2 FF00FF00
r4 = splat FF (3.5733111e-43)
r5 = shr r0 16
r5 = sub_i32 r4 r5
r4 = splat FF00FF (2.3418409e-38)
r3 = mul_i32 r3 r5
r3 = add_i32 r3 r4
r3 = extract r3 FF00FF00
r3 = add_i32 r1 r3
r5 = mul_i32 r2 r5
r5 = add_i32 r5 r4
r5 = extract r5 FF00FF00
r5 = add_i32 r0 r5
r5 = pack r3 r5 8
store32 arg(1) r5