// blob: 43aa085ab543bbe8c28cddd375fd77e8577d6443 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
// Template parameters: CHANNEL_TILE is the number of channels handled per
// iteration of the unrolled loop; this template only supports PIXEL_TILE == 1,
// so the optional "x<PIXEL_TILE>" suffix in the kernel name is never emitted.
$assert CHANNEL_TILE >= 1
$assert PIXEL_TILE == 1
// ABC[C] supplies the one-character suffix appended to the per-channel local
// variable names in the unrolled loop (vtl0, vtl1, ..., vtlA, ...). The table
// has 24 entries, so CHANNEL_TILE values above 24 have no suffix available.
$ABC = "0123456789ABCDEFGHIJKLMN"
#include <assert.h>
#include <xnnpack/bilinear.h>
// Scalar f32 bilinear interpolation micro-kernel (generated from a template;
// CHANNEL_TILE selects how many channels the unrolled loop handles at once).
//
// For every output pixel the kernel consumes four source pointers from `input`
// and two coefficients from `weights`, and writes one interpolated float per
// channel to `output`. Per channel it computes:
//   vt = vtl + (vtr - vtl) * weights[0]
//   vb = vbl + (vbr - vbl) * weights[1 is not used here; same weights[0]]
//   vo = vt  + (vb  - vt ) * weights[1]
//
// Parameters:
//   output_pixels    - number of output pixels to produce; must be non-zero.
//   channels         - channel extent in BYTES (not elements); must be a
//                      non-zero multiple of sizeof(float).
//   input            - array of source pointers; four consecutive entries are
//                      read per output pixel (named tl/tr/bl/br below --
//                      presumably top-left, top-right, bottom-left,
//                      bottom-right; confirm against the caller).
//   input_offset     - byte offset added to each of the four source pointers.
//   weights          - two floats per output pixel: weights[0] blends the
//                      l/r pairs, weights[1] blends the two resulting values.
//   output           - destination buffer; advanced by one float per channel.
//   output_increment - number of bytes added to `output` after each pixel's
//                      channels have been written.
void xnn_f32_bilinear_ukernel__scalar_c${CHANNEL_TILE}${"" if PIXEL_TILE == 1 else "x%d" % PIXEL_TILE}(
size_t output_pixels,
size_t channels,
const float**restrict input,
size_t input_offset,
const float*restrict weights,
float*restrict output,
size_t output_increment)
{
// Both loops below are do/while and run at least once, so zero counts are
// rejected up front. `channels` is counted in bytes, hence the sizeof check.
assert(output_pixels != 0);
assert(channels != 0);
assert(channels % sizeof(float) == 0);
// One iteration per output pixel.
do {
// Fetch this pixel's four source pointers; input_offset is applied in bytes,
// which is why the addition is done on uintptr_t rather than on float*.
const float* i0 = (const float*) ((uintptr_t) input[0] + input_offset);
const float* i1 = (const float*) ((uintptr_t) input[1] + input_offset);
const float* i2 = (const float*) ((uintptr_t) input[2] + input_offset);
const float* i3 = (const float*) ((uintptr_t) input[3] + input_offset);
input += 4;
// The two interpolation coefficients are shared by all channels of the pixel.
const float valphah = weights[0];
const float valphav = weights[1];
weights += 2;
// Remaining channel bytes for this pixel.
size_t c = channels;
$if CHANNEL_TILE > 1:
// Unrolled main loop: CHANNEL_TILE channels per iteration, with loads,
// differences, blends and stores each grouped together across the tile.
for (; c >= ${CHANNEL_TILE} * sizeof(float); c -= ${CHANNEL_TILE} * sizeof(float)) {
$for C in range(CHANNEL_TILE):
const float vtl${ABC[C]} = i0[${C}];
const float vtr${ABC[C]} = i1[${C}];
const float vbl${ABC[C]} = i2[${C}];
const float vbr${ABC[C]} = i3[${C}];
i0 += ${CHANNEL_TILE};
i1 += ${CHANNEL_TILE};
i2 += ${CHANNEL_TILE};
i3 += ${CHANNEL_TILE};
$for C in range(CHANNEL_TILE):
const float vtd${ABC[C]} = vtr${ABC[C]} - vtl${ABC[C]};
const float vbd${ABC[C]} = vbr${ABC[C]} - vbl${ABC[C]};
$for C in range(CHANNEL_TILE):
const float vt${ABC[C]} = vtl${ABC[C]} + vtd${ABC[C]} * valphah;
const float vb${ABC[C]} = vbl${ABC[C]} + vbd${ABC[C]} * valphah;
$for C in range(CHANNEL_TILE):
const float vd${ABC[C]} = vb${ABC[C]} - vt${ABC[C]};
$for C in range(CHANNEL_TILE):
const float vo${ABC[C]} = vt${ABC[C]} + vd${ABC[C]} * valphav;
$for C in range(CHANNEL_TILE):
output[${C}] = vo${ABC[C]};
output += ${CHANNEL_TILE};
}
// Remainder loop: handles the channels left over after the tiled loop, one
// float at a time, using the same arithmetic.
for (; c >= sizeof(float); c -= sizeof(float)) {
const float vtl = *i0++;
const float vtr = *i1++;
const float vbl = *i2++;
const float vbr = *i3++;
const float vtd = vtr - vtl;
const float vbd = vbr - vbl;
const float vt = vtl + vtd * valphah;
const float vb = vbl + vbd * valphah;
const float vd = vb - vt;
const float vo = vt + vd * valphav;
*output++ = vo;
}
$else:
// CHANNEL_TILE == 1: a single one-channel-per-iteration loop. It is a
// do/while that terminates on c == 0, so it depends on the asserts above
// (channels non-zero and an exact multiple of sizeof(float)).
do {
const float vtl = *i0++;
const float vtr = *i1++;
const float vbl = *i2++;
const float vbr = *i3++;
const float vtd = vtr - vtl;
const float vbd = vbr - vbl;
const float vt = vtl + vtd * valphah;
const float vb = vbl + vbd * valphah;
const float vd = vb - vt;
const float vo = vt + vd * valphav;
*output++ = vo;
c -= sizeof(float);
} while (c != 0);
// Step to the next output pixel; output_increment is applied in bytes.
output = (float*) ((uintptr_t) output + output_increment);
} while (--output_pixels != 0);
}