Up to 6% faster encoding with the clang compiler,
mostly by revamping the main loop of GetResidualCost() and avoiding some branches.
Change-Id: Ib05763e18a6bf46c82dc3d5d1d8eb65e99474207
diff --git a/src/enc/frame.c b/src/enc/frame.c
index 4624d91..2963ef6 100644
--- a/src/enc/frame.c
+++ b/src/enc/frame.c
@@ -292,31 +292,20 @@
if (res->last < 0) {
return VP8BitCost(0, p0);
}
- cost = 0;
- while (n < res->last) {
- int v = res->coeffs[n];
+ cost = VP8BitCost(1, p0);
+ for (; n < res->last; ++n) {
+ const int v = abs(res->coeffs[n]);
const int b = VP8EncBands[n + 1];
- ++n;
- if (v == 0) {
- // short-case for VP8LevelCost(t, 0) (note: VP8LevelFixedCosts[0] == 0):
- cost += t[0];
- t = res->cost[b][0];
- continue;
- }
- v = abs(v);
- cost += VP8BitCost(1, p0);
+ const int ctx = (v >= 2) ? 2 : v;
cost += VP8LevelCost(t, v);
- {
- const int ctx = (v == 1) ? 1 : 2;
- p0 = res->prob[b][ctx][0];
- t = res->cost[b][ctx];
- }
+ t = res->cost[b][ctx];
+ // the masking trick is faster than "if (v) cost += ..." with clang
+ cost += (v ? ~0U : 0) & VP8BitCost(1, res->prob[b][ctx][0]);
}
// Last coefficient is always non-zero
{
const int v = abs(res->coeffs[n]);
assert(v != 0);
- cost += VP8BitCost(1, p0);
cost += VP8LevelCost(t, v);
if (n < 15) {
const int b = VP8EncBands[n + 1];