Speedups for empty histograms.
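When a histogram is empty, adding it to another one is trivial: the result
is simply a copy of the other histogram (or all zeros if both are empty).
Empty histograms should also not be considered when merging histograms
(it is a waste of CPU).
This does not change the compression performance,
just the speed.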
Change-Id: I42c721ca0f9c5ea067e73b792aa3db6d5e71d01f
diff --git a/src/dsp/lossless_enc.c b/src/dsp/lossless_enc.c
index 6ec9e46..1408fbf 100644
--- a/src/dsp/lossless_enc.c
+++ b/src/dsp/lossless_enc.c
@@ -643,25 +643,56 @@
for (i = 0; i < size; ++i) out[i] += a[i];
}
+#define ADD(X, ARG, LEN) do { \
+ if (a->is_used_[X]) { \
+ if (b->is_used_[X]) { \
+ VP8LAddVector(a->ARG, b->ARG, out->ARG, (LEN)); \
+ } else { \
+ memcpy(&out->ARG[0], &a->ARG[0], (LEN) * sizeof(out->ARG[0])); \
+ } \
+ } else if (b->is_used_[X]) { \
+ memcpy(&out->ARG[0], &b->ARG[0], (LEN) * sizeof(out->ARG[0])); \
+ } else { \
+ memset(&out->ARG[0], 0, (LEN) * sizeof(out->ARG[0])); \
+ } \
+} while (0)
+
+#define ADD_EQ(X, ARG, LEN) do { \
+ if (a->is_used_[X]) { \
+ if (out->is_used_[X]) { \
+ VP8LAddVectorEq(a->ARG, out->ARG, (LEN)); \
+ } else { \
+ memcpy(&out->ARG[0], &a->ARG[0], (LEN) * sizeof(out->ARG[0])); \
+ } \
+ } \
+} while (0)
+
void VP8LHistogramAdd(const VP8LHistogram* const a,
const VP8LHistogram* const b, VP8LHistogram* const out) {
+ int i;
const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
assert(a->palette_code_bits_ == b->palette_code_bits_);
+
if (b != out) {
- VP8LAddVector(a->literal_, b->literal_, out->literal_, literal_size);
- VP8LAddVector(a->distance_, b->distance_, out->distance_,
- NUM_DISTANCE_CODES);
- VP8LAddVector(a->red_, b->red_, out->red_, NUM_LITERAL_CODES);
- VP8LAddVector(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES);
- VP8LAddVector(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES);
+ ADD(0, literal_, literal_size);
+ ADD(1, red_, NUM_LITERAL_CODES);
+ ADD(2, blue_, NUM_LITERAL_CODES);
+ ADD(3, alpha_, NUM_LITERAL_CODES);
+ ADD(4, distance_, NUM_DISTANCE_CODES);
+ for (i = 0; i < 5; ++i) {
+ out->is_used_[i] = (a->is_used_[i] | b->is_used_[i]);
+ }
} else {
- VP8LAddVectorEq(a->literal_, out->literal_, literal_size);
- VP8LAddVectorEq(a->distance_, out->distance_, NUM_DISTANCE_CODES);
- VP8LAddVectorEq(a->red_, out->red_, NUM_LITERAL_CODES);
- VP8LAddVectorEq(a->blue_, out->blue_, NUM_LITERAL_CODES);
- VP8LAddVectorEq(a->alpha_, out->alpha_, NUM_LITERAL_CODES);
+ ADD_EQ(0, literal_, literal_size);
+ ADD_EQ(1, red_, NUM_LITERAL_CODES);
+ ADD_EQ(2, blue_, NUM_LITERAL_CODES);
+ ADD_EQ(3, alpha_, NUM_LITERAL_CODES);
+ ADD_EQ(4, distance_, NUM_DISTANCE_CODES);
+ for (i = 0; i < 5; ++i) out->is_used_[i] |= a->is_used_[i];
}
}
+#undef ADD
+#undef ADD_EQ
//------------------------------------------------------------------------------
// Image transforms.
diff --git a/src/enc/histogram_enc.c b/src/enc/histogram_enc.c
index 9fdbc62..c988b4d 100644
--- a/src/enc/histogram_enc.c
+++ b/src/enc/histogram_enc.c
@@ -51,10 +51,12 @@
VP8LHistogram* const dst) {
uint32_t* const dst_literal = dst->literal_;
const int dst_cache_bits = dst->palette_code_bits_;
+ const int literal_size = VP8LHistogramNumCodes(dst_cache_bits);
const int histo_size = VP8LGetHistogramSize(dst_cache_bits);
assert(src->palette_code_bits_ == dst_cache_bits);
memcpy(dst, src, histo_size);
dst->literal_ = dst_literal;
+ memcpy(dst->literal_, src->literal_, literal_size * sizeof(*dst->literal_));
}
int VP8LGetHistogramSize(int cache_bits) {
@@ -237,7 +239,8 @@
// Get the symbol entropy for the distribution 'population'.
// Set 'trivial_sym', if there's only one symbol present in the distribution.
static double PopulationCost(const uint32_t* const population, int length,
- uint32_t* const trivial_sym) {
+ uint32_t* const trivial_sym,
+ uint8_t* const is_used) {
VP8LBitEntropy bit_entropy;
VP8LStreaks stats;
VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats);
@@ -245,6 +248,8 @@
*trivial_sym = (bit_entropy.nonzeros == 1) ? bit_entropy.nonzero_code
: VP8L_NON_TRIVIAL_SYM;
}
+ // The histogram is used if there is at least one non-zero streak.
+ *is_used = (stats.streaks[1][0] != 0 || stats.streaks[1][1] != 0);
return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
}
@@ -253,7 +258,9 @@
// non-zero: both the zero-th one, or both the last one.
static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
const uint32_t* const Y,
- int length, int trivial_at_end) {
+ int length, int is_X_used,
+ int is_Y_used,
+ int trivial_at_end) {
VP8LStreaks stats;
if (trivial_at_end) {
// This configuration is due to palettization that transforms an indexed
@@ -262,28 +269,43 @@
// Only FinalHuffmanCost needs to be evaluated.
memset(&stats, 0, sizeof(stats));
// Deal with the non-zero value at index 0 or length-1.
- stats.streaks[1][0] += 1;
+ stats.streaks[1][0] = 1;
// Deal with the following/previous zero streak.
- stats.counts[0] += 1;
- stats.streaks[0][1] += length - 1;
+ stats.counts[0] = 1;
+ stats.streaks[0][1] = length - 1;
return FinalHuffmanCost(&stats);
} else {
VP8LBitEntropy bit_entropy;
- VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats);
+ if (is_X_used) {
+ if (is_Y_used) {
+ VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats);
+ } else {
+ VP8LGetEntropyUnrefined(X, length, &bit_entropy, &stats);
+ }
+ } else {
+ if (is_Y_used) {
+ VP8LGetEntropyUnrefined(Y, length, &bit_entropy, &stats);
+ } else {
+ memset(&stats, 0, sizeof(stats));
+ stats.counts[0] = 1;
+ stats.streaks[0][length > 3] = length;
+ VP8LBitEntropyInit(&bit_entropy);
+ }
+ }
return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
}
}
// Estimates the Entropy + Huffman + other block overhead size cost.
-double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
+double VP8LHistogramEstimateBits(VP8LHistogram* const p) {
return
- PopulationCost(
- p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL)
- + PopulationCost(p->red_, NUM_LITERAL_CODES, NULL)
- + PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL)
- + PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL)
- + PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL)
+ PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_),
+ NULL, &p->is_used_[0])
+ + PopulationCost(p->red_, NUM_LITERAL_CODES, NULL, &p->is_used_[1])
+ + PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL, &p->is_used_[2])
+ + PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL, &p->is_used_[3])
+ + PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL, &p->is_used_[4])
+ VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)
+ VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
}
@@ -299,7 +321,8 @@
int trivial_at_end = 0;
assert(a->palette_code_bits_ == b->palette_code_bits_);
*cost += GetCombinedEntropy(a->literal_, b->literal_,
- VP8LHistogramNumCodes(palette_code_bits), 0);
+ VP8LHistogramNumCodes(palette_code_bits),
+ a->is_used_[0], b->is_used_[0], 0);
*cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
b->literal_ + NUM_LITERAL_CODES,
NUM_LENGTH_CODES);
@@ -319,19 +342,23 @@
}
*cost +=
- GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES, trivial_at_end);
+ GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES, a->is_used_[1],
+ b->is_used_[1], trivial_at_end);
if (*cost > cost_threshold) return 0;
*cost +=
- GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES, trivial_at_end);
- if (*cost > cost_threshold) return 0;
-
- *cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES,
- trivial_at_end);
+ GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES, a->is_used_[2],
+ b->is_used_[2], trivial_at_end);
if (*cost > cost_threshold) return 0;
*cost +=
- GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES, 0);
+ GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES,
+ a->is_used_[3], b->is_used_[3], trivial_at_end);
+ if (*cost > cost_threshold) return 0;
+
+ *cost +=
+ GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES,
+ a->is_used_[4], b->is_used_[4], 0);
*cost +=
VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES);
if (*cost > cost_threshold) return 0;
@@ -419,16 +446,19 @@
static void UpdateHistogramCost(VP8LHistogram* const h) {
uint32_t alpha_sym, red_sym, blue_sym;
const double alpha_cost =
- PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym);
+ PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym,
+ &h->is_used_[3]);
const double distance_cost =
- PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) +
+ PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL, &h->is_used_[4]) +
VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
- h->literal_cost_ = PopulationCost(h->literal_, num_codes, NULL) +
- VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES,
- NUM_LENGTH_CODES);
- h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym);
- h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym);
+ h->literal_cost_ =
+ PopulationCost(h->literal_, num_codes, NULL, &h->is_used_[0]) +
+ VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
+ h->red_cost_ =
+ PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym, &h->is_used_[1]);
+ h->blue_cost_ =
+ PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym, &h->is_used_[2]);
h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +
alpha_cost + distance_cost;
if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) {
@@ -493,11 +523,19 @@
const int histo_size = orig_histo->size;
VP8LHistogram** const orig_histograms = orig_histo->histograms;
VP8LHistogram** const histograms = image_histo->histograms;
+ image_histo->size = 0;
for (i = 0; i < histo_size; ++i) {
VP8LHistogram* const histo = orig_histograms[i];
UpdateHistogramCost(histo);
+
+ // Skip the histogram if it is completely empty, which can happen for tiles
+ // with no information (when they are skipped because of LZ77).
+ if (!histo->is_used_[0] && !histo->is_used_[1] && !histo->is_used_[2]
+ && !histo->is_used_[3] && !histo->is_used_[4]) {
+ continue;
+ }
// Copy histograms from orig_histo[] to image_histo[].
- HistogramCopy(histo, histograms[i]);
+ HistogramCopy(histo, histograms[image_histo->size++]);
}
}
@@ -987,8 +1025,7 @@
// histograms of small sizes (as bin_map will be very sparse) and
// maximum quality q==100 (to preserve the compression gains at that level).
const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;
- const int entropy_combine =
- (orig_histo->size > entropy_combine_num_bins * 2) && (quality < 100);
+ int entropy_combine;
if (orig_histo == NULL) goto Error;
@@ -996,15 +1033,16 @@
HistogramBuild(xsize, histo_bits, refs, orig_histo);
// Copies the histograms and computes its bit_cost.
HistogramCopyAndAnalyze(orig_histo, image_histo);
-
+ entropy_combine =
+ (image_histo->size > entropy_combine_num_bins * 2) && (quality < 100);
if (entropy_combine) {
- const int bin_map_size = orig_histo->size;
+ const int bin_map_size = image_histo->size;
// Reuse histogram_symbols storage. By definition, it's guaranteed to be ok.
uint16_t* const bin_map = histogram_symbols;
const double combine_cost_factor =
GetCombineCostFactor(image_histo_raw_size, quality);
- HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort);
+ HistogramAnalyzeEntropyBin(image_histo, bin_map, low_effort);
// Collapse histograms with similar entropy.
HistogramCombineEntropyBin(image_histo, tmp_histo, bin_map, bin_map_size,
entropy_combine_num_bins, combine_cost_factor,
diff --git a/src/enc/histogram_enc.h b/src/enc/histogram_enc.h
index e8c4c83..67d8253 100644
--- a/src/enc/histogram_enc.h
+++ b/src/enc/histogram_enc.h
@@ -44,6 +44,7 @@
double literal_cost_; // Cached values of dominant entropy costs:
double red_cost_; // literal, red & blue.
double blue_cost_;
+ uint8_t is_used_[5]; // 5 for literal, red, blue, alpha, distance
} VP8LHistogram;
// Collection of histograms with fixed capacity, allocated as one
@@ -113,7 +114,7 @@
// Estimate how many bits the combined entropy of literals and distance
// approximately maps to.
-double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
+double VP8LHistogramEstimateBits(VP8LHistogram* const p);
#ifdef __cplusplus
}