From 094e2572df23fd849688c2f95380b184c071efec Mon Sep 17 00:00:00 2001
From: Christian Duvivier <cduvivier@google.com>
Date: Mon, 11 Feb 2013 15:34:08 -0800
Subject: [PATCH] Faster convolve8_avg.

Implement convolve8_avg using common functions which are already optimized
instead of using more obscure ones which have only C versions. Encoder
overall speed-up of about 12%.

Change-Id: I8c57aa76936c8a48f22b115f19f61d9f2ae1e4b6
---
 vp9/common/vp9_convolve.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
index f21f1d84e8..b87c410df3 100644
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -7,12 +7,15 @@
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
+#include "vp9/common/vp9_convolve.h"
+
 #include <assert.h>
 
 #include "./vpx_config.h"
 #include "./vp9_rtcd.h"
 #include "vp9/common/vp9_common.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
 
 #define VP9_FILTER_WEIGHT 128
 #define VP9_FILTER_SHIFT  7
@@ -293,9 +296,21 @@ void vp9_convolve8_avg_c(const uint8_t *src, int src_stride,
                          const int16_t *filter_x, int x_step_q4,
                          const int16_t *filter_y, int y_step_q4,
                          int w, int h) {
-  convolve_avg_c(src, src_stride, dst, dst_stride,
-                 filter_x, x_step_q4, filter_y, y_step_q4,
-                 w, h, 8);
+  /* Fixed size intermediate buffer places limits on parameters. */
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
+  assert(w <= 16);
+  assert(h <= 16);
+
+  vp9_convolve8(src, src_stride,
+                temp, 16,
+                filter_x, x_step_q4,
+                filter_y, y_step_q4,
+                w, h);
+  vp9_convolve_avg(temp, 16,
+                   dst, dst_stride,
+                   NULL, 0, /* These unused parameters should be removed! */
+                   NULL, 0, /* These unused parameters should be removed! */
+                   w, h);
 }
 
 void vp9_convolve_copy(const uint8_t *src, int src_stride,
-- 
GitLab