Commit c619f6cb authored by Yaowu Xu's avatar Yaowu Xu Committed by Code Review
Browse files

Merge "fixed an overflow in ssim calculation"

parents e1a8b6c8 d4cdb683
Branches
Tags
No related merge requests found
Showing with 65 additions and 38 deletions
......@@ -290,8 +290,8 @@ void ssim_parms_8x8_c
}
}
const static long long c1 = 426148; // (256^2*(.01*255)^2
const static long long c2 = 3835331; //(256^2*(.03*255)^2
const static long long cc1 = 26634; // (64^2*(.01*255)^2
const static long long cc2 = 239708; // (64^2*(.03*255)^2
static double similarity
(
......@@ -303,10 +303,19 @@ static double similarity
int count
)
{
long long ssim_n = (2*sum_s*sum_r+ c1)*(2*count*sum_sxr-2*sum_s*sum_r+c2);
long long ssim_n, ssim_d;
long long c1, c2;
long long ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
(count*sum_sq_s-sum_s*sum_s + count*sum_sq_r-sum_r*sum_r +c2) ;
//scale the constants by number of pixels
c1 = (cc1*count*count)>>12;
c2 = (cc2*count*count)>>12;
ssim_n = (2*sum_s*sum_r+ c1)*((long long) 2*count*sum_sxr-
(long long) 2*sum_s*sum_r+c2);
ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
((long long)count*sum_sq_s-(long long)sum_s*sum_s +
(long long)count*sum_sq_r-(long long) sum_r*sum_r +c2) ;
return ssim_n * 1.0 / ssim_d;
}
......@@ -332,18 +341,33 @@ long dssim(unsigned char *s,int sp, unsigned char *r,int rp,
const vp8_variance_rtcd_vtable_t *rtcd)
{
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
double ssim3;
long long ssim_n;
long long ssim_d;
long long ssim3;
long long ssim_n,ssim_n1,ssim_n2;
long long ssim_d,ssim_d1,ssim_d2;
long long ssim_t1,ssim_t2;
long long c1, c2;
// normalize by 256/64
c1 = cc1*16;
c2 = cc2*16;
rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
ssim_n = (2*sum_s*sum_r+ c1)*(2*256*sum_sxr-2*sum_s*sum_r+c2);
ssim_n1 = (2*sum_s*sum_r+ c1);
ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
(256*sum_sq_s-sum_s*sum_s + 256*sum_sq_r-sum_r*sum_r +c2) ;
ssim_n2 =((long long) 2*256*sum_sxr-(long long) 2*sum_s*sum_r+c2);
ssim3 = 256 * (ssim_d-ssim_n) / ssim_d;
return (long)( 256*ssim3 * ssim3 );
ssim_d1 =((long long)sum_s*sum_s +(long long)sum_r*sum_r+c1);
ssim_d2 = (256 * (long long) sum_sq_s-(long long) sum_s*sum_s +
(long long) 256*sum_sq_r-(long long) sum_r*sum_r +c2) ;
ssim_t1 = 256 - 256 * ssim_n1 / ssim_d1;
ssim_t2 = 256 - 256 * ssim_n2 / ssim_d2;
ssim3 = 256 *ssim_t1 * ssim_t2;
if(ssim3 <0 )
ssim3=0;
return (long)( ssim3 );
}
// TODO: (jbb) this 8x8 window might be too big + we may want to pick pixels
// such that the window regions overlap block boundaries to penalize blocking
......@@ -361,18 +385,20 @@ double vp8_ssim2
)
{
int i,j;
int samples =0;
double ssim_total=0;
// we can sample points as frequently as we like start with 1 per 8x8
for(i=0; i < height; i+=8, img1 += stride_img1*8, img2 += stride_img2*8)
// we can sample points as frequently as we like start with 1 per 4x4
for(i=0; i < height-8; i+=4, img1 += stride_img1*4, img2 += stride_img2*4)
{
for(j=0; j < width; j+=8 )
for(j=0; j < width-8; j+=4 )
{
ssim_total += ssim_8x8(img1, stride_img1, img2, stride_img2, rtcd);
double v = ssim_8x8(img1+j, stride_img1, img2+j, stride_img2, rtcd);
ssim_total += v;
samples++;
}
}
ssim_total /= (width/8 * height /8);
ssim_total /= samples;
return ssim_total;
}
......@@ -405,4 +431,4 @@ double vp8_calc_ssim
*weight = 1;
return ssimv;
}
}
\ No newline at end of file
......@@ -16,12 +16,12 @@
paddusw xmm14, xmm4 ; sum_r
movdqa xmm1, xmm3
pmaddwd xmm1, xmm1
paddq xmm13, xmm1 ; sum_sq_s
paddd xmm13, xmm1 ; sum_sq_s
movdqa xmm2, xmm4
pmaddwd xmm2, xmm2
paddq xmm12, xmm2 ; sum_sq_r
paddd xmm12, xmm2 ; sum_sq_r
pmaddwd xmm3, xmm4
paddq xmm11, xmm3 ; sum_sxr
paddd xmm11, xmm3 ; sum_sxr
%endmacro
; Sum across the register %1 starting with q words
......@@ -66,6 +66,7 @@ sym(vp8_ssim_parms_16x16_sse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
SAVE_XMM
push rsi
push rdi
; end prolog
......@@ -115,19 +116,20 @@ NextRow:
SUM_ACROSS_Q xmm11
mov rdi,arg(4)
movq [rdi], xmm15;
movd [rdi], xmm15;
mov rdi,arg(5)
movq [rdi], xmm14;
movd [rdi], xmm14;
mov rdi,arg(6)
movq [rdi], xmm13;
movd [rdi], xmm13;
mov rdi,arg(7)
movq [rdi], xmm12;
movd [rdi], xmm12;
mov rdi,arg(8)
movq [rdi], xmm11;
movd [rdi], xmm11;
; begin epilog
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
......@@ -154,6 +156,7 @@ sym(vp8_ssim_parms_8x8_sse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
SAVE_XMM
push rsi
push rdi
; end prolog
......@@ -174,11 +177,8 @@ sym(vp8_ssim_parms_8x8_sse3):
NextRow2:
;grab source and reference pixels
movq xmm5, [rsi]
movq xmm6, [rdi]
movdqa xmm3, xmm5
movdqa xmm4, xmm6
movq xmm3, [rsi]
movq xmm4, [rdi]
punpcklbw xmm3, xmm0 ; low_s
punpcklbw xmm4, xmm0 ; low_r
......@@ -197,19 +197,20 @@ NextRow2:
SUM_ACROSS_Q xmm11
mov rdi,arg(4)
movq [rdi], xmm15;
movd [rdi], xmm15;
mov rdi,arg(5)
movq [rdi], xmm14;
movd [rdi], xmm14;
mov rdi,arg(6)
movq [rdi], xmm13;
movd [rdi], xmm13;
mov rdi,arg(7)
movq [rdi], xmm12;
movd [rdi], xmm12;
mov rdi,arg(8)
movq [rdi], xmm11;
movd [rdi], xmm11;
; begin epilog
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment