pca.c 6.35 KB
Newer Older
/*
 * principal component analysis (PCA)
 * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * principal component analysis (PCA)
 */

Michael Niedermayer's avatar
Michael Niedermayer committed
27
#include "common.h"
28 29
#include "pca.h"

30 31 32 33 34 35 36
/**
 * Accumulator state for a principal component analysis.
 *
 * ff_pca_add() sums samples into mean and covariance; ff_pca() later
 * normalizes both buffers in place.
 */
typedef struct PCA {
    int     count;       /* number of sample vectors added so far */
    int     n;           /* dimension of every sample vector */
    double *covariance;  /* n*n doubles, indexed as [col + row*n] */
    double *mean;        /* n doubles, one per dimension */
} PCA;

37 38
PCA *ff_pca_init(int n){
    PCA *pca;
39
    if(n<=0)
40
        return NULL;
41

42
    pca= av_mallocz(sizeof(PCA));
43 44 45 46 47
    pca->n= n;
    pca->count=0;
    pca->covariance= av_mallocz(sizeof(double)*n*n);
    pca->mean= av_mallocz(sizeof(double)*n);

48
    return pca;
49 50 51 52 53
}

/**
 * Release a PCA context together with its covariance and mean buffers.
 *
 * @param pca context to free; NULL is accepted and ignored, so the
 *            result of a failed ff_pca_init() can be passed directly
 */
void ff_pca_free(PCA *pca){
    if(!pca)
        return;

    av_freep(&pca->covariance);
    av_freep(&pca->mean);
    av_free(pca);
}

/**
 * Fold one sample vector into the running sums.
 *
 * Adds v to the per-dimension sums in pca->mean and adds the products
 * v[row]*v[col] (col >= row only) to pca->covariance, then bumps the
 * sample count.
 *
 * @param pca accumulator to update
 * @param v   sample vector; the first pca->n entries are read
 */
void ff_pca_add(PCA *pca, double *v){
    const int dim = pca->n;
    int row, col;

    for (row = 0; row < dim; row++) {
        double *cov_row = pca->covariance + row*dim;

        pca->mean[row] += v[row];
        /* only the col >= row half of each row is accumulated */
        for (col = row; col < dim; col++)
            cov_row[col] += v[row]*v[col];
    }

    pca->count++;
}

/*
 * Rotate the two matrix elements a[j + i*n] and a[l + k*n] against each
 * other. Expands in place and relies on n, s and tau being in scope at
 * the point of use; only meant for ff_pca() below.
 */
#define ROTATE(a,i,j,k,l) {\
    double g=a[j + i*n];\
    double h=a[l + k*n];\
    a[j + i*n]=g-s*(h+g*tau);\
    a[l + k*n]=h+s*(g-h*tau); }

/**
 * Compute eigenvalues and eigenvectors of the accumulated covariance
 * matrix by iterated plane rotations (at most 50 sweeps).
 *
 * The accumulators are normalized in place first: pca->mean is divided
 * by the sample count and pca->covariance is turned from sums of
 * products into covariances. Calling this twice on the same context
 * therefore normalizes twice.
 *
 * @param pca         context filled through ff_pca_add()
 * @param eigenvector output, n*n doubles; component j of eigenvector i is
 *                    stored at eigenvector[i + j*n]
 * @param eigenvalue  output, n doubles, sorted in descending order and
 *                    paired with the eigenvectors by index
 * @return the number of sweeps that were needed, or -1 if no sample was
 *         added or the iteration did not converge within 50 sweeps
 */
int ff_pca(PCA *pca, double *eigenvector, double *eigenvalue){
    int i, j, k, pass;
    const int n= pca->n;
    double z[n];

    /* Without samples the normalization below would divide by zero and
     * fill the accumulators and outputs with NaN; report failure instead
     * and leave everything untouched. */
    if(pca->count <= 0)
        return -1;

    memset(eigenvector, 0, sizeof(double)*n*n);

    for(j=0; j<n; j++){
        pca->mean[j] /= pca->count;
        eigenvector[j + j*n] = 1.0; /* start from the identity matrix */
        for(i=0; i<=j; i++){
            pca->covariance[j + i*n] /= pca->count;
            pca->covariance[j + i*n] -= pca->mean[i] * pca->mean[j];
            /* mirror into the other triangle; the rotations below only
             * touch the [max + min*n] half */
            pca->covariance[i + j*n] = pca->covariance[j + i*n];
        }
        eigenvalue[j]= pca->covariance[j + j*n];
        z[j]= 0;
    }

    for(pass=0; pass < 50; pass++){
        double sum=0;

        /* total magnitude of the remaining off-diagonal elements */
        for(i=0; i<n; i++)
            for(j=i+1; j<n; j++)
                sum += fabs(pca->covariance[j + i*n]);

        if(sum == 0){
            /* Converged: selection-sort the eigenvalues in descending
             * order and move the matching eigenvectors along. The search
             * starts from element i itself so that k is always valid,
             * whatever the sign of the eigenvalues. */
            for(i=0; i<n; i++){
                double maxvalue= eigenvalue[i];
                k= i;
                for(j=i+1; j<n; j++){
                    if(eigenvalue[j] > maxvalue){
                        maxvalue= eigenvalue[j];
                        k= j;
                    }
                }
                eigenvalue[k]= eigenvalue[i];
                eigenvalue[i]= maxvalue;
                for(j=0; j<n; j++){
                    double tmp= eigenvector[k + j*n];
                    eigenvector[k + j*n]= eigenvector[i + j*n];
                    eigenvector[i + j*n]= tmp;
                }
            }
            return pass;
        }

        for(i=0; i<n; i++){
            for(j=i+1; j<n; j++){
                double covar= pca->covariance[j + i*n];
                double t,c,s,tau,theta, h;

                /* during the first sweeps skip elements that are small
                 * relative to the average off-diagonal magnitude */
                if(pass < 3 && fabs(covar) < sum / (5*n*n)) //FIXME why pass < 3
                    continue;
                if(fabs(covar) == 0.0) //FIXME should not be needed
                    continue;
                /* later on, flush elements that are negligible next to
                 * both diagonal entries they couple */
                if(pass >=3 && fabs((eigenvalue[j]+z[j])/covar) > (1LL<<32) && fabs((eigenvalue[i]+z[i])/covar) > (1LL<<32)){
                    pca->covariance[j + i*n]=0.0;
                    continue;
                }

                h= (eigenvalue[j]+z[j]) - (eigenvalue[i]+z[i]);
                theta=0.5*h/covar;
                t=1.0/(fabs(theta)+sqrt(1.0+theta*theta));
                if(theta < 0.0) t = -t;

                c=1.0/sqrt(1+t*t);
                s=t*c;
                tau=s/(1.0+c);
                /* diagonal corrections are collected in z and applied
                 * once per sweep */
                z[i] -= t*covar;
                z[j] += t*covar;

                for(k=0; k<n; k++) {
                    if(k!=i && k!=j){
                        ROTATE(pca->covariance,FFMIN(k,i),FFMAX(k,i),FFMIN(k,j),FFMAX(k,j))
                    }
                    ROTATE(eigenvector,k,i,k,j)
                }
                pca->covariance[j + i*n]=0.0;
            }
        }
        for (i=0; i<n; i++) {
            eigenvalue[i] += z[i];
            z[i]=0.0;
        }
    }

    return -1;
}
#undef ROTATE

164
#ifdef TEST

#undef printf
#include <stdio.h>
#include <stdlib.h>
#include "lfg.h"

/**
 * Self-test: feed pseudo-random step-shaped vectors into a PCA context,
 * run ff_pca() and print
 *  - the [i + j*LEN], j >= i half of pca->covariance,
 *  - for every eigenvector the summed absolute difference between
 *    (covariance * eigenvector) / eigenvalue and the eigenvector itself,
 *  - the eigenvectors with their eigenvalues, absolute and relative to
 *    the first one.
 *
 * @return 0 on success, 1 if the PCA context could not be allocated
 */
int main(void){
    PCA *pca;
    int i, j, k;
#define LEN 8
    double eigenvector[LEN*LEN];
    double eigenvalue[LEN];
    AVLFG prng;

    av_lfg_init(&prng, 1);

    pca= ff_pca_init(LEN);
    if(!pca)
        return 1;

    for(i=0; i<9000000; i++){
        double v[LEN];
        /* the three draws must stay in this order to keep the sequence */
        int pos = av_lfg_get(&prng) % LEN;
        int v2  = av_lfg_get(&prng) % 101 - 50;
        /* NOTE(review): if av_lfg_get() returns an unsigned type, this
         * subtraction wraps before the conversion to double, unlike the
         * int assignment above -- confirm that this is intended. */
        v[0]    = av_lfg_get(&prng) % 101 - 50;
        /* step vector: v[0] up to index pos, v2 afterwards */
        for(j=1; j<LEN; j++){
            if(j<=pos) v[j]= v[0];
            else       v[j]= v2;
        }
        ff_pca_add(pca, v);
    }

    ff_pca(pca, eigenvector, eigenvalue);
    for(i=0; i<LEN; i++){
        for(j=i; j<LEN; j++){
            printf("%f ", pca->covariance[i + j*LEN]);
        }
        printf("\n");
    }

    for(i=0; i<LEN; i++){
        double v[LEN];
        double error=0;
        memset(v, 0, sizeof(v));
        for(j=0; j<LEN; j++){
            for(k=0; k<LEN; k++){
                /* [min + max*LEN] is the half of the matrix that ff_pca()
                 * does not rotate */
                v[j] += pca->covariance[FFMIN(k,j) + FFMAX(k,j)*LEN] * eigenvector[i + k*LEN];
            }
            v[j] /= eigenvalue[i];
            error += fabs(v[j] - eigenvector[i + j*LEN]);
        }
        printf("%f ", error);
    }
    printf("\n");

    for(i=0; i<LEN; i++){
        for(j=0; j<LEN; j++){
            printf("%9.6f ", eigenvector[i + j*LEN]);
        }
        printf("  %9.1f %f\n", eigenvalue[i], eigenvalue[i]/eigenvalue[0]);
    }

    ff_pca_free(pca);
    return 0;
}
#endif