vandeWiel.c

Go to the documentation of this file.
00001 
00015 #include <R.h>
00016 #include <Rmath.h>
00017 #include <Rdefines.h>
00018                     
00019 
00031 typedef struct {
00032     long length;
00033     double *c;
00034     double *x;
00035 } celW;
00036 
00037 double binomi(int m, int n) { 
00038 
00039     double bin = 1;
00040     double bin1 = 1;
00041     double bin2 = 1;
00042     int i, j;
00043         
00044     for (i = 1; i <= n; i++) bin1 = bin1 * (m + 1 -i);
00045     for (j = 1; j <= n; j++) bin2 = bin2 * j;
00046     bin = bin1/bin2;
00047     
00048     return(bin);
00049 }
00050 
00051 celW** reserveW(int a, int b)
00052 {
00053     long res = 0;
00054     int i, j;
00055     celW** W;
00056     
00057     /* <FIXME>: need to free memory in case Calloc barfs
00058        Writing R Extension advertises .on.exit but
00059        I still need a pointer to the memory.
00060        </FIXME>
00061     */
00062 
00063     W = Calloc(a + 1, celW*);
00064 
00065     for (i = 0; i <= a; i++)
00066         W[i] = Calloc(b + 1, celW);
00067         
00068     for (i = 0; i <= a; i++) {
00069         for (j = i; j <= b; j++) {
00070             res = (long) binomi(j,i);
00071             /* the majority of memory is freed on exit and error
00072                thanks to S_alloc */
00073             W[i][j].c = (double *) S_alloc(res, sizeof(double));
00074             W[i][j].x = (double *) S_alloc(res, sizeof(double));
00075         }
00076         R_CheckUserInterrupt();
00077     }
00078     return(W);
00079 }
00080 
00081 void FreeW(int a, celW **W)
00082 {
00083      int i;
00084  
00085      for (i = a; i >= 0; i--)
00086          Free(W[i]);
00087          
00088      Free(W);
00089 }
00090 
00091 void initW(int a, int b, celW **W) {
00092 
00093     int i, j;
00094 
00095     for (i = 1; i <= a; i++)
00096     for (j = 0; j <= b; j++) {
00097         W[i][j].length = 0;
00098     }
00099     for (j = 0; j <= b; j++) {
00100         W[0][j].length = 1;
00101         W[0][j].c[0] = 1;  
00102         W[0][j].x[0] = 0;  
00103     }
00104 }
00105 
00106 void mult(celW *tem, int a, int b, int rank, double *rs) {
00107 
00108     /*
00109 
00110     mult multiplies the polynomial c_i*x^(l_i) by x^(rs[rank]), 
00111     which means adding the exponents
00112 
00113     */
00114 
00115     int j;
00116     for (j = 0; j < tem[0].length; j++)
00117         tem[0].x[j] += rs[rank];
00118 }
00119 
00120 void plus(celW **W, celW *tempie, int a, int b, double tol) {
00121 
00122     /*
00123 
00124     plus adds terms with the same exponents after multiplication 
00125     with 1 + x^(rs[rank]), so c1*x^j + c2*x^j becomes (c1+c2)*x^j
00126 
00127     */
00128 
00129     int elep = 0;
00130     int k = 0;
00131     int test = 1;
00132     int i, j;
00133     
00134     for (i = 0; i < W[a][b-1].length; i++) {
00135 
00136         test = 1;
00137         
00138         for (j = elep; j < tempie[0].length && test==1; j++) {
00139         
00140             if (tempie[0].x[j] - tol <= W[a][b-1].x[i]
00141                 && W[a][b-1].x[i] <= tempie[0].x[j] + tol) {
00142 
00143                 tempie[0].c[j] += W[a][b-1].c[i];
00144                 test = 0;
00145                 elep = j;             
00146             }
00147         }
00148          
00149         if (test == 1) {
00150             tempie[0].c[tempie[0].length + k] = W[a][b-1].c[i];
00151             tempie[0].x[tempie[0].length + k] = W[a][b-1].x[i];
00152             k++;
00153         }
00154         R_CheckUserInterrupt();
00155     }
00156     tempie[0].length += k;
00157 }
00158 
00159 void mymergesort(celW temptw, long tijd)
00160 {
00161 
00162     /*
00163     
00164     mymergesort composes one sorted list (increasing exponents of 
00165     the polynomial) from two separately sorted lists. c1*x^3 + c2*x^5 
00166     and  c3*x^4 + c4*x^7  becomes  c1*x^3 + c3*x^4 + c2*x^5 + c4*x^7.
00167 
00168     */
00169 
00170     celW copiep;
00171     int t1 = 0; 
00172     int t2 = 0;
00173     int i, j;
00174 
00175     copiep.c = Calloc(temptw.length, double);
00176     copiep.x = Calloc(temptw.length, double);
00177         
00178     for (i = 0; i < temptw.length; i++) {
00179         copiep.c[i] = temptw.c[i];
00180         copiep.x[i] = temptw.x[i];
00181     }
00182     
00183     for (j = 0; j < temptw.length; j++) {
00184         if (t1 <= tijd-1 && t2 <= temptw.length - tijd - 1) {
00185             if (copiep.x[t1] < copiep.x[tijd + t2]) {
00186                 temptw.x[j] = copiep.x[t1];
00187                 temptw.c[j] = copiep.c[t1];
00188                 t1++;
00189             } else {
00190                 temptw.x[j] = copiep.x[tijd + t2];
00191                 temptw.c[j] = copiep.c[tijd + t2];
00192                 t2++;
00193             }
00194         } else {
00195             if (t1 > tijd - 1) {
00196                 temptw.x[j] = copiep.x[tijd + t2];
00197                 temptw.c[j] = copiep.c[tijd + t2];
00198                 t2++; 
00199             } else {   
00200                 temptw.x[j] = copiep.x[t1];
00201                 temptw.c[j] = copiep.c[t1];
00202                 t1++;
00203             }
00204         }   
00205         R_CheckUserInterrupt();       
00206     } 
00207     Free(copiep.c);
00208     Free(copiep.x);
00209 }
00210 
00211 void fillcell(celW **W, int i1, int j1, int r, double *rs, double tol) {
00212     
00213     /*
00214 
00215     fillcell makes the new recursive polynomial W[i1][j1] from 
00216     W[i1 - 1][j1 - 1] and W[i1][j1 - 1]. j1 is the total number of 
00217     rank scores assigned so far to either of the two groups, 
00218     i1 is the number of rank scores assigned to the smallest sample.
00219 
00220     */
00221 
00222     long tijd;
00223     celW temp2;
00224     int j, j2;
00225 
00226     temp2.c = Calloc(W[i1 - 1][j1 - 1].length + 
00227                      W[i1][j1 - 1].length, double);
00228     temp2.x = Calloc(W[i1 - 1][j1 - 1].length + 
00229                      W[i1][j1 - 1].length, double);
00230     temp2.length = W[i1 - 1][j1 - 1].length;
00231 
00232     for (j = 0; j < temp2.length; j++) {
00233        temp2.c[j] = W[i1 - 1][j1 - 1].c[j];
00234        temp2.x[j] = W[i1 - 1][j1 - 1].x[j];
00235     }
00236 
00237     if (i1 == j1) {       
00238         mult(&temp2, i1 - 1, j1 - 1, r, rs); 
00239     } else {           
00240         mult(&temp2, i1 - 1, j1 - 1, r, rs);                        
00241         tijd = temp2.length;                                
00242         plus(W, &temp2, i1, j1, tol);                            
00243         mymergesort(temp2, tijd);                              
00244     }
00245 
00246     W[i1][j1].length = temp2.length;
00247 
00248     for (j2 = 0; j2 < temp2.length; j2++) {
00249         W[i1][j1].c[j2] = temp2.c[j2];
00250         W[i1][j1].x[j2] = temp2.x[j2];
00251     }          
00252 
00253     Free(temp2.c);
00254     Free(temp2.x);
00255 }
00256 
00257 void mirrorW(celW **W,int ce, int bep, int start, double *rs) {   
00258 
00259     /* 
00260 
00261     mirrorW contains a trick to speed op computations considerably. 
00262     By symmetry arguments it is easy to find W[i][tot] from W[tot-i][tot]
00263 
00264     */
00265 
00266     double totsum = 0;
00267     long len;
00268     int r, h;
00269     
00270     for (r = 0; r < bep; r++) totsum += rs[start + r];
00271     
00272     len = W[bep-ce][bep].length;
00273         
00274     for (h = 0; h < len; h++) {
00275         W[ce][bep].length = W[bep-ce][bep].length;
00276         W[ce][bep].c[len-1-h] = W[bep-ce][bep].c[h];
00277         W[ce][bep].x[len-1-h] = totsum - W[bep-ce][bep].x[h];
00278     }
00279 }
00280 
00281 void makeW(celW **W, int a, int b, int start, double *rs, double tol) {
00282 
00283 
00284     /* 
00285 
00286     makeW simply determines whether a new polynomial W[i][j] 
00287     can be found from mirrorW (if W[j-i][j] is available) or needs 
00288     to be constructed via multiplication etc.
00289 
00290     */
00291 
00292     long i,j;
00293     int rank;
00294     int hulp;
00295 
00296     for (j = 1; j <= b; j++) {  /* verander naar 0!! */
00297 
00298         if (j < a) {
00299             hulp = j; 
00300         } else {
00301             hulp = a;
00302         }
00303 
00304         for (i=1; i <= hulp; i++) {   
00305             if (i <= j/2 || j == 1) {
00306                 rank = start+j;
00307                 fillcell(W, i, j, rank - 1, rs, tol);
00308             } else {
00309                 mirrorW(W, i, j, start, rs);
00310             }                               
00311             R_CheckUserInterrupt();
00312         }
00313     }
00314 }
00315 
00316 void cumulcoef(celW **W, int i1, int j1) {
00317 
00318 
00319     /*
00320 
00321     cumulcoef recursively adds the coefficients of the 
00322     sorted polynomial. So, 3*x^4 + 4*x^6 + 2*x^7  becomes  
00323     3*x^4 + 7*x^6 + 9*x^7.  
00324 
00325     */
00326     double coef = 0;
00327     int i;
00328      
00329     for(i = 0; i < W[i1][j1].length; i++) {
00330         W[i1][j1].c[i] += coef;
00331         coef = W[i1][j1].c[i];
00332     }
00333 }
00334 
00335 double numbersmall(int c, int b, double ob, celW **W1, celW **W2, double tol) {
00336 
00337 
00338     /*
00339 
00340     numbersmall is the core of the split-up algorithm. 
00341     
00342     It efficiently combines two polynomials which have used 
00343     complementary sets of rank scores and computes their contribution 
00344     to the tail-probability. 
00345 
00346     */
00347 
00348     double tot = 0;
00349     long le;
00350     int test = 1;
00351     int be = b/2;
00352     int bp = (b+1)/2;
00353     int tempel = 0;
00354     int i, j, h;
00355     double th;
00356     
00357     for (h = 0; h <= c; h++) {
00358 
00359         tempel = 0;
00360         le = W2[c-h][bp].length;
00361         
00362         for (i = 0; i < W1[h][be].length; i++) {
00363             test = 1;
00364             for (j = tempel; j < le && test == 1; j++) {
00365                 th = W1[h][be].x[i] + W2[c-h][bp].x[le-j-1];
00366                 if (th < ob | th - ob < tol) {
00367                     tot += W1[h][be].c[i] * W2[c - h][bp].c[le - j -1];
00368                     tempel = j;
00369                     test = 0;
00370                 } 
00371             }
00372         }
00373     }
00374     return(tot);
00375 }
00376        
00377 SEXP R_split_up_2sample(SEXP scores, SEXP m, SEXP obs, SEXP tol) {
00378         
00379     /*
00380 
00381     R interface to the split-up algorithm. 
00382 
00383     `scores' is a REAL vector giving the scores of the total sample 
00384     and `m' is a scalar integer with the sample size of one group.
00385     `obs' is the scalar observed test statistic, namely the
00386     sum of the `m' scores measured in one group.
00387 
00388     */
00389 
00390     int b, c, d, u;
00391     double tot, bino, prob;
00392     double ob;  
00393     SEXP ans;
00394 
00395     celW **W1;
00396     celW **W2;
00397     double *rs;
00398 
00399     b = LENGTH(scores);
00400     rs = REAL(scores);
00401     c = INTEGER(m)[0];
00402     d = b - INTEGER(m)[0];
00403     ob = REAL(obs)[0];
00404 
00405     /* total number of possible permutations */
00406     bino = binomi(b, c);
00407 
00408     /* allocate and initialise memory */
00409     W1 = reserveW(c, (b+1)/2);
00410     initW(c, (b+1)/2, W1);
00411     W2 = reserveW(c, (b+1)/2);
00412     initW(c, (b+1)/2, W2);
00413     
00414     makeW(W1, c, b/2, 0, rs, REAL(tol)[0]);  
00415     makeW(W2, c, (b+1)/2, b/2, rs, REAL(tol)[0]);
00416 
00417     for (u = 0; u <= c; u++) cumulcoef(W2, u, (b+1)/2);
00418 
00419     /* number of permutations <= ob */
00420     tot = numbersmall(c, b, ob, W1, W2, REAL(tol)[0]);
00421     
00422     /* probability */
00423     prob = tot/bino; 
00424     
00425     /* free memory: this will _not_ take place 
00426        in case of an error */
00427     FreeW(c, W1);
00428     FreeW(c, W2);
00429 
00430     /* return to R */
00431     PROTECT(ans = allocVector(REALSXP, 1));
00432     REAL(ans)[0] = prob;
00433     UNPROTECT(1);
00434     return(ans);
00435 }