StreitbergRoehmel.c

Go to the documentation of this file.
00001 
00011 #include <R.h>
00012 #include <Rmath.h>
00013 #include <Rdefines.h>
00014 
00039 SEXP R_cpermdist2(SEXP score_a, SEXP score_b, SEXP m_a,  SEXP m_b, 
00040                   SEXP retProb) {
00041     /*
00042       compute the joint permutation distribution of the 
00043       sum of the first m_a elements of score_a and score_b
00044       (usually score_a = rep(1, length(score_a)) and 
00045                score_b = Data scores, Wilcoxon, Ansari ...).
00046       In this case the exact conditional distribution 
00047       in the simple independent two-sample problem is computed.
00048     */ 
00049 
00050     int n, im_a, im_b;          /* number of observations */
00051 
00052     SEXP H, x;                  /* matrix of permutations and vector 
00053                                    of probabilities */ 
00054   
00055     int i, j, k, sum_a = 0, sum_b = 0, s_a = 0, s_b = 0, isb;
00056     double msum = 0.0;          /* little helpers */
00057   
00058     int *iscore_a, *iscore_b;   /* pointers to R structures */
00059     double *dH, *dx;
00060 
00061     /* some basic checks, should be improved */  
00062 
00063     if (!isVector(score_a))
00064         error("score_a is not a vector");
00065 
00066     n = LENGTH(score_a);
00067 
00068     if (!isVector(score_b))
00069         error("score_b is not a vector");
00070         
00071     if (LENGTH(score_b) != n)
00072         error("length of score_a and score_b differ");
00073   
00074     iscore_a = INTEGER(score_a);
00075     iscore_b = INTEGER(score_b);
00076         
00077     if (TYPEOF(retProb) != LGLSXP)
00078         error("retProb is not a logical");                                  
00079 
00080     im_a = INTEGER(m_a)[0];  /* cosmetics only */
00081     im_b = INTEGER(m_b)[0];
00082 
00083     /* compute the total sum of the scores and check if they are >= 0 */
00084         
00085     for (i = 0; i < n; i++) {
00086         if (iscore_a[i] < 0) 
00087             error("score_a for observation number %d is negative", i);
00088         if (iscore_b[i] < 0) 
00089             error("score_b for observation number %d is negative", i);
00090         sum_a += iscore_a[i];
00091         sum_b += iscore_b[i];
00092     }
00093 
00094     /*
00095       optimization according to Streitberg & Roehmel
00096     */
00097         
00098     sum_a = imin2(sum_a, im_a);
00099     sum_b = imin2(sum_b, im_b);
00100 
00101     /*
00102         initialize H
00103     */
00104 
00105     PROTECT(H = allocVector(REALSXP, (sum_a + 1) * (sum_b + 1)));
00106     dH = REAL(H);
00107 
00108     for (i = 0; i <= sum_a; i++) {
00109         isb = i * (sum_b + 1);
00110         for (j = 0; j <= sum_b; j++) dH[isb + j] = 0.0;
00111     }
00112                 
00113     /*
00114         start the Shift-Algorithm with H[0,0] = 1
00115     */
00116                 
00117     dH[0] = 1.0;
00118         
00119     for (k = 0; k < n; k++) {
00120         s_a += iscore_a[k];
00121         s_b += iscore_b[k];
00122 
00123         /*
00124             compute H up to row im_aand column im_b
00125             Note: 
00126             sum_a = min(sum_a, m)
00127             sum_b = min(sum_b, c)
00128         */
00129                 
00130         for (i = imin2(im_a, s_a); i >= iscore_a[k]; i--) {
00131             isb = i * (sum_b + 1);
00132             for (j = imin2(im_b,s_b); j >= iscore_b[k]; j--)
00133                 dH[isb + j] += 
00134                     dH[(i - iscore_a[k]) * (sum_b + 1) + (j - iscore_b[k])];
00135         }
00136     }
00137 
00138     /*
00139         return the whole matrix H 
00140         Note: use matrix(H, nrow=m_a+1, byrow=TRUE) in R
00141     */ 
00142 
00143     if (!LOGICAL(retProb)[0]) {
00144         UNPROTECT(1);
00145         return(H);
00146     } else {    
00147         PROTECT(x = allocVector(REALSXP, sum_b));
00148         dx = REAL(x);
00149 
00150         /* 
00151             get the values for sample size im_a (in row m) and sum it up
00152         */
00153 
00154         isb = im_a * (sum_b + 1);
00155         for (j = 0; j < sum_b; j++) {
00156             if (!R_FINITE(dH[isb + j + 1]))
00157                 error("overflow error; cannot compute exact distribution");
00158             dx[j] = dH[isb + j + 1];
00159             msum += dx[j];
00160         }
00161         if (!R_FINITE(msum) || msum == 0.0) 
00162             error("overflow error; cannot compute exact distribution");
00163         
00164         /*
00165             compute probabilities and return the density x to R
00166             the support is min(score_b):sum(score_b)
00167             [dpq] stuff is done in R
00168         */
00169                 
00170         for (j = 0; j < sum_b; j++)
00171             dx[j] = dx[j]/msum;
00172                 
00173         UNPROTECT(2);
00174         return(x);
00175     }
00176 }
00177 
00198 SEXP R_cpermdist1(SEXP scores) {
00199 
00200     /*
00201       compute the permutation distribution of the sum of the 
00202       absolute values of the positive elements of `scores'
00203     */ 
00204 
00205     int n;      /* number of observations */ 
00206     SEXP H;     /* vector giving the density of statistics 0:sum(scores) */
00207   
00208     int i, k, sum_a = 0, s_a = 0; /* little helpers */
00209     int *iscores;
00210     double msum = 0.0;
00211     double *dH;
00212         
00213     n = LENGTH(scores);
00214     iscores = INTEGER(scores);
00215                        
00216     for (i = 0; i < n; i++) sum_a += iscores[i];
00217 
00218     /*
00219       Initialize H
00220     */
00221 
00222     PROTECT(H = allocVector(REALSXP, sum_a + 1));
00223     dH = REAL(H);
00224     for (i = 0; i <= sum_a; i++) dH[i] = 0.0;
00225 
00226     /*
00227       start the shift-algorithm with H[0] = 1.0
00228     */
00229                 
00230     dH[0] = 1.0;
00231         
00232     for (k = 0; k < n; k++) {
00233         s_a = s_a + iscores[k];
00234             for (i = s_a; i >= iscores[k]; i--)
00235                 dH[i] = dH[i] + dH[i - iscores[k]];
00236     }
00237 
00238 
00239     /* 
00240         get the number of permutations
00241     */
00242 
00243     for (i = 0; i <= sum_a; i++) {
00244         if (!R_FINITE(dH[i]))
00245             error("overflow error: cannot compute exact distribution");
00246         msum += dH[i];
00247     }
00248     if (!R_FINITE(msum) || msum == 0.0)
00249         error("overflow error: cannot compute exact distribution");
00250         
00251     /*
00252         compute probabilities and return the density H to R
00253         [dpq] stuff is done in R
00254     */ 
00255         
00256     for (i = 0; i <= sum_a; i++)
00257         dH[i] = dH[i]/msum;     /* 0 is a possible realization */
00258 
00259     UNPROTECT(1);       
00260     return(H);
00261 }