## bug fixed in March 2009, thanks to Jing Leng from Yale calculatePValues <- function(expr, windowSize=15, nullLength, paketSize=5*10^4, ## to restrict memory usage maxCluster, verbose=FALSE, ... ){ if( missing(expr) ){ stop(" missing expr data \n") } if(verbose){ cat("using window size: ", windowSize, "\n") } ## samples x genes probenzahl=nrow(expr) genzahl=ncol(expr) zahlderfenster=genzahl-windowSize+1 if( missing(maxCluster) ){ maxCluster=ceiling(probenzahl/2) }else{ stopifnot( maxCluster <= ceiling(probenzahl/2) ) } ## given statistics statistiken = calculateStatistics(expr,windowSize,maxCluster,...) ## trick: ## add a small value < 1 for the sorting of the union ## of the given statistics and the null hypothesis statistics ## (based on resampling) statistiken = statistiken + 0.1 ## null hypothesis statistics if( missing( nullLength ) ){ pakete = 5 }else{ pakete = (nullLength %/% paketSize)+1 } if( verbose ){ cat(" null hypothesis is sampled on a sequence of ", pakete, " times ", paketSize, "\n") } pvalues = array(data=0, dim=c(maxCluster,zahlderfenster,2), dimnames=list(NULL, dimnames(statistiken)[[2]], c("up","down") ) ) for (paket in 1:pakete){ if( verbose ){ cat(" null hypothesis block number: ",paket,"\n") } ## block bootstrapping of two consecutive genes strang = sample(1:(genzahl-1),paketSize/2,replace=T) strang = as.vector(rbind(strang,strang+1)) nullstat = calculateStatistics(expr[,strang],windowSize,maxCluster,...) for (clustergroesse in 1:maxCluster){ ## go for ranks of statistiken embedded in all pvalues[clustergroesse,,1] = pvalues[clustergroesse,,1] + rank(c(statistiken[clustergroesse,,1], nullstat[clustergroesse,,1]), ties.method="min")[1:zahlderfenster] pvalues[clustergroesse,,2] <- pvalues[clustergroesse,,2] + rank(c(statistiken[clustergroesse,,2], nullstat[clustergroesse,,2]), ties.method="min")[1:zahlderfenster] } }## for pakete for (clustergroesse in 1:maxCluster){ pvalues[clustergroesse,,1] <- pvalues[clustergroesse,,1] - pakete*(rank(statistiken[clustergroesse,,1], ties.method="min")) pvalues[clustergroesse,,2] <- pvalues[clustergroesse,,2] - pakete*(rank(statistiken[clustergroesse,,2], ties.method="min")) } pvalues=pvalues/(pakete*paketSize) return(pvalues) }