This vignette demonstrates the usage of the FDR controlling procedure of T. Bodnar and T. Dickhaus (2014). The paper is available at https://projecteuclid.org/euclid.ejs/1414588192. All following references to equations and theorems refer to this paper.
ac_fdr.testConsider, for example, p-values \(P_i\) given by \[ \begin{aligned} P_i&=\begin{cases} U_i&\text{for }i=1,\cdots,m_0\\ \Phi\left(\mu_i+\Phi^{-1}(U_i)\right)&\text{for }i=m_0+1,\cdots,m \end{cases} \end{aligned} \] where \(U=(U_1,\cdots,U_m)^T\) is a vector of marginally uniformly distributed random variables following a Clayton copula with parameter \(\eta=1\). These can be generated as follows:
library(copula)
set.seed(1)
m <- 20
m0 <- 0.8*m
p_values <- rCopula(1,onacopulaL(copClayton,list(1,1:20)))
mu<-runif(m-m0, min=-1, max=-1/2)
p_values[1,(m0+1):m]<-pnorm(sqrt(m)*mu+qnorm(p_values[(m0+1):m]),0,1)The FDR controlled simultaneous test of the hypotheses \(H_i:\mu_i=0\) versus the alternatives \(K_i:\mu_i<0\) can then be performed in terms of these p-values:
ac_fdr.test(p_values,setTheta(copClayton,1),m0,0.05,1e4)$test
#> [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#> [12] FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUEFor a discussion of the methods than can be utilised for empirical copula calibration (i.e. estimating the copula parameter from observations) see section 5 of T. Bodnar and T. Dickhaus (2014).
The distribution function at \(z\) can be evaluated by
cdf.Z(copula::setTheta(cop,theta),z)where cop is a copula object (i.e. copula::copClayton). Thus for the simulation it is only necessary to specify the calculation of \(z^*\). For the Clayton copula this is given by
z_star_Clayton <- function(m,q,eta) {
log(m) / ((m/q)^eta - (1/q)^eta)
}Note that, to be compatible with the copula-package, this example uses a generator that differs from the one used in the paper (cf. page 2218). The generator used for the Clayton copula is given by \(\psi(x)=(1+x)^{-1/\eta}\) which leads to qualitatively similar results to those obtained in the paper.
For the Gumbel copula \(z^*\) can be calculated by
z_star_Gumbel <- function(m,q,eta) {
log(m) / ((-log(q/m))^eta-(-log(q))^eta)
}Note: Since z_star_Gumbel is very small for larger theta (for example z_star_Gumbel(200,0.05,15) is of the same magnitude as \(10^{-14}\)) a Monte Carlo approximation to the distribution function is utilized for larger theta. In the paper this was done for all theta.
To see the full code for generating the figures execute the following code:
v <- vignette('fdr-test',package = 'MHTcop')
file.edit(paste(v$Dir,'doc',v$R,sep='/'))The upper bound is calculated according to the formula given in corollary 3.1. The adjustement factor \(\delta\) is calculated using the function ac_fdr.calc_delta:
calcBounds <- function(cop) {
upperBound <- (m0/m)*alpha
cat("Calculating upper bound for the",cop@name,"copula ( m =",m,")\n")
delta <- pbsapply(theta,function(theta)
ac_fdr.calc_delta(copula::setTheta(cop,theta),m,m0,
alpha=alpha,num.reps=NZ,calc.var=TRUE))
sharperUpperBound <- upperBound * delta[1,]
sharperUpperBound.var <- upperBound^2 * delta[2,]
delta <- delta[1,]
lowerBound <- sapply(theta,function(theta){alpha*(m0/m)*
(1+pgamma(log(m)/(m^(theta)-1),shape=1/(theta),scale=1)-
pgamma((log(m)*(m^(theta)))/(m^(theta)-1),shape=1/(theta),scale=1))})
list(upperBound=upperBound,sharperUpperBound=sharperUpperBound,
sharperUpperBound.var=sharperUpperBound.var,
delta=delta,lowerBound=lowerBound)
}Samples from model (16) for use in a Monte Carlo study of the FDR using the delta calculated previously by calc_bounds:
simFDR <- function(cop,delta) {
cat("Performing simulation for the",cop@name,"copula ( m =",m,")\n")
Sim <- do.call(cbind,pblapply(seq_along(theta),function(l) {
p_values<-matrix(0,3,m)
q_k<-(alpha/m)*seq(1,m,1)
p_values_data <- copula::rCopula(NZ,copula::onacopulaL(cop,list(theta[l],1:m)))
data_f<-function(s)
{
p_values[1,]<-p_values_data[s,]
p_values[2,1:m]<-0
mu<-runif(m-m0, min=-1, max=-1/2)
p_values[1,(m0+1):m]<-pnorm(sqrt(m)*mu+qnorm(p_values_data[s,(m0+1):m]),0,1)
p_values[2,(m0+1):m]<-1
sp_values<-matrix(0,3,m)
sp_values<-p_values[,order(p_values[1,])]
sp_values[3,]<-seq(1,m,1)
#Calculations for the FDR
R_m<-0
V_m<-0
if (sum(sp_values[1,]<q_k)>0)
{
R_m<-sum(max(sp_values[3,sp_values[1,]<q_k]))
V_m<-R_m-sum(sp_values[2,1:R_m])
}
#Calculations for the power
S_m<-0
if (sum(sp_values[1,]<q_k)>0)
{
S_m<-sum(sp_values[2,1:sum(max(sp_values[3,sp_values[1,]<q_k]))])
}
#Calculations for the power of the improved procedure
S_m.improved<-0
if (sum(sp_values[1,]<q_k/delta[l])>0)
{
S_m.improved<-sum(sp_values[2,1:sum(max(sp_values[3,sp_values[1,]<q_k/delta[l]]))])
}
return(c(V_m/max(1,R_m),S_m/(m-m0),S_m.improved/(m-m0)))
}
data_in<-sapply(1:NZ,data_f)
FDR.sd <- sd(data_in[1,])
data_in<-rowMeans(data_in)
data_in[4]<-FDR.sd
return(data_in)
}))
list(FDR=Sim[1,],empiricalPower=Sim[2,],empiricalPowerImproved=Sim[3,],
FDR.sd=Sim[4,])
}#> Generating Figure 2 a )
#> Generating Figure 3 a )
#> Generating Figure 4 a )
#> Generating Figure 6 a )
#> Generating Figure 7 a )
#> Generating Figure 8 a )