% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/GenData.R
\name{GenData}
\alias{GenData}
\title{Simulating Data Following John Ruscio's RGenData}
\usage{
GenData(
  response,
  nfact = 1,
  N.pop = 10000,
  Max.Trials = 5,
  lr = 1,
  cor.type = "pearson",
  use = "pairwise.complete.obs",
  isSort = FALSE
)
}
\arguments{
\item{response}{A required \code{N} × \code{I} matrix or data.frame consisting of the responses of \code{N} individuals
to \code{I} items.}

\item{nfact}{The number of factors to extract in factor analysis. (default = 1)}

\item{N.pop}{Size of the finite population to be simulated. (default = 10,000)}

\item{Max.Trials}{The maximum number of consecutive trials without obtaining a lower RMSR. (default = 5)}

\item{lr}{The learning rate for updating the correlation matrix during iteration. (default = 1)}

\item{cor.type}{A character string indicating which correlation coefficient (or covariance) is to be computed. One of "pearson" (default),
"kendall", or "spearman". See also \link[stats]{cor}.}

\item{use}{An optional character string specifying a method for computing covariances in the presence of missing values. This
must be one of the strings "everything", "all.obs", "complete.obs", "na.or.complete", or "pairwise.complete.obs" (default).
See also \link[stats]{cor}.}

\item{isSort}{Logical, determines whether the simulated data needs to be sorted in descending order. (default = FALSE)}
}
\value{
An \code{N.pop} × \code{I} matrix containing the simulated data.
}
\description{
This function simulates data with \code{nfact} factors based on empirical data.
It implements the data-simulation step of the \link[EFAfactors]{CD} function
and the \link[EFAfactors]{CDF} function. This function improves upon
\link[RGenData]{GenDataPopulation} by utilizing C++ code to achieve faster data simulation.
}
\details{
The core idea of \code{GenData} is to start with the empirical data's correlation matrix
and iteratively approach data with \code{nfact} factors. Any value in the simulated data must come
from the empirical data. The specific steps of \code{GenData} are as follows:

\describe{
  \item{(1)}{Use the empirical data (\eqn{\mathbf{Y}_{emp}}) correlation matrix as the target, \eqn{\mathbf{R}_{targ}}.}
  \item{(2)}{Simulate scores for \eqn{N.pop} examinees on \eqn{nfact} factors using a multivariate standard normal distribution:
        \deqn{\mathbf{S}_{(N.pop \times nfact)} \sim \mathcal{N}(0, 1)}
        Simulate noise for \eqn{N.pop} examinees on \eqn{I} items:
        \deqn{\mathbf{U}_{(N.pop \times I)} \sim \mathcal{N}(0, 1)}}
  \item{(3)}{Initialize \eqn{\mathbf{R}_{temp} = \mathbf{R}_{targ}}, and set the minimum Root
        Mean Square Residual \eqn{RMSR_{min} = \text{Inf}}. Start the iteration process.}
  \item{(4)}{Extract \code{nfact} factors from \eqn{\mathbf{R}_{temp}}, and obtain the factor
             loadings matrix \eqn{\mathbf{L}_{shar}}. Ensure that the first element of
             \eqn{\mathbf{L}_{shar}} is positive to standardize the direction.}
  \item{(5)}{Calculate the unique factor matrix \eqn{\mathbf{L}_{uniq, (I \times 1)}}, where \eqn{i} indexes items:
        \deqn{L_{uniq,i} = \sqrt{1 - \sum_{j=1}^{nfact} L_{shar, i, j}^2}}}
  \item{(6)}{Calculate the simulated data \eqn{\mathbf{Y}_{sim}}, where \eqn{i} indexes examinees and \eqn{j} indexes items:
        \deqn{Y_{sim, i, j} = \mathbf{S}_{i} \mathbf{L}_{shar, j}^T + U_{i, j} L_{uniq,j}}}
  \item{(7)}{Compute the correlation matrix of the simulated data, \eqn{\mathbf{R}_{simu}}.}
  \item{(8)}{Calculate the residual correlation matrix \eqn{\mathbf{R}_{resi}} between the
        target matrix \eqn{\mathbf{R}_{targ}} and the simulated data's correlation matrix \eqn{\mathbf{R}_{simu}}:
        \deqn{\mathbf{R}_{resi} = \mathbf{R}_{targ} - \mathbf{R}_{simu}}}
  \item{(9)}{Calculate the current RMSR:
        \deqn{RMSR_{cur} = \sqrt{\frac{\sum_{i < j} \mathbf{R}_{resi, i, j}^2}{0.5 \times (I^2 - I)}}}}
  \item{(10)}{If \eqn{RMSR_{cur} < RMSR_{min}}, update \eqn{\mathbf{R}_{temp} = \mathbf{R}_{temp} +
              lr \times \mathbf{R}_{resi}}, \eqn{RMSR_{min} = RMSR_{cur}}, set \eqn{\mathbf{R}_{min, resi} = \mathbf{R}_{resi}},
              and reset the count of consecutive trials without improvement \eqn{cou = 0}.
              If \eqn{RMSR_{cur} \geq RMSR_{min}}, update \eqn{\mathbf{R}_{temp} = \mathbf{R}_{temp} +
              0.5 \times cou \times lr \times \mathbf{R}_{min, resi}} and increment \eqn{cou = cou + 1}.}
  \item{(11)}{Repeat steps (4) through (10) until \eqn{cou \geq Max.Trials}.}
}

C++ code is used to speed up the simulation.
}
\references{
Ruscio, J., & Roche, B. (2012). Determining the number of factors to retain in an exploratory factor analysis using comparison data of known factorial structure. Psychological Assessment, 24, 282–292. \doi{10.1037/a0025697}.
}
