Reproducing published Structural Equation Models with OpenMx

This post presents R scripts to reproduce published Structural Equation Modeling analyses using OpenMx.

Click on the R file to see the R script. Click on the model to see a diagram of the model.

The publications

  • Jose, P. (2013). Doing statistical mediation and moderation. New York, NY: Guilford Press.
    A basic three-variable mediation analysis (See Chapter 3).
Mediation model
Jose_2013.r
## Jose, P. (2013). Doing statistical mediation and moderation. 
## New York, NY: Guilford Press.

## Chapter 3 describes a basic three-variable mediation analysis.

## Load packages
library(OpenMx)
library(haven)   # To read SPSS data files

# OpenMx is very verbose;
# everything must be specified explicitly,
# including means and intercepts.

## Get the data from Guilford Press web site
url <- "http://www.guilford.com/add/jose/mediation_example.sav"
df <- data.frame(haven::read_spss(url))

str(df)
head(df)
summary(df)

## The model is shown in Fig 3.3 (p. 46);
## or see the model diagram above.

#### Collect the bits and pieces needed by OpenMx
## Get the names of the variables
manifest <- names(df)

## Get data into OpenMx format
dataRaw <- mxData(observed = df, type = "raw")

## Regressions
#  - Arrows from "ple" and "grat" to "shs".
#  - Arrows are single headed - arrow head at "shs".
#  - Starting values are 0.5. When all starting values are the same,
#    the value needs to be listed only once.
#  - Labels are "cprime" for arrow "ple" to "shs";
#    and "b" for arrow "grat" to "shs".
regPaths1 <- mxPath(from = c("ple", "grat"), to = "shs",
   arrows = 1, values = 0.5,
   labels = c("cprime", "b"))

#  - Arrow from "ple" to "grat".
#  - Arrow is single headed - arrow head at "grat".
#  - Starting value is 0.5.
#  - Label is "a".
regPaths2 <- mxPath(from = "ple", to = "grat",
   arrows = 1, values = 0.5,
   labels = "a")

## Variances
## Exogenous variables ("ple") have variances;
## Endogenous variables ("grat" and "shs") have residual or error variances.
## Variance for exogenous variable is not shown in the model diagram.
## The distinction does not matter to OpenMx, but I distinguish them in the labels.

#  - Arrows are from manifest variables to manifest variables
#    (when "arrows = 2", the "to" argument can be omitted); 
#    ie, from "ple" to "ple"; from "grat" to "grat"; and from "shs" to "shs".
#  - Arrows are two headed.
#  - Starting values are 1.
#  - Labels are "vPLE" "eGRAT", and "eSHS".
varPaths <- mxPath(from = manifest,
   arrows = 2, values = 1,
   labels = c("vPLE", "eGRAT", "eSHS"))

## Means and intercepts
## Exogenous variables ("ple") have means;
## Endogenous variables ("grat" and "shs") have intercepts.
## Regress variables on a constant - in OpenMx, "one".
## Means and intercepts are not shown in the model diagram.
## The distinction does not matter to OpenMx, but I distinguish them in the labels.

#  - Arrows from "one" to the manifest variables;
#    ie, from "one" to "ple"; from "one" to "grat"; from "one" to "shs".
#  - Arrows are single headed.
#  - Starting values are 1.
#  - labels are "mPLE", "iGRAT", "iSHS".
means <- mxPath(from = "one", to = manifest,
   arrows = 1, values = 1,
   labels = c("mPLE", "iGRAT", "iSHS"))

## Indirect and total effects
indirect <- mxAlgebra(a * b, name = "indirect")
total <- mxAlgebra(a * b + cprime, name = "total")

## Set up the model with all the bits
medModel <- mxModel(model = "Mediation",
   type = "RAM",
   data = dataRaw,
   manifestVars = manifest,
   varPaths,
   regPaths1, regPaths2,
   means, 
   indirect, total)

## Run the model and get summary
# Compare with regression outputs in Tables 3.4 and 3.5 (p. 52)
fit <- mxRun(medModel)
summary(fit)   # Note: summary() does not return indirect and total effects
coef(fit)      # Just the estimates

## Extract indirect and total effects (and their standard errors) from "fit" object
# Compare with unstandardised indirect and total effect in Fig 3.6 (p. 59).
estimates <- mxEval(c(indirect, total), fit); estimates
SE <- sapply(c("indirect", "total"), function(x) mxSE(x, fit, silent = TRUE)); SE

## To get the standardised effects, mxStandardizeRAMpaths(fit) will give standardised
## effects for free parameters, but not derived effects.

# Could use mxStandardizeRAMpaths(fit) to extract standardised "a", "b", and "cprime",
# then calculate standardised indirect effect (a * b),
# and standardised total effect (a * b + cprime) by hand.

# Compare with standardised indirect and total effect in Fig 3.6.
# Compare standardised regression coefficients given in Fig 3.6, or
# in Tables 3.4 and 3.5.
mxStandardizeRAMpaths(fit)
estZ <- mxStandardizeRAMpaths(fit)[1:3, 8]
names(estZ) <- mxStandardizeRAMpaths(fit)[1:3, "label"]; estZ

estZ["indirect"] <- estZ["a"] * estZ["b"]
estZ["total"] <- estZ["indirect"] + estZ["cprime"]
estZ

## To get bootstrap CIs
fitBoot <- mxBootstrap(fit, 2000)
# statistics <- summary(fitBoot, boot.quantile = c(0.025, 0.975),
#    boot.SummaryType = "bcbci"); statistics
# Note: the bootstrap summary covers free parameters only,
# not the defined effects ("indirect" and "total")
ci <- mxBootstrapEval(c(a, b, cprime, indirect, total), fitBoot,
   bq = c(0.025, 0.975), method = "bcbci"); ci

## To get likelihood-based CIs
ci <- mxCI(c("a", "b", "cprime", "indirect", "total"))

# Add to the model
medModel <- mxModel(medModel, ci)

# Run the model
fit <- mxRun(medModel, intervals = TRUE)
summary(fit)$CI
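
## As a cross-check (my addition, not part of the original script), the same
## unstandardised regression coefficients can be obtained from two ordinary
## least-squares regressions (standard errors will differ slightly):
coef(lm(grat ~ ple, data = df))          # "a"
coef(lm(shs ~ grat + ple, data = df))    # "b" and "cprime"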


  • Kurbanoglu, N. & Takunyaci, M. (2021). A structural equation modeling on relationship between self-efficacy, physics laboratory anxiety and attitudes. Journal of Family, Counseling and Education, 6(1), 47-56.
    A basic three-variable mediation analysis using summary data.
Mediation model
Kurbanoglu_2021.r
## Kurbanoglu, N. & Takunyaci, M. (2021). A structural equation modeling
## on relationship between self-efficacy, physics laboratory anxiety
## and attitudes. Journal of Family, Counseling and Education, 6(1), 47-56.

## This example shows a basic three-variable mediation
## analysis, and how to use summary data (correlations, 
## standard deviations, and means) when the raw data
## are not available.

## Load package
library(OpenMx)

## Get the data from Table 1 (p. 50)
# Vectors of correlations, standard deviations, and means.
# Correlations entered row-by-row.
# (If entered column-by-column, use 'lower.tri' in place of 'upper.tri' below.)
vcor <- c(
   1,
   0.30,  1,
  -0.42, -0.32,  1)
vsd <- c(8.81, 7.95, 18.30)             # Standard deviations
vmean <- c(56.57, 40.39, 68.22)         # Means
n <- 513                                # Sample size

## Get the variable names (make sure order is the same as in Table 1)
names <- c("Att", "SE", "Anx")

## Get the co/variance matrix
# First, get full correlation matrix
mcor <- matrix( , 3, 3)                          # Empty matrix
mcor[upper.tri(mcor, diag = TRUE)] <- vcor       # Fill the upper triangle
mcor <- pmax(mcor, t(mcor), na.rm = TRUE)        # Fill the lower triangle

# Get co/variances
mcov <- outer(vsd, vsd) * mcor

# Name the rows and columns
dimnames(mcov) <- list(names, names); mcov
names(vmean) = names   # OpenMx requires the means be named

## The model is shown in Fig 1 (p. 51); or see the model diagram above.

#### Collect the bits and pieces needed by OpenMx
## Get data into OpenMx format
dataCov <- mxData(observed = mcov, type = "cov", means = vmean, numObs = n)

## Regressions
regPaths1 <- mxPath(from = c("SE", "Att"), to = "Anx",
   arrows = 1, values = 0.5, labels = c("cprime", "b"))

regPaths2 <- mxPath(from = "SE", to = "Att",
   arrows = 1, values = 0.5, labels = "a")

## Variances
varPaths <- mxPath(from = names, 
   arrows = 2, values = 1, labels = c("eAtt", "vSE", "eAnx"))

## Means and intercepts
means <- mxPath(from = "one", to = names,
   arrows = 1, values = 1, labels = c("iAtt", "mSE", "iAnx"))

## Indirect and total effects
indirect <- mxAlgebra(a * b, name = "indirect")
total <- mxAlgebra(a * b + cprime, name = "total")

## Set up the model with all the bits
medModel <- mxModel(model = "Mediation",
   type = "RAM",
   data = dataCov,
   manifestVars = names,
   varPaths,
   regPaths1, regPaths2,
   means, 
   indirect, total) 

## Run the model and get summary
fit <- mxRun(medModel)
summary(fit)

## Extract indirect and total effects (and their standard errors) from "fit" object
estimates <- mxEval(c(indirect, total), fit); estimates
SE <- sapply(c("indirect", "total"), function(x) mxSE(x, fit, silent = TRUE)); SE

## Get the standardised effects
# Compare with standardised estimates in Fig 1
mxStandardizeRAMpaths(fit)
estZ <- mxStandardizeRAMpaths(fit)[1:3, 8]
names(estZ) <- mxStandardizeRAMpaths(fit)[1:3, "label"]; estZ

# Calculate standardised effects for "indirect" and "total" by hand
estZ["indirect"] <- estZ["a"] * estZ["b"]
estZ["total"] <- estZ["indirect"] + estZ["cprime"]
estZ

## R squares - have to calculate by hand
# For variables implicated in regressions,
# R square is given by matrix product of:
#   row matrix of correlations and
#   column matrix of standardised regression coefficients

# Compare with R squares given in Fig 1.

# Get correlations (with names) and standardised regression coefficients
dimnames(mcor) = list(names, names); mcor
estZ

# R Squared for SE predicting Att:
# Correlation between Att and SE, and
# "a" standardised regression coefficient
RsqAtt = mcor["Att", "SE"] * estZ["a"]
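# (A quick check, my addition: with a single predictor, the standardised
#  coefficient equals the correlation, so R squared here is just the
#  squared correlation.)
mcor["Att", "SE"]^2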

# R Squared for SE and Att predicting Anx
# Correlations between Anx and SE, and Anx and Att
# "b" and "cprime" standardised regression coefficients
RsqAnx =  matrix(mcor["Anx", c("Att", "SE")], 1)  %*%
   matrix(estZ[c("b", "cprime")], 2)

# Combine them
Rsq = matrix(c(RsqAtt, RsqAnx), 
  dimnames = list(c("Att", "Anx"), "R Square"),
  2)
Rsq

## To get likelihood-based CIs
ci <- mxCI(c("a", "b", "cprime", "indirect", "total"))

# Add to the model
medModel <- mxModel(medModel, ci)

# Run the model
fit <- mxRun(medModel, intervals = TRUE)
summary(fit)$CI


  • Little, T., Slegers, D., & Card, N. (2006). A non-arbitrary method of identifying and scaling latent variables in SEM and MACS models. Structural Equation Modeling, 13(1), 59-72.
    Three methods of scaling (reference group, marker variable, and effects scaling) in a two-factor, two-group model. Some easier examples (one-factor model, two-factor model, one-factor two-group model) are shown here.
Model for Reference-Group Method
Model for Marker-Variable Method
Model for Effects-Scaling Method
Little_Scaling.r
## Little, T., Slegers, D., & Card, N. (2006). A non-arbitrary method of
## identifying and scaling latent variables in SEM and MACS models.
## Structural Equation Modeling, 13(1), 59-72.

## Methods of Scaling and Identification

## Demonstrates three methods of scaling in two-factor, two-group model:
## 1. Reference-group method - Constrain both latent variables' variances and means;
## 2. Marker-variable method - Constrain one loading and that indicator's intercept in both factors;
## 3. Effects-scaling method - Constrain sums of loadings and intercepts for both factors.

## Little et al. assume strong metric invariance:
## corresponding loadings and intercepts are constrained to equality across groups.

## Compare with results given in Table 2 (pp. 64-65).
## Little et al. show results for three versions of Method 2.
## Only the third is demonstrated here.

## Load package
library(OpenMx)

## Get the data from Appendix A
# 7th grade
cor7 <- c(
   1.00000,
   0.75854,  1.00000,
   0.76214,  0.78705,  1.00000,
   0.02766,  0.00973, -0.05762,  1.00000,
  -0.06112, -0.06105, -0.14060,  0.78501,  1.00000,
  -0.02222, -0.05180, -0.10250,  0.81616,  0.81076,  1.00000)

mean7 <- c(3.13552, 2.99061, 3.06945, 1.70069, 1.52705, 1.54483)
sd7 <- c(0.66770, 0.68506, 0.70672, 0.71418, 0.66320, 0.65276)
n7 <- 380

# 8th grade
cor8 <- c(
   1.00000,
   0.81366,  1.00000,
   0.84980,  0.83523,  1.00000,
  -0.18804, -0.15524, -0.21520,  1.00000,
  -0.28875, -0.24951, -0.33769,  0.78418,  1.00000,
  -0.29342, -0.21022, -0.30553,  0.79952,  0.83156,  1.00000)

mean8 <- c(3.07338, 2.84716, 2.97882, 1.71700, 1.57955, 1.55001)
sd8 <- c(0.70299, 0.71780, 0.76208, 0.65011, 0.60168, 0.61420)
n8 <- 379

## Get the variable names from Appendix A
names = c("pos1", "pos2", "pos3", "neg1", "neg2", "neg3")

# Get full correlation matrix for each Grade
mcor7 <- matrix( , 6, 6)                         # Empty matrix
mcor7[upper.tri(mcor7, diag = TRUE)] <- cor7     # Fill the upper triangle
mcor7 <- pmax(mcor7, t(mcor7), na.rm = TRUE)     # Fill the lower triangle

mcor8 <- matrix( , 6, 6)                          # Empty matrix
mcor8[upper.tri(mcor8, diag = TRUE)] <- cor8      # Fill the upper triangle
mcor8 <- pmax(mcor8, t(mcor8), na.rm = TRUE)      # Fill the lower triangle

# Get co/variance matrix for each grade
mcov7 <- outer(sd7, sd7) * mcor7
mcov8 <- outer(sd8, sd8) * mcor8

# Name the rows and columns
dimnames(mcov7) <- list(names, names)
dimnames(mcov8) <- list(names, names)
mcov7; mcov8
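
# The same steps are repeated for each grade. A small convenience function
# (my own addition, not part of the original script) wraps them up:
vec2cov <- function(vcor, vsd, varnames) {
   k <- length(vsd)
   mcor <- matrix(NA, k, k)
   mcor[upper.tri(mcor, diag = TRUE)] <- vcor    # Fill the upper triangle
   mcor <- pmax(mcor, t(mcor), na.rm = TRUE)     # Fill the lower triangle
   mcov <- outer(vsd, vsd) * mcor                # Scale to co/variances
   dimnames(mcov) <- list(varnames, varnames)
   mcov
}
all.equal(vec2cov(cor7, sd7, names), mcov7)   # Should be TRUE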

names(mean7) <- names   # OpenMx requires the means be named
names(mean8) <- names

# Get data into OpenMx format
data7 <- mxData(observed = mcov7, type = "cov", means = mean7, numObs = n7)
data8 <- mxData(observed = mcov8, type = "cov", means = mean8, numObs = n8)


### Method 1: Reference-Group Method
## Constrain latent variances to 1
## Constrain latent means to 0
## These constraints apply to Grade 7 only.

## Collect the bits and pieces needed by OpenMx
# Factor loadings
loadings1 <- mxPath(from = "POS", to = c("pos1", "pos2", "pos3"), arrows = 1,
   free = TRUE, values = 0.5,
   labels = c("lambda1", "lambda2", "lambda3"))

loadings2 <- mxPath(from = "NEG", to = c("neg1", "neg2", "neg3"), arrows = 1,
   free = TRUE, values = 0.5,
   labels = c("lambda4", "lambda5", "lambda6"))

# Factor variances and covariance - constrain variances to 1 for Grade 7 only
varFac7 <- mxPath(from = c("POS", "NEG"), arrows = 2, connect = "unique.pairs",
   free = c(FALSE, TRUE, FALSE), values = 1,
   labels = c("phi7_11", "phi7_12", "phi7_22"))
 
varFac8 <- mxPath(from = c("POS", "NEG"), arrows = 2, connect = "unique.pairs",
   free = TRUE, values = 1,
   labels = c("phi8_11", "phi8_12", "phi8_22"))

# Factor means - constrain means to 0 for Grade 7 only
means7 <- mxPath(from = "one", to = c("POS", "NEG"), arrows = 1,
   free = FALSE, values = 0,
   labels = c("kappa7_1", "kappa7_2"))

means8 <- mxPath(from = "one", to = c("POS", "NEG"), arrows = 1,
   free = TRUE, values = 1,
   labels = c("kappa8_1", "kappa8_2"))

# Residual variances
varRes7 <- mxPath(from = names, arrows = 2,
   free = TRUE, values = 1,
   labels = c("theta7_1", "theta7_2", "theta7_3", "theta7_4", "theta7_5", "theta7_6"))

varRes8 <- mxPath(from = names, arrows = 2,
   free = TRUE, values = 1,
   labels = c("theta8_1", "theta8_2", "theta8_3", "theta8_4", "theta8_5", "theta8_6"))

# Intercepts
intercepts <- mxPath(from = "one", to = names, arrows = 1,
   free = TRUE, values = 1,
   labels = c("tau1", "tau2", "tau3", "tau4", "tau5", "tau6"))

## Set up models for each Grade
modGr7 <- mxModel("Grade7", type = "RAM",
   manifestVars = names, latentVars = c("POS", "NEG"),
   data7, loadings1, loadings2, varFac7, means7, varRes7, intercepts)

modGr8 <- mxModel("Grade8", type = "RAM",
   manifestVars = names, latentVars = c("POS", "NEG"),
   data8, loadings1, loadings2, varFac8, means8, varRes8, intercepts)

## Combine the two models
fun <- mxFitFunctionMultigroup(c("Grade7.fitfunction", "Grade8.fitfunction"))
model1 <- mxModel("Reference-Group Method", modGr7, modGr8, fun)

## Run the model and get summary
# Compare with results for Method 1 in Table 2.
fit1 <- mxRun(model1)
summary1 <- summary(fit1, refModels = mxRefModels(fit1, run = TRUE))
summary1


### Method 2: Marker-Variable Method
## Constrain 3rd loading in POS to 1 in both groups
## Constrain 1st loading in NEG to 1 in both groups
## Constrain 3rd intercept in POS to 0 in both groups
## Constrain 1st intercept in NEG to 0 in both groups

## Collect the bits and pieces needed by OpenMx
# Factor loadings - 3rd loading for POS constrained to 1
#                 - 1st loading for NEG constrained to 1
loadings1 <- mxPath(from = "POS", to = c("pos1", "pos2", "pos3"), arrows = 1,
   free = c(TRUE, TRUE, FALSE), values = c(0.5, 0.5, 1),
   labels = c("lambda1", "lambda2", "lambda3"))

loadings2 <- mxPath(from = "NEG", to = c("neg1", "neg2", "neg3"), arrows = 1,
   free = c(FALSE, TRUE, TRUE), values = c(1, 0.5, 0.5),
   labels = c("lambda4", "lambda5", "lambda6"))

# Factor variances and covariance
varFac7 <- mxPath(from = c("POS", "NEG"), arrows = 2, connect = "unique.pairs",
   free = TRUE, values = 1, 
   labels = c("phi7_11", "phi7_12", "phi7_22"))

varFac8 <- mxPath(from = c("POS", "NEG"), arrows = 2, connect = "unique.pairs",
   free = TRUE, values = 1,
   labels = c("phi8_11", "phi8_12", "phi8_22"))

# Factor means
means7 <- mxPath(from = "one", to = c("POS", "NEG"), arrows = 1,
   free = TRUE, values = 1,
   labels = c("kappa7_1", "kappa7_2"))

means8 <- mxPath(from = "one", to = c("POS", "NEG"), arrows = 1,
   free = TRUE, values = 1,
   labels = c("kappa8_1", "kappa8_2"))

# Residual variances
varRes7 <- mxPath(from = names, arrows = 2,
   free = TRUE, values = 1,
   labels = c("theta7_1", "theta7_2", "theta7_3", "theta7_4", "theta7_5", "theta7_6"))

varRes8 <- mxPath(from = names, arrows = 2,
   free = TRUE, values = 1, 
   labels = c("theta8_1", "theta8_2", "theta8_3", "theta8_4", "theta8_5", "theta8_6"))

# Intercepts - 3rd intercept for POS & 1st intercept for NEG constrained to 0
intercepts <- mxPath(from = "one", to = names, arrows = 1,
   free = c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE), values = c(1, 1, 0, 0, 1, 1),
   labels = c("tau1", "tau2", "tau3", "tau4", "tau5", "tau6"))

## Set up models for each Grade
modGr7 <- mxModel("Grade7", type = "RAM",
   manifestVars = names, latentVars = c("POS", "NEG"),
   data7, loadings1, loadings2, varFac7, means7, varRes7, intercepts)

modGr8 <- mxModel("Grade8", type = "RAM",
   manifestVars = names, latentVars = c("POS", "NEG"),
   data8, loadings1, loadings2, varFac8, means8, varRes8, intercepts)

## Combine the two models
fun <- mxFitFunctionMultigroup(c("Grade7.fitfunction", "Grade8.fitfunction"))
model2 <- mxModel("Marker-Variable Method", modGr7, modGr8, fun)

## Run the model and get summary
# Compare with results for the third version of Method 2 in Table 2.
fit2 <- mxRun(model2)
summary2 <- summary(fit2, refModels = mxRefModels(fit2, run = TRUE))
summary2


### Method 3: Effects-Scaling Method
## Constrain loadings to add to 3 in both factors for both Grades
## Constrain intercepts to add to 0 in both factors for both Grades

## Collect the bits and pieces needed by OpenMx
# Factor loadings
loadings1 <- mxPath(from = "POS", to = c("pos1", "pos2", "pos3"), arrows = 1,
   free = TRUE, values = 0.5,
   labels = c("lambda1", "lambda2", "lambda3"))

loadings2 <- mxPath(from = "NEG", to = c("neg1", "neg2", "neg3"), arrows = 1,
   free = TRUE, values = 0.5,
   labels = c("lambda4", "lambda5", "lambda6"))

# Factor variances and covariance
varFac7 <- mxPath(from = c("POS", "NEG"), arrows = 2, connect = "unique.pairs",
   free = TRUE, values = 1, 
   labels = c("phi7_11", "phi7_12", "phi7_22"))

varFac8 <- mxPath(from = c("POS", "NEG"), arrows = 2, connect = "unique.pairs",
   free = TRUE, values = 1,
   labels = c("phi8_11", "phi8_12", "phi8_22"))

# Factor means
means7 <- mxPath(from = "one", to = c("POS", "NEG"), arrows = 1,
   free = TRUE, values = 1,
   labels = c("kappa7_1", "kappa7_2"))

means8 <- mxPath(from = "one", to = c("POS", "NEG"), arrows = 1,
   free = TRUE, values = 1,
   labels = c("kappa8_1", "kappa8_2"))

# Residual variances
varRes7 <- mxPath(from = names, arrows = 2,
   free = TRUE, values = 1,
   labels = c("theta7_1", "theta7_2", "theta7_3", "theta7_4", "theta7_5", "theta7_6"))

varRes8 <- mxPath(from = names, arrows = 2,
   free = TRUE, values = 1,
   labels = c("theta8_1", "theta8_2", "theta8_3", "theta8_4", "theta8_5", "theta8_6"))

# Intercepts
intercepts <- mxPath(from = "one", to = names, arrows = 1,
   free = TRUE, values = 1,
   labels = c("tau1", "tau2", "tau3", "tau4", "tau5", "tau6"))

## Set up models for each Grade
modGr7 <- mxModel("Grade7", type = "RAM",
   manifestVars = names, latentVars = c("POS", "NEG"),
   data7, loadings1, loadings2, varFac7, means7, varRes7, intercepts)

modGr8 <- mxModel("Grade8", type = "RAM",
   manifestVars = names, latentVars = c("POS", "NEG"),
   data8, loadings1, loadings2, varFac8, means8, varRes8, intercepts)

## Constraints
conLoad1  <- mxConstraint(lambda1 + lambda2 + lambda3 == 3)
conLoad2  <- mxConstraint(lambda4 + lambda5 + lambda6 == 3)
conInter1 <- mxConstraint(tau1 + tau2 + tau3 == 0)
conInter2 <- mxConstraint(tau4 + tau5 + tau6 == 0)

## Combine the two models
fun <- mxFitFunctionMultigroup(c("Grade7.fitfunction", "Grade8.fitfunction"))
model3 <- mxModel("Effects-Scaling Method", modGr7, modGr8,
          conLoad1, conLoad2, conInter1, conInter2, fun)

## Run the model and get summary
# Compare with results for Method 3 in Table 2.
fit3 <- mxRun(model3)
summary3 <- summary(fit3, refModels = mxRefModels(fit3, run = TRUE))
summary3

## Get fit measures
#  Compare with fit measures given on page 66
models <- list(
   "Method 1" = summary1,
   "Method 2" = summary2,
   "Method 3" = summary3)

measures <- c("Chi", "ChiDoF", "p", "CFI", "TLI", "RMSEA")

# Use new names here so the fitted models "fit1" and "fit2" are not overwritten
tab1 <- do.call(rbind, lapply(models, `[`, measures))
tab2 <- do.call(rbind, lapply(models, `[[`, "RMSEACI"))

cbind(tab1, tab2)

## Note: RMSEA given by OpenMx does not agree with the value given by Little et al. 
#  OpenMx does not adjust RMSEA in multiple group models -
#  see the RMSEA section in ?mxSummary for brief explanation, and
# https://openmx.ssri.psu.edu/index.php/forums/opensem-forums/fit-functions/rmsea-multiple-group-analysis
#  for another discussion.
#  LISREL, lavaan, Mplus (and possibly other packages) do make the adjustment.

# Equation for RMSEA, as given by Steiger (1998, Eq. 25, p. 417):
#  RMSEA = sqrt((ChiSq/df - 1) / n)

#  Substitute values from above;
#  gives the 'unadjusted RMSEA' as obtained by OpenMx.
sqrt((58.60023/24 - 1) / 759)

#  Steiger (1998, p. 417) states that 'adjusted RMSEA' can be obtained
#  by multiplying 'unadjusted RMSEA' by sqrt(g) (where g is the number
#  of groups).
sqrt(2) * sqrt((58.60023/24 - 1) / 759)

#  Gives the 'adjusted RMSEA' as given by Little et al. (ie, LISREL)
#  and by lavaan (see 'SEMs_with_lavaan').
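
#  A small helper (my own sketch, not from Little et al. or OpenMx) that
#  applies Steiger's adjustment to any chi square, df, sample size, and
#  number of groups:
adjustedRMSEA <- function(chisq, df, n, g) {
   sqrt(g) * sqrt(max((chisq/df - 1) / n, 0))   # max() keeps the value at 0 when chisq < df
}
adjustedRMSEA(58.60023, 24, 759, 2)   # Same value as the line above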


# Steiger, J. (1998). A note on multiple sample extensions of the RMSEA 
# fit index. Structural Equation Modeling, 5(4), 411-419.


  • Thompson, M., Liu, Y., & Green, S. (2023). Flexible structural equation modeling approaches for analyzing means. In R. Hoyle (Ed.), Handbook of structural equation modeling (2nd ed., pp. 385-408). New York, NY: Guilford Press.
    SEM approaches to ANOVA- and MANOVA-type models.
The Models
one-way ANOVA model
one-way ANCOVA model
two-way ANOVA model
one-way MANOVA model
one-way LATENT model
The Data
satisfactionI.r
### Data for Tables 21.1, 21.2, 21.3, 21.4 ###

df <- structure(list(x = c("a", "a", "a", "a", "a", "a", "a", "a", 
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "b", "b", "b", 
"b", "b", "b", "b", "b", "b", "b", "b", "b", "c", "c", "c", "c", 
"c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", 
"c"), g = c("m", "m", "m", "m", "m", "m", "f", "f", "f", "m", 
"m", "m", "m", "m", "m", "f", "f", "f", "m", "m", "m", "f", "f", 
"f", "m", "m", "m", "f", "f", "f", "m", "m", "m", "f", "f", "f", 
"f", "f", "f", "m", "m", "m", "f", "f", "f", "f", "f", "f"), 
    c = c("before", "before", "before", "before", "before", "before", 
    "before", "before", "before", "after", "after", "after", 
    "after", "after", "after", "after", "after", "after", "before", 
    "before", "before", "before", "before", "before", "after", 
    "after", "after", "after", "after", "after", "before", "before", 
    "before", "before", "before", "before", "before", "before", 
    "before", "after", "after", "after", "after", "after", "after", 
    "after", "after", "after"), y = c(21, 19, 22, 21, 24, 23, 
    21, 24, 23, 22, 22, 24, 25, 27, 30, 22, 23, 24, 23, 23, 21, 
    19, 22, 21, 30, 26, 22, 25, 26, 27, 27, 25, 24, 25, 23, 22, 
    23, 28, 26, 34, 30, 26, 26, 27, 28, 29, 40, 42)), class = "data.frame", row.names = c(NA, 
-48L))


head(df)

## x - Coping Strategy (a - No strategy; b - Discussion; c - Exercise)
## g - Gender
## c - before/after 
## y - dependent variable (Life Satisfaction)
ANOVA_data.r
### Data for Tables 21.1, 21.2, 21.3, 21.4 ###

## Reshape data - long to wide
tab <- 0.5 * table(df$x)  # Number of subjects in each condition
df$id <- c(rep(1:tab[1], 2), rep(1:tab[2], 2), rep(1:tab[3], 2))  # id variable 

df <- reshape(df, timevar = "c", idvar = c("id", "x", "g"), varying = c("pre", "y"), 
   direction = "wide")

df <- within(df, {
## Grand mean centered "pre" - the before scores
   preC <- scale(pre, scale = FALSE)

## Drop the id variable
   id <- NULL

## Gender X Coping Strategy interaction
  sg <- interaction(x, g, sep = "")
})
satisfactionII.r
### Data for Tables 21.5 and 21.6 ###

df <- structure(list(x = c("a", "a", "a", "a", "a", "a", "a", "a", 
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", 
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", 
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", 
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", 
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", 
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", 
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", 
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", 
"b", "b", "b", "b", "b", "b", "b", "b", "c", "c", "c", "c", "c", 
"c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", 
"c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", 
"c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", 
"c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", 
"c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", 
"c", "c", "c", "c", "c", "c", "c", "c", "c", "c"), x1 = c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L), x2 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L), x3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), y1 = c(18L, 24L, 
21L, 24L, 19L, 22L, 23L, 32L, 24L, 22L, 24L, 23L, 28L, 22L, 22L, 
23L, 19L, 28L, 25L, 25L, 27L, 21L, 33L, 24L, 23L, 28L, 29L, 24L, 
28L, 24L, 26L, 28L, 21L, 26L, 20L, 24L, 22L, 32L, 31L, 22L, 22L, 
27L, 22L, 26L, 24L, 24L, 25L, 27L, 26L, 24L, 22L, 18L, 25L, 27L, 
29L, 24L, 22L, 32L, 23L, 27L, 28L, 24L, 18L, 32L, 27L, 25L, 24L, 
25L, 29L, 21L, 29L, 25L, 25L, 25L, 19L, 32L, 29L, 22L, 18L, 26L, 
23L, 26L, 21L, 18L, 24L, 24L, 17L, 24L, 33L, 21L, 23L, 27L, 26L, 
28L, 20L, 27L, 25L, 25L, 25L, 18L, 27L, 25L, 22L, 23L, 26L, 23L, 
29L, 26L, 24L, 27L, 22L, 24L, 26L, 31L, 27L, 22L, 22L, 26L, 25L, 
21L, 26L, 25L, 24L, 26L, 28L, 27L, 26L, 26L, 19L, 22L, 25L, 26L, 
30L, 22L, 26L, 25L, 27L, 32L, 22L, 27L, 26L, 30L, 32L, 28L, 25L, 
23L, 21L, 14L, 26L, 28L, 29L, 25L, 27L, 25L, 26L, 21L, 23L, 25L, 
26L, 30L, 30L, 26L, 22L, 31L, 28L, 26L, 29L, 25L, 26L, 24L, 28L, 
22L, 35L, 26L, 34L, 29L, 26L, 27L, 32L, 16L, 26L, 22L, 25L, 30L, 
28L, 25L, 22L, 23L, 28L, 23L, 36L, 27L, 24L, 23L, 34L, 31L, 33L, 
26L, 27L, 22L), y2 = c(49L, 50L, 51L, 53L, 44L, 50L, 52L, 55L, 
53L, 48L, 48L, 51L, 57L, 51L, 48L, 51L, 48L, 53L, 59L, 55L, 51L, 
54L, 63L, 49L, 54L, 54L, 52L, 47L, 50L, 49L, 54L, 57L, 51L, 53L, 
49L, 53L, 53L, 57L, 58L, 49L, 53L, 55L, 59L, 57L, 55L, 53L, 55L, 
54L, 47L, 54L, 48L, 47L, 50L, 59L, 52L, 52L, 52L, 60L, 59L, 50L, 
55L, 59L, 55L, 59L, 61L, 48L, 55L, 55L, 60L, 50L, 62L, 54L, 56L, 
61L, 52L, 55L, 51L, 56L, 52L, 56L, 53L, 49L, 59L, 51L, 57L, 55L, 
48L, 54L, 56L, 53L, 47L, 54L, 52L, 54L, 50L, 54L, 52L, 54L, 59L, 
54L, 61L, 54L, 54L, 50L, 56L, 51L, 59L, 50L, 52L, 55L, 57L, 57L, 
62L, 55L, 53L, 51L, 50L, 60L, 51L, 52L, 52L, 56L, 52L, 55L, 56L, 
51L, 64L, 54L, 47L, 51L, 54L, 55L, 55L, 55L, 54L, 55L, 58L, 57L, 
56L, 60L, 55L, 54L, 61L, 55L, 50L, 53L, 60L, 49L, 58L, 61L, 55L, 
51L, 58L, 53L, 55L, 49L, 55L, 53L, 56L, 53L, 55L, 53L, 48L, 59L, 
56L, 52L, 55L, 58L, 54L, 54L, 59L, 49L, 60L, 62L, 57L, 59L, 57L, 
61L, 58L, 53L, 56L, 52L, 53L, 55L, 54L, 53L, 49L, 48L, 59L, 55L, 
61L, 59L, 50L, 55L, 58L, 63L, 53L, 56L, 55L, 54L), y3 = c(42L, 
42L, 46L, 39L, 39L, 37L, 38L, 43L, 36L, 37L, 40L, 45L, 46L, 39L, 
39L, 36L, 38L, 43L, 44L, 42L, 37L, 38L, 41L, 40L, 40L, 48L, 41L, 
37L, 42L, 32L, 38L, 43L, 38L, 41L, 45L, 39L, 40L, 41L, 49L, 40L, 
39L, 40L, 41L, 39L, 41L, 43L, 43L, 37L, 38L, 42L, 44L, 36L, 39L, 
44L, 41L, 38L, 40L, 49L, 41L, 39L, 46L, 45L, 40L, 50L, 45L, 43L, 
40L, 42L, 44L, 34L, 42L, 39L, 46L, 39L, 39L, 42L, 41L, 36L, 42L, 
46L, 39L, 39L, 37L, 36L, 42L, 32L, 37L, 43L, 42L, 42L, 46L, 47L, 
42L, 47L, 39L, 36L, 38L, 43L, 38L, 40L, 47L, 42L, 43L, 42L, 44L, 
42L, 45L, 41L, 39L, 45L, 42L, 41L, 46L, 44L, 43L, 38L, 42L, 44L, 
36L, 37L, 45L, 45L, 37L, 41L, 38L, 42L, 42L, 40L, 35L, 46L, 40L, 
42L, 48L, 42L, 42L, 44L, 44L, 48L, 38L, 43L, 42L, 40L, 48L, 39L, 
40L, 32L, 46L, 34L, 45L, 43L, 42L, 38L, 42L, 35L, 46L, 38L, 42L, 
39L, 43L, 43L, 50L, 41L, 42L, 43L, 44L, 35L, 44L, 42L, 41L, 47L, 
48L, 40L, 46L, 44L, 51L, 43L, 39L, 47L, 51L, 37L, 42L, 38L, 37L, 
38L, 43L, 40L, 36L, 40L, 46L, 43L, 50L, 42L, 42L, 40L, 43L, 46L, 
43L, 40L, 42L, 41L), y4 = c(29L, 31L, 34L, 36L, 26L, 30L, 34L, 
38L, 37L, 31L, 37L, 30L, 38L, 26L, 36L, 27L, 30L, 39L, 37L, 35L, 
39L, 33L, 35L, 32L, 34L, 40L, 32L, 31L, 38L, 38L, 34L, 42L, 30L, 
32L, 27L, 33L, 32L, 35L, 40L, 27L, 31L, 35L, 32L, 37L, 38L, 31L, 
29L, 28L, 33L, 35L, 31L, 22L, 34L, 37L, 27L, 33L, 35L, 47L, 30L, 
39L, 38L, 40L, 29L, 43L, 34L, 34L, 32L, 41L, 34L, 33L, 34L, 34L, 
32L, 32L, 30L, 34L, 32L, 38L, 25L, 35L, 34L, 24L, 34L, 33L, 26L, 
31L, 30L, 35L, 37L, 35L, 35L, 40L, 34L, 33L, 28L, 35L, 36L, 35L, 
40L, 34L, 39L, 33L, 28L, 34L, 31L, 29L, 39L, 40L, 35L, 37L, 36L, 
34L, 38L, 33L, 32L, 26L, 33L, 36L, 30L, 25L, 33L, 35L, 35L, 38L, 
36L, 39L, 32L, 34L, 35L, 34L, 36L, 28L, 35L, 30L, 31L, 38L, 35L, 
40L, 31L, 40L, 37L, 32L, 42L, 35L, 34L, 34L, 35L, 23L, 35L, 41L, 
39L, 37L, 34L, 26L, 35L, 34L, 35L, 33L, 31L, 40L, 38L, 32L, 29L, 
37L, 39L, 34L, 35L, 35L, 28L, 40L, 37L, 35L, 40L, 35L, 42L, 40L, 
42L, 37L, 39L, 32L, 38L, 31L, 34L, 39L, 38L, 35L, 32L, 33L, 39L, 
36L, 43L, 36L, 30L, 36L, 42L, 35L, 32L, 32L, 33L, 35L)), class = "data.frame", row.names = c(NA, 
-200L))



head(df)

## x - Coping Strategy (a - No strategy; b - Discussion; c - Exercise)
## y1, y2, y3, y4 - Multiple dependent variables (life-satisfaction scores)
R Scripts
one_way_ANOVA.r
## One-Way ANOVA
##
## Thompson, M., Liu, Y., & Green, S. (2023). Flexible structural equation modeling
## approaches for analyzing means. In R. Hoyle (Ed.), Handbook of structural
## equation modeling (2nd ed., pp. 385-408). New York, NY: Guilford Press.

# This example shows the SEM approach to a one-way ANOVA. 
# Results are presented in Table 21.1 (p. 389).

# The data are discussed on page 388.
# Data are available in "satisfactionI.r"
# "ANOVA_data.r" rearranges the data - from "long" to "wide".

# The variables used here are:
#   x - Coping Strategy (a - No strategy; b - Discussion; c - Exercise)
#   y - dependent variable (Life-Satisfaction) 

## Load packages
library(OpenMx)

## Get the data
source("satisfactionI.r")
head(df)

## Rearrange the data file
source("ANOVA_data.r")
head(df)

## Two models:
# "Less Constrained" model - means allowed to differ across the groups;
# "More Constrained" model - means constrained to equality across the groups.
# To be consistent with ANOVA's assumption of homogeneity of variances, 
# the residual variances are constrained to equality across the groups.

## Get data into OpenMx format for each group
dataA <- mxData(observed = df[df$x == "a", c("x","y")], type = "raw")
dataB <- mxData(observed = df[df$x == "b", c("x","y")], type = "raw")
dataC <- mxData(observed = df[df$x == "c", c("x","y")], type = "raw")

### "Less Constrained" model
## Means for each group - Means differ across the groups
meanA <- mxPath(from = "one", to = "y", values = 0.5, arrows = 1, label = "a1")
meanB <- mxPath(from = "one", to = "y", values = 0.5, arrows = 1, label = "a2")
meanC <- mxPath(from = "one", to = "y", values = 0.5, arrows = 1, label = "a3")
   
## Residual variances - Constrained to equality across the groups
var <- mxPath(from = "y", values = 1, arrows = 2, label = "e") 

## Set up the group models
modA <- mxModel("GrA", type = "RAM",
   manifestVars = "y", dataA, meanA, var)

modB <- mxModel("GrB", type = "RAM",
   manifestVars = "y", dataB, meanB, var)

modC <- mxModel("GrC", type = "RAM",
   manifestVars = "y", dataC, meanC, var)

## Combine the three models
fun <- mxFitFunctionMultigroup(c("GrA", "GrB", "GrC"))
modelLC <- mxModel("LC", modA, modB, modC, fun)

## Run the LC model and get the summary
fitLC <- mxRun(modelLC)
summary(fitLC, refModels = mxRefModels(fitLC, run = TRUE))

## Get the means and error variance
## Compare with SEM results in Table 21.1
meansLC <- coef(fitLC)[c("a1", "a2", "a3")]; meansLC
thetaLC <- coef(fitLC)["e"]; thetaLC

### "More Constrained" model
## Constraints
C1 <- mxConstraint(a1 == a2)
C2 <- mxConstraint(a2 == a3)

## Add them to "Less Constrained" model
modelMC <- mxModel(modelLC, C1, C2)
modelMC <- mxModel(modelMC, name = "MC")       # Change its name 

## Run the MC model and get the summary
fitMC <- mxRun(modelMC)
summary(fitMC, refModels = mxRefModels(fitMC, run = TRUE))
coef(fitMC)

## Get the means and error variance
## Compare with SEM results in Table 21.1
meansMC <- coef(fitMC)[c("a1", "a2", "a3")]; meansMC
thetaMC <- coef(fitMC)["e"]; thetaMC

## Contrast the two models, and compare with chi sq test in Table 21.1
anova(fitMC, fitLC)
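
## As a cross-check (my addition, not part of the original script),
## the conventional OLS one-way ANOVA on the same data:
summary(aov(y ~ x, data = df))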

## Get R square and compare with result given on page 390.
## R square formula given in Equation 21.4 (p. 390).
Rsquare <- (thetaMC - thetaLC)/thetaMC; Rsquare

## The warning message at the bottom of the summary might be disconcerting for some. 
# One way to avoid the message is not to run the reference models.
# Without the reference models, there will be no chi sq tests, 
# but that's okay; I'm not interested in the chi squares for each separate model.
# The model comparison is what I want, and it is still available.

summary(fitLC)
summary(fitMC)
anova(fitMC, fitLC)

## But where does the warning come from?
# First, some counting of degrees of freedom.

# There is 1 variable per group;
# thus, the co/variance matrix contains (1 X 2) / 2 = 1 piece of information,
# plus the mean; that's 2 pieces of information per group.
# There are 3 groups, thus 6 pieces of information submitted to the model.

# For the LC model,
# 1 mean per group is estimated = 3 means, and
# 1 variance in total (it is constrained to equality across the groups),
# giving 4 quantities estimated.

# That leaves 2 degrees of freedom for the LC model.
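
# A quick arithmetic check of that count (my own illustration):
# 3 groups x (1 variance + 1 mean) pieces of information,
# minus (3 means + 1 common variance) estimated quantities:
3 * (1 + 1) - (3 + 1)   # 2 degrees of freedom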

# When chi squares are required, and thus when reference models are included,
# two additional models are estimated:
# saturated model and independence model.

# The saturated model estimates means and variances, 
# but does not constrain variances to equality across the groups.
# There are 6 quantities estimated (3 means and 3 variances),
# leaving 0 degrees of freedom (hence the name, "saturated").
# The saturated model's log likelihood is used in the chi square calculation.
# The chi square value and df are correct (check with the values given in Table 21.1).

# The independence model sets all correlations to 0, 
# and estimates just the variances.
# But with just 1 variable, there is only 1 variance to be estimated
# (plus the means), and as a consequence, the independence model is
# the same as the saturated model, and indeed it generates the same
# log likelihood as the saturated model.
# This means that the independence model too is a perfect fit to the data.
# This, I think, is the basis of OpenMx's complaint, and the origin of the
# warning message. The independence model is supposed to be the
# worst-fitting model, but in this situation it ends up fitting the data
# perfectly; that is, better, not worse, than the preferred model.

# Therefore, the warning message can be safely ignored - everything is
# correctly estimated (and an overly cautious OpenMx alerts you to the fact that
# the preferred model fits the data worse than the independence model).
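
# To see this directly (my own check, not part of the original script),
# run the reference models and compare their -2 log likelihoods:
refs <- mxRefModels(fitLC, run = TRUE)
summary(refs$Saturated)$Minus2LogLikelihood      # Saturated model
summary(refs$Independence)$Minus2LogLikelihood   # Independence model - same value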


### Relax homogeneity of variances assumption
### The "Less Constrained" model

## Residual variances - differ across the groups
varA <- mxPath(from = "y", arrows = 2,
   free = TRUE, values = 1, label = "e1")

varB <- mxPath(from = "y", arrows = 2,
   free = TRUE, values = 1, label = "e2")

varC <- mxPath(from = "y", arrows = 2,
   free = TRUE, values = 1, label = "e3")

## Everything else stays the same

## Set up the group models
modA <- mxModel("GrA", type = "RAM",
   manifestVars = "y", dataA, meanA, varA)

modB <- mxModel("GrB", type = "RAM",
   manifestVars = "y", dataB, meanB, varB)

modC <- mxModel("GrC", type = "RAM",
   manifestVars = "y", dataC, meanC, varC)

## Combine the three models
fun <- mxFitFunctionMultigroup(c("GrA", "GrB", "GrC"))
modelLC <- mxModel("LC", modA, modB, modC, fun) 

## Run the LC model and get the summary
fitLC <- mxRun(modelLC)
summary(fitLC, refModels = mxRefModels(fitLC, run = TRUE))
coef(fitLC)

# Note: the LC model is the same as the saturated model and, in turn, the independence model.
# All three generate the same log likelihood.

## Get the means and error variances
meansLC <- coef(fitLC)[c("a1", "a2", "a3")]; meansLC
thetaLC <- coef(fitLC)[c("e1", "e2", "e3")]; thetaLC

### The "More Constrained" model
## Constraints - same as before
C1; C2

## Add them to "Less Constrained" model
modelMC <- mxModel(modelLC, C1, C2)
modelMC <- mxModel(modelMC, name = "MC")       # Change its name 

## Run the MC model and get the summary
fitMC <- mxRun(modelMC)
summary(fitMC, refModels = mxRefModels(fitMC, run = TRUE))
coef(fitMC)

## Get the means and error variances
meansMC <- coef(fitMC)[c("a1", "a2", "a3")]; meansMC
thetaMC <- coef(fitMC)[c("e1", "e2", "e3")]; thetaMC

## Contrast the two models
anova(fitMC, fitLC)
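
## (My addition, not part of the original script.) The classical analogue of
## relaxing the homogeneity of variances assumption is Welch's one-way test;
## it is not numerically equivalent to the model comparison above:
oneway.test(y ~ x, data = df)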
one_way_ANCOVA.r
## One-Way ANCOVA
##
## Thompson, M., Liu, Y., & Green, S. (2023). Flexible structural equation modeling
## approaches for analyzing means. In R. Hoyle (Ed.), Handbook of structural
## equation modeling (2nd ed., pp. 385-408). New York, NY: Guilford Press.

# This example shows the SEM approach to a one-way ANCOVA.
# Results are presented in Table 21.2 (p. 393).

# Means and variances for exogenous variables - the covariate (preC) - are not
# normally included in SEM diagrams, and they are not shown in these SEM diagrams.
# However, OpenMx requires them in its model statements.

# Data are available in "satisfactionI.r"
# "ANOVA_data.r" rearranges the data 
#   - from "long" to "wide",
#   - centring the Pre-Life-Satisfaction scores.

# The variables used here are:
#   x - Coping Strategy (a - No strategy; b - Discussion; c - Exercise)
#   y - dependent variable (Life-Satisfaction) 
#   preC - Pre-Life-Satisfaction scores centred on the grand mean

## Load packages
library(OpenMx)

## Get the data
source("satisfactionI.r")
head(df)

## Rearrange the data file
source("ANOVA_data.r")
head(df)

## Two models:
# "Less Constrained" model - means allowed to differ across the groups;
# "More Constrained" model - means constrained to equality across the groups.
# To be consistent with ANCOVA's assumptions of homogeneity of variances,
# and homogeneity of regression slopes,
# the residual variances and regression slopes are constrained to equality
# across the groups.

## Get data into OpenMx format for each group
dataA <- mxData(observed = df[df$x == "a", c("x","y", "preC")], type = "raw")
dataB <- mxData(observed = df[df$x == "b", c("x","y", "preC")], type = "raw")
dataC <- mxData(observed = df[df$x == "c", c("x","y", "preC")], type = "raw")

### "Less Constrained" model
## Means for each group - 
## "y" means differ across the groups.
## The means for the covariate ("preC") need to be included in the model.
manifest <- c("y", "preC")
meanA <- mxPath(from = "one", to = manifest, arrows = 1,
   values = 0.5, label = c("a1", "cov1"))
meanB <- mxPath(from = "one", to = manifest, arrows = 1,
   values = 0.5, label = c("a2", "cov2"))
meanC <- mxPath(from = "one", to = manifest, arrows = 1,
   values = 0.5, label = c("a3", "cov3"))

## Regression slopes - Constrained to equality across the groups
reg <- mxPath(from = "preC", to = "y", arrows = 1,
   values = 0.5, label = "b")

## Variances - 
## Residual variances - Constrained to equality across the groups
## The variances for the covariate ("preC") need to be included in the model
varA <- mxPath(from = manifest, arrows = 2,
   values = 1, label = c("e", "cov1var")) 
   
varB <- mxPath(from = manifest, arrows = 2,
   values = 1, label = c("e", "cov2var"))
   
varC <- mxPath(from = manifest, arrows = 2,
   values = 1, label = c("e", "cov3var"))

## Set up the group models
modA <- mxModel("GrA", type = "RAM",
   manifestVars = manifest, dataA, meanA, reg, varA)

modB <- mxModel("GrB", type = "RAM",
   manifestVars = manifest, dataB, meanB, reg, varB)

modC <- mxModel("GrC", type = "RAM",
   manifestVars = manifest, dataC, meanC, reg, varC)

## Combine the three models
fun <- mxFitFunctionMultigroup(c("GrA.fitfunction", "GrB.fitfunction", "GrC.fitfunction"))
modelLC <- mxModel("LC", modA, modB, modC, fun)

## Run the LC model and get the summary
fitLC <- mxRun(modelLC)
summary(fitLC, refModels = mxRefModels(fitLC, run = TRUE))

## Get the means and error variance
## Compare with SEM results in Table 21.2
meansLC <- coef(fitLC)[c("a1", "a2", "a3")]; meansLC
thetaLC <- coef(fitLC)["e"]; thetaLC

## Get regression slopes and compare with Table 21.2 footnote
slopeLC <- coef(fitLC)["b"]; slopeLC

### "More Constrained" model
## Constraints
C1 <- mxConstraint(a1 == a2)
C2 <- mxConstraint(a2 == a3)

## Add them to "Less Constrained" model
modelMC <- mxModel(modelLC, C1, C2)
modelMC <- mxModel(modelMC, name = "MC")       # Change its name 

## Run the MC model and get the summary
fitMC <- mxRun(modelMC)
summary(fitMC, refModels = mxRefModels(fitMC, run = TRUE))
coef(fitMC)

## Get the means and error variance
## Compare with SEM results in Table 21.2
meansMC <- coef(fitMC)[c("a1", "a2", "a3")]; meansMC
thetaMC <- coef(fitMC)["e"]; thetaMC

## Get regression slopes and compare with Table 21.2 footnote
slopeMC <- coef(fitMC)["b"]; slopeMC

## Contrast the two models, and compare with chi sq test in Table 21.2
anova(fitMC, fitLC)
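
## As a cross-check (my addition, not part of the original script),
## the conventional OLS one-way ANCOVA on the same data
## (the group effect is tested after adjusting for the covariate):
summary(aov(y ~ preC + x, data = df))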

## Get R square and compare with result given on page 394.
## R square formula given in Equation 21.9 (p. 394).
Rsquare <- (thetaMC - thetaLC)/thetaMC; Rsquare
two_way_ANOVA.r
## Two-Way ANOVA
##
## Thompson, M., Liu, Y., & Green, S. (2023). Flexible structural equation modeling
## approaches for analyzing means. In R. Hoyle (Ed.), Handbook of structural
## equation modeling (2nd ed., pp. 385-408). New York, NY: Guilford Press.

# This example shows the SEM approach to a two-way ANOVA.
# Results presented in Table 21.4 (p. 396).

# Data are available in "satisfactionI.r"
# "ANOVA_data.r" rearranges the data 
#   - from "long" to "wide",
#   - sets up the Gender X Coping Strategy interaction

# The variables used here are:
#   x - Coping Strategy (a - No strategy; b - Discussion; c - Exercise)
#   y - dependent variable (Life-Satisfaction) 
#   g - Gender
#   sg - Gender X Coping Strategy interaction

## Load packages
library(OpenMx)

## Get the data
source("satisfactionI.r")
head(df)

## Rearrange the data file
source("ANOVA_data.r")
head(df)

## Code to get the preliminary results presented in Table 21.3 (p. 395)
## is not shown here

## Two-way ANOVA
# There are six groups in the model, and thus there are six means.
# They are represented by the labels am, af, ..., cf in the model diagram.
# The "Less Constrained" model allows the six means to differ. 
#
# There are three "More Constrained" models to test for:
#   Gender main effect,
#   Coping Strategy main effect, and
#   Gender X Coping Strategy interaction.
# These effects can be tested for unweighted means and weighted means.
# On page 394, TLG discuss when to use weighted and unweighted means.

# To be consistent with ANOVA's assumption of homogeneity of variances,
# the variances are constrained to equality across the groups.

# Table 21.4 (p. 396) shows the results for the test of the
# "Coping Strategy" main effect for weighted means.
# Here, tests for both main effects and the interaction
# for weighted and unweighted means are shown.

## Get data into OpenMx format for each group
dataAM <- mxData(observed = df[df$sg == "am", c("sg","y")], type = "raw")
dataAF <- mxData(observed = df[df$sg == "af", c("sg","y")], type = "raw")
dataBM <- mxData(observed = df[df$sg == "bm", c("sg","y")], type = "raw")
dataBF <- mxData(observed = df[df$sg == "bf", c("sg","y")], type = "raw")
dataCM <- mxData(observed = df[df$sg == "cm", c("sg","y")], type = "raw")
dataCF <- mxData(observed = df[df$sg == "cf", c("sg","y")], type = "raw")

### "Less Constrained" model
## Means for each group - Means differ across the groups
meanAM <- mxPath(from = "one", to = "y", arrows = 1, values = 0.5, label = "am")
meanAF <- mxPath(from = "one", to = "y", arrows = 1, values = 0.5, label = "af")
meanBM <- mxPath(from = "one", to = "y", arrows = 1, values = 0.5, label = "bm")
meanBF <- mxPath(from = "one", to = "y", arrows = 1, values = 0.5, label = "bf")
meanCM <- mxPath(from = "one", to = "y", arrows = 1, values = 0.5, label = "cm")
meanCF <- mxPath(from = "one", to = "y", arrows = 1, values = 0.5, label = "cf")

## Residual variance - Constrained to equality across the groups
var <- mxPath(from = "y", arrows = 2, values = 1, label = "e")

## Set up the group models
modAM <- mxModel("GrAM", type = "RAM",
   manifestVars = "y", dataAM, meanAM, var)

modAF <- mxModel("GrAF", type = "RAM",
   manifestVars = "y", dataAF, meanAF, var)

modBM <- mxModel("GrBM", type = "RAM",
   manifestVars = "y", dataBM, meanBM, var)

modBF <- mxModel("GrBF", type = "RAM",
   manifestVars = "y", dataBF, meanBF, var)

modCM <- mxModel("GrCM", type = "RAM",
   manifestVars = "y", dataCM, meanCM, var)

modCF <- mxModel("GrCF", type = "RAM",
   manifestVars = "y", dataCF, meanCF, var)

## Combine the six models
fun <- mxFitFunctionMultigroup(c(
   "GrAM", "GrAF", "GrBM", "GrBF", "GrCM", "GrCF"))
modelLC <- mxModel("LC", modAM, modAF, modBM, modBF, modCM, modCF, fun)

## Run the LC model and get the summary
fitLC <- mxRun(modelLC)
summary(fitLC, refModels = mxRefModels(fitLC, run = TRUE))

## Get the means and error variance
## Compare with SEM results in Table 21.4
meansLC <- coef(fitLC)[c("am", "bm", "cm", "af", "bf", "cf")]; meansLC
thetaLC <- coef(fitLC)["e"]; thetaLC

### Gender main effect - unweighted means
## To test the Gender main effect (applied to unweighted means), 
## constrain the mean for males to equal the mean for females. 
## But there are three means for males and three means for females. 
## Simply constrain the sum of the three means for males to equal 
## the sum of the three means for females.

## Constraint
conGU <- mxConstraint(af + bf + cf == am + bm + cm)

## Add it to "Less Constrained" model
modelGU <- mxModel(modelLC, conGU)
modelGU <- mxModel(modelGU, name = "Gender Unweighted")       # Change its name 

## Run the MC model and get the summary
fitGU <- mxRun(modelGU)
summary(fitGU, refModels = mxRefModels(fitGU, run = TRUE))

## Get the means and error variance
meansGU <- coef(fitGU)[c("am", "bm", "cm", "af", "bf", "cf")]; meansGU
thetaGU <- coef(fitGU)["e"]; thetaGU

## Contrast the two models
anova(fitGU, fitLC)

### Coping Strategy main effect - unweighted means
## To test for the "Coping Strategy" main effect, 
## restrict the mean for "a" strategy to equal the mean for "b" strategy
## to equal the mean for "c" strategy. That is, constrain 
## the sum of the two "a" means to equal 
## the sum of the two "b" means; and 
## the sum of the two "b" means to equal 
## the sum of the two "c" means.

## Constraints
conCU1 <- mxConstraint(af + am == bf + bm)
conCU2 <- mxConstraint(af + am == cf + cm)

## Add them to "Less Constrained" model
modelCU <- mxModel(modelLC, conCU1, conCU2)
modelCU <- mxModel(modelCU, name = "Coping Unweighted")       # Change its name 

## Run the MC model and get the summary
fitCU <- mxRun(modelCU)
summary(fitCU, refModels = mxRefModels(fitCU, run = TRUE))

## Get the means and error variance
meansCU <- coef(fitCU)[c("am", "bm", "cm", "af", "bf", "cf")]; meansCU
thetaCU <- coef(fitCU)["e"]; thetaCU

## Contrast the two models
anova(fitCU, fitLC)

### Gender main effect - weighted means
## To test for the main effects applied to weighted means,
## the constraints are set the same way as before except this time
## the means are weighted in proportion to the cell frequencies.

# Constraint
freq <- table(df$g, df$x); freq      # cell frequencies
conGW <- mxConstraint((3*af + 3*bf + 6*cf)/12 == (6*am + 3*bm + 3*cm)/12)
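# (My note: the hard-coded weights above are just the cell frequencies
#  from 'freq' - females: a = 3, b = 3, c = 6; males: a = 6, b = 3, c = 3 -
#  so each side of the constraint is a frequency-weighted mean with total weight 12.)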

## Add it to "Less Constrained" model
modelGW <- mxModel(modelLC, conGW)
modelGW <- mxModel(modelGW, name = "Gender Weighted")       # Change its name

## Run the MC model and get the summary
fitGW <- mxRun(modelGW)
summary(fitGW, refModels = mxRefModels(fitGW, run = TRUE))

## Get the means and error variance
meansGW <- coef(fitGW)[c("am", "bm", "cm", "af", "bf", "cf")]; meansGW
thetaGW <- coef(fitGW)["e"]; thetaGW

## Contrast the two models
anova(fitGW, fitLC)

### Coping Strategy main effect - weighted means
# Compare with SEM section in Table 21.4

## Constraints
freq <- table(df$g, df$x); freq      # cell frequencies
conCW1 <- mxConstraint((3*af + 6*am)/9 == (3*bf + 3*bm)/6 )
conCW2 <- mxConstraint((3*bf + 3*bm)/6 == (6*cf + 3*cm)/9)

## Add them to "Less Constrained" model
modelCW <- mxModel(modelLC, conCW1, conCW2)
modelCW <- mxModel(modelCW, name = "Coping Weighted")       # Change its name

## Run the MC model and get the summary
fitCW <- mxRun(modelCW)
summary(fitCW, refModels = mxRefModels(fitCW, run = TRUE))

## Get the means and error variance
## Compare with SEM results in Table 21.4
meansCW <- coef(fitCW)[c("am", "bm", "cm", "af", "bf", "cf")]; meansCW
thetaCW <- coef(fitCW)["e"]; thetaCW

## Contrast the two models
anova(fitCW, fitLC)

### Gender X Coping Strategy interaction
## To test for the Gender X Coping Strategy interaction,
## the "More Constrained" model needs the means to be constrained so that
## the difference between the mean for "female" and the mean for "male" 
## remains constant across levels of "Coping Strategy". That is:

## the difference between "female" mean and "male" mean for the "a" strategy equals
## the difference between "female" mean and "male" mean for the "b" strategy; and
## the difference between "female" mean and "male" mean for the "b" strategy equals
## the difference between "female" mean and "male" mean for the "c" strategy.

## Constraints
conI1 <- mxConstraint((af - am) == (bf - bm))
conI2 <- mxConstraint((bf - bm) == (cf - cm))

## Add them to "Less Constrained" model
modelI <- mxModel(modelLC, conI1, conI2)
modelI <- mxModel(modelI, name = "Interaction")       # Change its name

## Run the MC model and get the summary
fitI <- mxRun(modelI)
summary(fitI, refModels = mxRefModels(fitI, run = TRUE))

## Get the means and error variance
meansI <- coef(fitI)[c("am", "bm", "cm", "af", "bf", "cf")]; meansI
thetaI <- coef(fitI)["e"]; thetaI

## Contrast the two models
anova(fitI, fitLC)
one_way_MANOVA.r
## One-way MANOVA
##
## Thompson, M., Liu, Y., & Green, S. (2023). Flexible structural equation modeling
## approaches for analyzing means. In R. Hoyle (Ed.), Handbook of structural
## equation modeling (2nd ed., pp. 385-408). New York, NY: Guilford Press.

# This example shows the SEM approach to a one-way MANOVA.
# Results presented in Table 21.5 (p. 399).

# The data are discussed on page 397.
# Data are available in "satisfactionII.r".

# The variables used here are:
#   x - Coping Strategy (a - No strategy; b - Discussion; c - Exercise)
#   y1, y2, y3, y4 - multiple dependent variables (Life-Satisfaction)

## Load packages
library(OpenMx)

## Get the data
source("satisfactionII.r")
head(df)

## Two models:
# "Less Constrained" model - means allowed to differ across the groups;
# "More Constrained" model - means constrained to equality across the groups.
# Variances and covariances are constrained to equality across the groups.

## Get data into OpenMx format for each group
dataA <- mxData(observed = df[df$x == "a", c("x", "y1", "y2", "y3", "y4")], type = "raw")
dataB <- mxData(observed = df[df$x == "b", c("x", "y1", "y2", "y3", "y4")], type = "raw")
dataC <- mxData(observed = df[df$x == "c", c("x", "y1", "y2", "y3", "y4")], type = "raw")

### "Less Constrained" model
## Means for each group - Means differ across the groups
manifest <- c("y1", "y2", "y3", "y4")
meanA <- mxPath(from = "one", to = manifest , arrows = 1,
   values = 0.5, label = c("a1y1", "a1y2", "a1y3", "a1y4"))

meanB <- mxPath(from = "one", to = manifest , arrows = 1,
   values = 0.5, label = c("a2y1", "a2y2", "a2y3", "a2y4"))

meanC <- mxPath(from = "one", to = manifest , arrows = 1,
   values = 0.5, label = c("a3y1", "a3y2", "a3y3", "a3y4"))

## Residual variances/covariances - Constrained to equality across the groups
var <- mxPath(from = manifest, to = manifest, connect = "unique.pairs", arrows = 2,
   values = c(
   1, 0.5, 0.5, 0.5,
        1, 0.5, 0.5,
             1, 0.5,
                  1),
   labels = 
   c("e1", "e12", "e13", "e14",
           "e2", "e23", "e24",
                 "e3", "e34",
                       "e4"))

## Set up the group models
modA <- mxModel("GrA", type = "RAM",
   manifestVars = manifest, dataA, meanA, var)

modB <- mxModel("GrB", type = "RAM",
   manifestVars = manifest, dataB, meanB, var)

modC <- mxModel("GrC", type = "RAM",
   manifestVars = manifest, dataC, meanC, var)

## Combine the three models
fun <- mxFitFunctionMultigroup(c("GrA", "GrB", "GrC"))
modelLC <- mxModel("LC", modA, modB, modC, fun)

## Run the LC model and get the summary
fitLC <- mxRun(modelLC)
summary(fitLC, refModels = mxRefModels(fitLC, run = TRUE))

## Get the means, and compare with SEM results in Table 21.5
meansLC <- coef(fitLC)[grepl("^a", names(coef(fitLC)))]
matrix(meansLC, byrow = TRUE, nrow = 3, dimnames = list(c("a", "b", "c"), manifest))

## Get the error SSCP matrix - compare with Table 21.5
## error SSCP = variance/covariance matrix X sample size  
thetaLC <- coef(fitLC)[grepl("^e", names(coef(fitLC)))]; thetaLC

eLC <- matrix(NA, 4, 4)                           # Empty matrix
## thetaLC is in lower-triangle, column-major order (the order the labels were listed)
eLC[lower.tri(eLC, diag = TRUE)] <- thetaLC       # Fill the lower triangle
eLC <- pmax(eLC, t(eLC), na.rm = TRUE)            # Mirror into the upper triangle
eLC <- eLC * 200
matrix(eLC, 4, 4, dimnames = list(manifest, manifest))
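
## Alternatively (a sketch, assuming thetaLC is in lower-triangle, column-major
## order as above), OpenMx's vech2full() rebuilds the symmetric matrix in one step
eLC2 <- vech2full(thetaLC) * 200
dimnames(eLC2) <- list(manifest, manifest)
eLC2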

### "More Constrained" model
# Constraints
C1 <- mxConstraint(a1y1 == a2y1)
C2 <- mxConstraint(a2y1 == a3y1)

C3 <- mxConstraint(a1y2 == a2y2)
C4 <- mxConstraint(a2y2 == a3y2)

C5 <- mxConstraint(a1y3 == a2y3)
C6 <- mxConstraint(a2y3 == a3y3)

C7 <- mxConstraint(a1y4 == a2y4)
C8 <- mxConstraint(a2y4 == a3y4)

## Add them to "Less Constrained" model
modelMC <- mxModel(modelLC, C1, C2, C3, C4, C5, C6, C7, C8)
modelMC <- mxModel(modelMC, name = "MC")       # Change its name 

## Run the MC model and get the summary
fitMC <- mxRun(modelMC)
summary(fitMC, refModels = mxRefModels(fitMC, run = TRUE))

## Get the means, and compare with SEM results in Table 21.5
meansMC <- coef(fitMC)[grepl("^a", names(coef(fitMC)))]
matrix(meansMC, byrow = TRUE, nrow = 3, dimnames = list(c("a", "b", "c"), manifest))

## Get the error SSCP matrix - Table 21.5
## error SSCP = variance/covariance matrix X sample size
thetaMC <- coef(fitMC)[grepl("^e", names(coef(fitMC)))]; thetaMC

eMC <- matrix(NA, 4, 4)                             # Empty matrix
eMC[lower.tri(eMC, diag = TRUE)] <- thetaMC         # Fill the lower triangle
eMC <- pmax(eMC, t(eMC), na.rm = TRUE)              # Mirror into the upper triangle
eMC <- eMC * 200
matrix(eMC, 4, 4, dimnames = list(manifest, manifest))

## Contrast the two models, and compare with the chi sq test in Table 21.5
anova(fitLC, fitMC)
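
## Equivalent comparison (a sketch) using OpenMx's own helper
mxCompare(fitLC, fitMC)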


### Relax homogeneity of variances and covariances assumption. 
### See discussion in section headed "Avoiding OLS Assumptions for 
### ANOVA/MANOVA Designs Using SEM" (pp. 389-401)
### "Less Constrained" model
# Variances and covariances - differ across the groups
varA <- mxPath(from = manifest, to = manifest, connect = "unique.pairs", arrows = 2,
   values = c(
   1, 0.5, 0.5, 0.5,
        1, 0.5, 0.5,
             1, 0.5,
                  1),
   labels = 
   c("e11a1", "e12a1", "e13a1", "e14a1",
              "e22a1", "e23a1", "e24a1",
                      "e33a1", "e34a1",
                               "e44a1"))

varB <- mxPath(from = manifest, to = manifest, connect = "unique.pairs", arrows = 2,
   values = c(
   1, 0.5, 0.5, 0.5,
        1, 0.5, 0.5,
             1, 0.5,
                  1),
   labels = 
   c("e11a2", "e12a2", "e13a2", "e14a2",
              "e22a2", "e23a2", "e24a2",
                       "e33a2", "e34a2",
                                "e44a2"))

varC <- mxPath(from = manifest, to = manifest, connect = "unique.pairs", arrows = 2,
   values = c(
   1, 0.5, 0.5, 0.5,
        1, 0.5, 0.5,
             1, 0.5,
                  1),
   labels = 
   c("e11a3", "e12a3", "e13a3", "e14a3",
              "e22a3", "e23a3", "e24a3",
                       "e33a3", "e34a3",
                                "e44a3"))

## Everything else stays the same

## Setup the group models
modA <- mxModel("GrA", type = "RAM",
   manifestVars = manifest, dataA, meanA, varA)

modB <- mxModel("GrB", type = "RAM",
   manifestVars = manifest, dataB, meanB, varB)

modC <- mxModel("GrC", type = "RAM",
   manifestVars = manifest, dataC, meanC, varC)

## Combine the three models
fun <- mxFitFunctionMultigroup(c("GrA", "GrB", "GrC"))
modelLC <- mxModel("LC", modA, modB, modC, fun)

## Run the LC model and get the summary
fitLC <- mxRun(modelLC)
summary(fitLC, refModels = mxRefModels(fitLC, run = TRUE))

## Get the means
meansLC <- coef(fitLC)[grepl("^a", names(coef(fitLC)))]
matrix(meansLC, byrow = TRUE, nrow = 3, dimnames = list(c("a", "b", "c"), manifest))

## Get variance/covariance matrix
thetaLC <- coef(fitLC)[grepl("^e", names(coef(fitLC)))]; thetaLC
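
## A sketch: rebuild each group's residual covariance matrix from the labels,
## assuming each group's ten values are in lower-triangle, column-major order
## (the order in which the labels were listed above)
grp <- sub(".*(a[123])$", "\\1", names(thetaLC))   # "a1", "a2", "a3"
lapply(split(thetaLC, grp), function(v) {
   m <- vech2full(v)
   dimnames(m) <- list(manifest, manifest)
   m
})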

### "More Constrained" model
## Constraints - same as before
C1; C2; C3; C4; C5; C6; C7; C8

## Add them to less constrained model
modelMC <- mxModel(modelLC, C1, C2, C3, C4, C5, C6, C7, C8)
modelMC <- mxModel(modelMC, name = "MC")       # Change its name 

## Run the MC model and get the summary
fitMC <- mxRun(modelMC)
summary(fitMC, refModels = mxRefModels(fitMC, run = TRUE))

## Get the means
meansMC <- coef(fitMC)[grepl("^a", names(coef(fitMC)))]
matrix(meansMC, byrow = TRUE, nrow = 3, dimnames = list(c("a", "b", "c"), manifest))

## Get the variance/covariance matrix
thetaMC <- coef(fitMC)[grepl("^e", names(coef(fitMC)))]; thetaMC

## Contrast the two models, and compare with the chi sq test on page 401
anova(fitLC, fitMC)



#### The same analyses in lavaan (variances and covariances are free to differ across the groups)
library(lavaan)

# Variances and covariances (for both models)
vcov <- "
   y1 ~~ y1 + y2 + y3 + y4
   y2 ~~ y2 + y3 + y4
   y3 ~~ y3 + y4
   y4 ~~ y4"

models <- list(

"Less Constrained" =  c(
# Means
   "y1 ~ c(a1, b1, c1)*1
    y2 ~ c(a2, b2, c2)*1
    y3 ~ c(a3, b3, c3)*1
    y4 ~ c(a4, b4, c4)*1",
    
    vcov),

"More Constrained" = c(
# Means
   "y1 ~ c(a1, a1, a1)*1
    y2 ~ c(a2, a2, a2)*1
    y3 ~ c(a3, a3, a3)*1
    y4 ~ c(a4, a4, a4)*1",
    
    vcov)
)

# Fit the models 
fit <- lapply(models, sem, data = df, group = "x")

# Get model summaries (the [[1]] shows the "Less Constrained" model; use [[2]] for "More Constrained")
lapply(fit, summary)[[1]]

# Contrast model fits
Reduce(anova, fit)
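
## Equivalent (a sketch): call lavaan's likelihood-ratio test directly
lavTestLRT(fit[["Less Constrained"]], fit[["More Constrained"]])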
one_way_LATENT.r
## One-way ANOVA of latent variable
##
## Thompson, M., Liu, Y., & Green, S. (2023). Flexible structural equation modeling
## approaches for analyzing means. In R. Hoyle (Ed.), Handbook of structural
## equation modeling (2nd ed., pp. 385-408). New York, NY: Guilford Press.

# This example shows the SEM approach to a one-way ANOVA of latent means.
# Results presented in Table 21.6 (p. 404).

# Data are available in "satisfactionII.r".

# The variables used here are:
#   x - Coping Strategy (a - No strategy; b - Discussion; c - Exercise)
#   y1, y2, y3, y4 - multiple dependent variables (Life-Satisfaction) 

## Load packages
library(OpenMx)

## Get the data
source("satisfactionII.r")
head(df)

## Comparisons of latent means assume some level of measurement invariance.
# Thompson, Liu, and Green (TLG) assume strict measurement invariance:
# loadings, intercepts, and residual variances and covariances are constrained
# to equality across the groups (the residual covariances are fixed at zero in
# every group, and so are trivially equal).
# TLG also constrain the latent error variance to equality across the groups.

# Latent variables require constraints for identification and scaling.
# TLG say the loading for the 4th indicator is constrained to 1,
# but that appears to be a typo: it is the loading of the 1st indicator that is
# constrained to 1, and because of measurement invariance, the constraint
# applies to all groups.
# Also, TLG constrain the latent mean to 0 in the first group only.

## Two models:
# "Less Constrained" model - latent means are freely estimated in the other two groups;
# "More Constrained" model - latent means constrained to equality across the groups;
# that is, they are all constrained to 0.

## Get data into OpenMx format for each group
dataA <- mxData(observed = df[df$x == "a", c("x", "y1", "y2", "y3", "y4")], type = "raw")
dataB <- mxData(observed = df[df$x == "b", c("x", "y1", "y2", "y3", "y4")], type = "raw")
dataC <- mxData(observed = df[df$x == "c", c("x", "y1", "y2", "y3", "y4")], type = "raw")

### "Less Constrained" model
# Factor loadings - equal across groups
manifest <- c("y1", "y2", "y3", "y4")
loadings <- mxPath(from = "LS", to = manifest, arrows = 1,
   free = c(FALSE, TRUE, TRUE, TRUE), values = 1,   # First loading constrained to 1
   labels = c("l1", "l2", "l3", "l4"))

## Factor variances - equal across groups
varFac <- mxPath(from = "LS", arrows = 2,
   free = TRUE, values = 1, labels = "d")

## Residual variances - equal across groups
varRes <- mxPath(from = manifest, arrows = 2,
   free = TRUE, values = 1, 
   labels = c("e1", "e2", "e3", "e4"))

## Intercepts - equal across groups
intercepts <- mxPath(from = "one", to = manifest, arrows = 1,
   free = TRUE, values = 1,
   labels = c("t1", "t2", "t3", "t4"))

## Factor means - differ across the groups
meanA <- mxPath(from = "one", to = "LS", arrows = 1,
   free = FALSE, values = 0, labels = "a1")
meanB <- mxPath(from = "one", to = "LS", arrows = 1,
   free = TRUE, values = 1, labels = "a2")
meanC <- mxPath(from = "one", to = "LS", arrows = 1,
   free = TRUE, values = 1, labels = "a3")

## Setup the group models
modA <- mxModel("GrA", type = "RAM",
   manifestVars = manifest, latentVars = "LS",
   dataA, loadings, varFac, varRes, intercepts, meanA)

modB <- mxModel("GrB", type = "RAM",
   manifestVars = manifest, latentVars = "LS",
   dataB, loadings, varFac, varRes, intercepts, meanB)

modC <- mxModel("GrC", type = "RAM",
   manifestVars = manifest, latentVars = "LS",
   dataC, loadings, varFac, varRes, intercepts, meanC)

## Combine the three models
fun <- mxFitFunctionMultigroup(c("GrA", "GrB", "GrC"))
modelLC <- mxModel("LC", modA, modB, modC, fun)

## Run the LC model and get the summary
fitLC <- mxRun(modelLC)
summary(fitLC, refModels = mxRefModels(fitLC, run = TRUE))

## Trouble!! The first run does not produce an acceptable solution.
## Try letting OpenMx select the starting values
modelLC <- mxAutoStart(modelLC)

## Try again
fitLC <- mxRun(modelLC)
summary(fitLC, refModels = mxRefModels(fitLC, run = TRUE))
## All good
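
## Another option (a sketch): mxTryHard() re-runs the model from several sets of
## perturbed starting values and keeps the best solution
fitLC2 <- mxTryHard(modelLC)   # fitLC2 is a hypothetical name, used only for illustration
summary(fitLC2)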

## Get latent means and variance, and compare with "All measures" row in Table 21.6
estimates <- coef(fitLC)

latentMeans <- coef(fitLC)[c("a2", "a3")]; latentMeans
latentVar <- coef(fitLC)["d"]; latentVar

### "More Constrained" model
## Constraints
C1 <- mxConstraint(a2 == 0)
C2 <- mxConstraint(a3 == 0)

## Add them to "Less Constrained" model
modelMC <- mxModel(modelLC, C1, C2)
modelMC <- mxModel(modelMC, name = "MC")       # Change its name 

## Run the MC model and get the summary
fitMC <- mxRun(modelMC)
summary(fitMC, refModels = mxRefModels(fitMC, run = TRUE))

## Contrast the two models, and compare with the chi sq test in Table 21.6
anova(fitLC, fitMC)

## Effect sizes, and compare with values on p. 405
# Cut-and-paste means and variances to get effect sizes
d1 <- (0.6638 - 0) / sqrt(8.1346); d1    # "no strategy" vs "discussion"
d2 <- (1.9446 - 0) / sqrt(8.1346); d2    # "no strategy" vs "exercise"

## Better - extract latent means and error variances from "Less Constrained" model
## Already extracted above
d1 <- (latentMeans[1] - 0) / sqrt(latentVar); d1   # "no strategy" vs "discussion"
d2 <- (latentMeans[2] - 0) / sqrt(latentVar); d2   # "no strategy" vs "exercise"


#### Minimal constraints
# This section applies to the 2nd and 3rd rows in Table 21.6.
# See also the discussion at the end of page 406 and the top of page 407.

## Get data into OpenMx format for each group
dataA <- mxData(observed = df[df$x == "a", c("x", "y1", "y2", "y3", "y4")], type = "raw")
dataB <- mxData(observed = df[df$x == "b", c("x", "y1", "y2", "y3", "y4")], type = "raw")
dataC <- mxData(observed = df[df$x == "c", c("x", "y1", "y2", "y3", "y4")], type = "raw")


### ANOVA model for the 2nd row in Table 21.6
# Only the 1st loading is constrained across groups (fixed at 1 in each group),
# and only the 1st intercept is constrained to equality across groups.

## "Less Constrained" model

manifest <- c("y1", "y2", "y3", "y4")

# Factor loadings - 1st loading constrained to 1
#                 - other loadings freely estimated across groups
loadsA <- mxPath(from = "LS", to = manifest, arrows = 1,
   free = c(FALSE, TRUE, TRUE, TRUE), values = 1,
   labels = c("l1", "l2A", "l3A", "l4A"))

loadsB <- mxPath(from = "LS", to = manifest, arrows = 1,
   free = c(FALSE, TRUE, TRUE, TRUE), values = 1,
   labels = c("l1", "l2B", "l3B", "l4B"))

loadsC <- mxPath(from = "LS", to = manifest, arrows = 1,
   free = c(FALSE, TRUE, TRUE, TRUE), values = 1,
   labels = c("l1", "l2C", "l3C", "l4C"))

# Factor variances - freely estimated across groups 
varFacA <- mxPath(from = "LS", arrows = 2,
   free = TRUE, values = 1, labels = "dA")

varFacB <- mxPath(from = "LS", arrows = 2,
   free = TRUE, values = 1, labels = "dB")

varFacC <- mxPath(from = "LS", arrows = 2,
   free = TRUE, values = 1, labels = "dC")

# Residual variances - freely estimated across groups
varResA <- mxPath(from = manifest, arrows = 2,
   free = TRUE, values = 1, 
   labels = c("e1A", "e2A", "e3A", "e4A"))

varResB <- mxPath(from = manifest, arrows = 2,
   free = TRUE, values = 1, 
   labels = c("e1B", "e2B", "e3B", "e4B"))

varResC <- mxPath(from = manifest, arrows = 2,
   free = TRUE, values = 1,
   labels = c("e1C", "e2C", "e3C", "e4C"))

# Intercepts - 1st intercept constrained to equality across groups
#            - other intercepts freely estimated across groups
interceptsA <- mxPath(from = "one", to = manifest, arrows = 1,
   free = TRUE, values = 1,
   labels = c("t1", "t2A", "t3A", "t4A"))

interceptsB <- mxPath(from = "one", to = manifest, arrows = 1,
   free = TRUE, values = 1,
   labels = c("t1", "t2B", "t3B", "t4B"))

interceptsC <- mxPath(from = "one", to = manifest, arrows = 1,
   free = TRUE, values = 1,
   labels = c("t1", "t2C", "t3C", "t4C"))

# Factor means - differ across the groups
meanA <- mxPath(from = "one", to = "LS", arrows = 1,
   free = FALSE, values = 0, labels = "aA")
meanB <- mxPath(from = "one", to = "LS", arrows = 1,
   free = TRUE, values = 1, labels = "aB")
meanC <- mxPath(from = "one", to = "LS", arrows = 1,
   free = TRUE, values = 1, labels = "aC")

# Setup the group models
modA <- mxModel("GrA", type = "RAM",
   manifestVars = manifest, latentVars = "LS",
   dataA, loadsA, varFacA, varResA, interceptsA, meanA)

modB <- mxModel("GrB", type = "RAM",
   manifestVars = manifest, latentVars = "LS",
   dataB, loadsB, varFacB, varResB, interceptsB, meanB)

modC <- mxModel("GrC", type = "RAM",
   manifestVars = manifest, latentVars = "LS",
   dataC, loadsC, varFacC, varResC, interceptsC, meanC)

# Combine the three models
fun <- mxFitFunctionMultigroup(c("GrA", "GrB", "GrC"))
modelLC <- mxModel("LC", modA, modB, modC, fun)

# Run the LC model and get the summary
fitLC <- mxRun(modelLC)

# Again trouble, but let OpenMx choose starting values
modelLC <- mxAutoStart(modelLC)

fitLC <- mxRun(modelLC)
summary(fitLC, refModels = mxRefModels(fitLC, run = TRUE))
# All good

## "More Constrained" model
# Constraints
C1 <- mxConstraint(aB == 0)
C2 <- mxConstraint(aC == 0)

# Add them to "Less Constrained" model
modelMC <- mxModel(modelLC, C1, C2)
modelMC <- mxModel(modelMC, name = "MC")       # Change its name

# Run the MC model and get the summary
fitMC <- mxRun(modelMC)
summary(fitMC, refModels = mxRefModels(fitMC, run = TRUE))

## Contrast the two models, and compare with the chi sq test in the 2nd row in Table 21.6
anova(fitLC, fitMC)

## Get latent means and variances, and compare with the 2nd row in Table 21.6
estimates <- coef(fitLC)

latentMeans <- coef(fitLC)[c("aB", "aC")]; latentMeans
latentVar <- coef(fitLC)[c("dA", "dB", "dC")]; latentVar


## Section of Table 21.6 headed "Results for measures with invariant parameters"
# Sample statistics - get means and variances of 1st indicator for the 3 groups
dfY1 <- split(df$y1, df$x)
meansY1 <- do.call(cbind, lapply(dfY1, mean)); meansY1
varY1 <- do.call(cbind, lapply(dfY1, var)); varY1

# Note: the means agree with the means in Table 21.6, but the variances do not.
# To get TLG's variances, the denominator in the variance formula needs to be n, not (n - 1).

# Get the n's
n <- do.call(cbind, lapply(dfY1, length)); n

# Adjust variance calculation
varY1 <- varY1 * (n - 1) / n; varY1   # All good
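
## Or (a sketch) compute the n-denominator variances directly
varY1n <- sapply(dfY1, function(y) mean((y - mean(y))^2)); varY1n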


# Differences between y1 means
meansY1[2] - meansY1[1]
meansY1[3] - meansY1[1]

# These equal the latent means; compare with latent means
latentMeans

# Alternatively, the y1 intercept (t1, constrained to equality across groups)
# added to the latent means gives the y1 means for groups "b" and "c";
# the group "a" mean equals the intercept because its latent mean is fixed at 0
intercepts <- coef(fitLC)["t1"]; intercepts
intercepts + latentMeans; meansY1


# Extract residual variances for y1 from estimates
residVarY1 <- coef(fitLC)[c("e1A", "e1B", "e1C")]   # Compare with 2nd row in Table 21.6

# Differences between y1 variances and y1 residual variances
varY1 - residVarY1

# Because the y1 loading is fixed at 1, these differences are the latent variances - compare with the latent variances
latentVar




### ANOVA model for the 3rd row in Table 21.6
# Only the 2nd loading is constrained across groups (fixed at 1 in each group),
# and only the 2nd intercept is constrained to equality across groups.

## "Less Constrained" model

manifest <- c("y1", "y2", "y3", "y4")

# Factor loadings - 2nd loading constrained to 1; other loadings freely estimated across groups
loadsA <- mxPath(from = "LS", to = manifest, arrows = 1,
   free = c(TRUE, FALSE, TRUE, TRUE), values = 1,
   labels = c("l1A", "l2", "l3A", "l4A"))

loadsB <- mxPath(from = "LS", to = manifest, arrows = 1,
   free = c(TRUE, FALSE, TRUE, TRUE), values = 1,
   labels = c("l1B", "l2", "l3B", "l4B"))

loadsC <- mxPath(from = "LS", to = manifest, arrows = 1,
   free = c(TRUE, FALSE, TRUE, TRUE), values = 1,
   labels = c("l1C", "l2", "l3C", "l4C"))

# Factor variances - freely estimated across groups
varFacA <- mxPath(from = "LS", arrows = 2,
   free = TRUE, values = 1, labels = "dA")

varFacB <- mxPath(from = "LS", arrows = 2,
   free = TRUE, values = 1, labels = "dB")

varFacC <- mxPath(from = "LS", arrows = 2,
   free = TRUE, values = 1, labels = "dC")

# Residual variances - freely estimated across groups
varResA <- mxPath(from = manifest, arrows = 2,
   free = TRUE, values = 1, 
   labels = c("e1A", "e2A", "e3A", "e4A"))

varResB <- mxPath(from = manifest, arrows = 2,
   free = TRUE, values = 1, 
   labels = c("e1B", "e2B", "e3B", "e4B"))

varResC <- mxPath(from = manifest, arrows = 2,
   free = TRUE, values = 1, 
   labels = c("e1C", "e2C", "e3C", "e4C"))

# Intercepts - 2nd intercept constrained to equality across groups; other intercepts freely estimated
interceptsA <- mxPath(from = "one", to = manifest, arrows = 1,
   free = TRUE, values = 1,
   labels = c("t1A", "t2", "t3A", "t4A"))

interceptsB <- mxPath(from = "one", to = manifest, arrows = 1,
   free = TRUE, values = 1,
   labels = c("t1B", "t2", "t3B", "t4B"))

interceptsC <- mxPath(from = "one", to = manifest, arrows = 1,
   free = TRUE, values = 1,
   labels = c("t1C", "t2", "t3C", "t4C"))

# Factor means - differ across the groups
meanA <- mxPath(from = "one", to = "LS", arrows = 1,
   free = FALSE, values = 0, labels = "aA")
meanB <- mxPath(from = "one", to = "LS", arrows = 1,
   free = TRUE, values = 1, labels = "aB")
meanC <- mxPath(from = "one", to = "LS", arrows = 1,
   free = TRUE, values = 1, labels = "aC")

# Setup the group models
modA <- mxModel("GrA", type = "RAM",
   manifestVars = manifest, latentVars = "LS",
   dataA, loadsA, varFacA, varResA, interceptsA, meanA)

modB <- mxModel("GrB", type = "RAM",
   manifestVars = manifest, latentVars = "LS",
   dataB, loadsB, varFacB, varResB, interceptsB, meanB)

modC <- mxModel("GrC", type = "RAM",
   manifestVars = manifest, latentVars = "LS",
   dataC, loadsC, varFacC, varResC, interceptsC, meanC)

# Combine the three models
fun <- mxFitFunctionMultigroup(c("GrA", "GrB", "GrC"))
modelLC <- mxModel("LC", modA, modB, modC, fun)

# Run the LC model and get the summary
fitLC <- mxRun(modelLC)

# Again, let OpenMx choose starting values
modelLC <- mxAutoStart(modelLC)

fitLC <- mxRun(modelLC)
summary(fitLC, refModels = mxRefModels(fitLC, run = TRUE))
# All good

## "More Constrained" model
# Constraints
C1 <- mxConstraint(aB == 0)
C2 <- mxConstraint(aC == 0)

# Add them to less constrained model
modelMC <- mxModel(modelLC, C1, C2)
modelMC <- mxModel(modelMC, name = "MC")       # Change its name 

# Run the MC model and get the summary
fitMC <- mxRun(modelMC)
summary(fitMC, refModels = mxRefModels(fitMC, run = TRUE))

## Contrast the two models, and compare with the chi sq test in the 3rd row in Table 21.6
anova(fitLC, fitMC)

## Get latent means and variances, and compare with the 3rd row in Table 21.6
estimates <- coef(fitLC)

latentMeans <- coef(fitLC)[c("aB", "aC")]; latentMeans
latentVar <- coef(fitLC)[c("dA", "dB", "dC")]; latentVar


## Section of Table 21.6 headed "Results for measures with invariant parameters"
# Sample statistics - get means and variances of the 2nd indicator for the 3 groups
dfY2 <- split(df$y2, df$x)
meansY2 <- do.call(cbind, lapply(dfY2, mean)); meansY2
varY2 <- do.call(cbind, lapply(dfY2, var)); varY2

n <- do.call(cbind, lapply(dfY2, length)); n

# Adjust variance calculation
varY2 <- varY2 * (n - 1) / n; varY2   # All good

# Differences between y2 means
meansY2[2] - meansY2[1]
meansY2[3] - meansY2[1]

# These equal the latent means; compare with latent means
latentMeans

# Alternatively, the y2 intercept (t2, constrained to equality across groups)
# added to the latent means gives the y2 means for groups "b" and "c";
# the group "a" mean equals the intercept because its latent mean is fixed at 0
intercepts <- coef(fitLC)["t2"]; intercepts
intercepts + latentMeans; meansY2

# Extract residual variances for y2 from estimates
residVarY2 <- coef(fitLC)[c("e2A", "e2B", "e2C")]   # Compare with the 3rd row in Table 21.6

# Differences between y2 variances and y2 residual variances
varY2 - residVarY2

# Because the y2 loading is fixed at 1, these differences are the latent variances - compare with the latent variances
latentVar