Testing EdSurvey, WeMix, Dire, MICE and Other Important R packages with Sample Educational Research PART 2
Student Cognitive Data (School Average)
'data.frame': 164 obs. of 9 variables:
$ SN : int 1 2 3 4 5 6 7 8 9 10 ...
$ cntschid: int 84000001 84000002 84000004 84000005 84000006 84000007 84000008 84000009 84000010 84000011 ...
$ N : int 28 36 35 28 1 6 27 30 30 35 ...
$ WTD_N : num 17410 19312 15465 26753 820 ...
$ PCT : num 0.489 0.543 0.435 0.752 0.023 ...
$ SE.PCT. : num 0.4961 0.4772 0.442 0.7666 0.0178 ...
$ MEAN : num 510 446 435 501 260 ...
$ SE.MEAN.: chr "6.383587" "5.111687" "4.00493" "5.773581" ...
$ X : num 500 NA NA NA NA ...
### c. Changing Names of the Students' Cognitive Scores
<- teacher_2 |>
teacher_2_sub select(cntschid, MEAN) |>
mutate(
school_id = cntschid, reading_score = MEAN,
cntschid = NULL, MEAN = NULL
)# summary(teacher_2_sub)
### d. Changing Types of Certain Variables
$school_id <- as.factor(teacher_2_sub$school_id)
teacher_2_sub$school_id <- as.factor(teacher_1$CNTSCHID)
teacher_1# str(teacher_1$school_id)
# str(teacher_2_sub$school_id)
Working on Teacher Only Dataset
<- teacher_1 |>
teacher_1_sub select(school_id, CNTTCHID, TEACHERID, TC_SEX, TC_AGE, TC_EMPLST, TC_TCEXP, TC_TEDU, TC_INITQUAL, TC_PD, TC_WORKSHOP, TC_TRAINING, EMPLTIM) |>
::clean_names() |>
janitormutate(
teacher_type = as.factor(teacherid),
teacher_id = as.factor(cnttchid),
tc_sex = as.factor(tc_sex),
tc_age = as.double(tc_age),
tc_emplst = as.factor(tc_emplst),
tc_tcexp = as.double(tc_tcexp),
tc_tedu = as.factor(tc_tedu),
tc_initqual = as.factor(tc_initqual),
tc_pd = as.factor(tc_pd),
tc_workshop = as.factor(tc_workshop),
tc_training = as.factor(tc_training),
empltim = as.factor(empltim),
teacherid = NULL
)str(teacher_1_sub)
tibble [3,526 × 14] (S3: tbl_df/tbl/data.frame)
$ school_id : Factor w/ 158 levels "84000001","84000002",..: 132 10 74 72 147 120 147 69 81 71 ...
$ cnttchid : num [1:3526] 8.4e+07 8.4e+07 8.4e+07 8.4e+07 8.4e+07 ...
..- attr(*, "label")= chr "Intl. Teacher ID"
..- attr(*, "format.spss")= chr "F8.0"
..- attr(*, "display_width")= int 10
$ tc_sex : Factor w/ 2 levels "1","2": 2 2 2 1 1 1 NA 2 2 1 ...
$ tc_age : num [1:3526] 46 58 32 34 22 38 NA 24 41 26 ...
$ tc_emplst : Factor w/ 4 levels "1","2","3","4": 1 4 1 1 1 1 NA 1 1 1 ...
$ tc_tcexp : num [1:3526] 22 36 4 13 2 11 NA 1 17 5 ...
$ tc_tedu : Factor w/ 3 levels "1","2","3": 2 2 1 3 3 2 NA 1 2 2 ...
$ tc_initqual : Factor w/ 5 levels "1","2","3","4",..: 1 1 2 1 1 1 NA 3 1 1 ...
$ tc_pd : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 NA 1 1 1 ...
$ tc_workshop : Factor w/ 2 levels "1","2": NA 1 1 1 NA 1 NA NA NA 1 ...
$ tc_training : Factor w/ 2 levels "1","2": NA 2 1 NA NA 2 NA NA NA 1 ...
$ empltim : Factor w/ 2 levels "1","2": 1 2 1 1 1 1 NA 1 1 1 ...
$ teacher_type: Factor w/ 2 levels "4","5": 1 2 2 2 1 2 NA 1 1 2 ...
$ teacher_id : Factor w/ 3526 levels "84000001","84000002",..: 1 2 3 4 5 6 7 8 9 10 ...
#### e.1. Summarizing the data
# summary(teacher_1_sub)
### f. Merging Data Tables, and Getting Data Table Ready
<- merge(teacher_1_sub, teacher_2_sub, by = "school_id")
teacher_data
### Changing to Data Frame and Getting Rid of Repeated Column
<- data.frame(teacher_data) |>
teacher_data mutate(cnttchid = NULL) |>
select(
school_id, teacher_id, tc_sex, tc_age, teacher_type, empltim, tc_emplst,
tc_tcexp, tc_tedu, tc_initqual, tc_training, tc_workshop, tc_pd, reading_score
)
# Getting Rid of First Four Digits from 'teacher_id' and 'school_id'
$school_id <- stringr::str_sub(teacher_data$school_id, -4, -1)
teacher_data$teacher_id <- stringr::str_sub(teacher_data$teacher_id, -4, -1)
teacher_data
# Getting Rid of Leading Zeros
$school_id <- stringr::str_remove(teacher_data$school_id, "^0+")
teacher_data$teacher_id <- stringr::str_remove(teacher_data$teacher_id, "^0+")
teacher_data
# summary(teacher_data$school_id)
# tail(teacher_data)
# writing the merged data to local directory
# write.csv(teacher_data, "teacher_data_final.csv", row.names = FALSE)
Getting Descriptive Information using Teacher ID
<- teacher_data |>
total_teachers_by_schools select(school_id, teacher_id) |>
group_by(school_id) |>
count() |>
rename(total_teachers = n) |>
mutate(n = NULL) |>
arrange(desc(total_teachers), desc(as.numeric(school_id))) |>
data.frame()
head(total_teachers_by_schools)
school_id total_teachers
1 173 25
2 171 25
3 169 25
4 168 25
5 164 25
6 163 25
Pairwise Correlation Among Variables
i. Among Age, Gender, Teacher Type, Employment Status (Full vs Part-time) & Reading Scores
<- GGally::ggpairs(teacher_data[, c(3:6, 14)])
t_cor t_cor
ii. Among Employment Status, Experience, Teacher Education, Initial Qualification, & Reading Scores
<- GGally::ggpairs(teacher_data[, c(7:10, 14)])
t_cor1 t_cor1
iii. Among Training, Workshop, Professional Development, & Reading Scores
<- GGally::ggpairs(teacher_data[, c(11:14)])
t_cor2 t_cor2
iv. Correlation Statistics Rounded to Two Decimal Places
<- Hmisc::rcorr(as.matrix.data.frame(teacher_data[, -c(1:2)]))
t_corr3 # t_corr3
round(t_corr3$r, digits = 2)
tc_sex tc_age teacher_type empltim tc_emplst tc_tcexp tc_tedu
tc_sex 1.00 0.06 0.19 -0.02 0.00 0.05 0.00
tc_age 0.06 1.00 0.08 0.07 0.08 0.76 0.01
teacher_type 0.19 0.08 1.00 0.03 0.04 0.05 0.03
empltim -0.02 0.07 0.03 1.00 0.92 0.04 0.00
tc_emplst 0.00 0.08 0.04 0.92 1.00 0.05 0.02
tc_tcexp 0.05 0.76 0.05 0.04 0.05 1.00 0.03
tc_tedu 0.00 0.01 0.03 0.00 0.02 0.03 1.00
tc_initqual 0.05 -0.02 0.04 0.00 0.02 -0.19 0.09
tc_training -0.03 0.05 NaN 0.06 0.06 0.09 0.02
tc_workshop 0.06 0.03 NaN 0.14 0.13 0.05 -0.01
tc_pd 0.04 0.04 0.00 0.11 0.15 0.05 0.01
reading_score 0.01 0.05 -0.02 0.11 0.09 0.10 -0.04
tc_initqual tc_training tc_workshop tc_pd reading_score
tc_sex 0.05 -0.03 0.06 0.04 0.01
tc_age -0.02 0.05 0.03 0.04 0.05
teacher_type 0.04 NaN NaN 0.00 -0.02
empltim 0.00 0.06 0.14 0.11 0.11
tc_emplst 0.02 0.06 0.13 0.15 0.09
tc_tcexp -0.19 0.09 0.05 0.05 0.10
tc_tedu 0.09 0.02 -0.01 0.01 -0.04
tc_initqual 1.00 -0.07 0.00 -0.02 -0.07
tc_training -0.07 1.00 0.06 0.03 0.02
tc_workshop 0.00 0.06 1.00 0.07 0.00
tc_pd -0.02 0.03 0.07 1.00 0.05
reading_score -0.07 0.02 0.00 0.05 1.00
v. Assocaited P-Values for Correlation Statistics Rounded to Three Decimal Places
round(t_corr3$P, digits = 3)
tc_sex tc_age teacher_type empltim tc_emplst tc_tcexp tc_tedu
tc_sex NA 0.002 0.000 0.224 0.821 0.009 0.986
tc_age 0.002 NA 0.000 0.000 0.000 0.000 0.761
teacher_type 0.000 0.000 NA 0.175 0.035 0.006 0.084
empltim 0.224 0.000 0.175 NA 0.000 0.049 0.922
tc_emplst 0.821 0.000 0.035 0.000 NA 0.005 0.388
tc_tcexp 0.009 0.000 0.006 0.049 0.005 NA 0.112
tc_tedu 0.986 0.761 0.084 0.922 0.388 0.112 NA
tc_initqual 0.004 0.353 0.019 0.984 0.348 0.000 0.000
tc_training 0.152 0.023 NaN 0.021 0.017 0.000 0.339
tc_workshop 0.007 0.193 NaN 0.000 0.000 0.027 0.625
tc_pd 0.057 0.034 0.817 0.000 0.000 0.008 0.730
reading_score 0.530 0.008 0.363 0.000 0.000 0.000 0.030
tc_initqual tc_training tc_workshop tc_pd reading_score
tc_sex 0.004 0.152 0.007 0.057 0.530
tc_age 0.353 0.023 0.193 0.034 0.008
teacher_type 0.019 NaN NaN 0.817 0.363
empltim 0.984 0.021 0.000 0.000 0.000
tc_emplst 0.348 0.017 0.000 0.000 0.000
tc_tcexp 0.000 0.000 0.027 0.008 0.000
tc_tedu 0.000 0.339 0.625 0.730 0.030
tc_initqual NA 0.002 0.985 0.399 0.000
tc_training 0.002 NA 0.008 0.199 0.330
tc_workshop 0.985 0.008 NA 0.002 0.842
tc_pd 0.399 0.199 0.002 NA 0.013
reading_score 0.000 0.330 0.842 0.013 NA
# t_corr3$n
# ++++++++++++++++++++++++++++
# flattenCorrMatrix
# ++++++++++++++++++++++++++++
# cormat : matrix of the correlation coefficients
# pmat : matrix of the correlation p-values
# flattenCorrMatrix <- function(cormat, pmat) {
# ut <- upper.tri(cormat)
# data.frame(
# row = rownames(cormat)[row(cormat)[ut]],
# column = rownames(cormat)[col(cormat)[ut]],
# cor =(cormat)[ut],
# p = pmat[ut]
# )
# }
# flattenCorrMatrix(t_corr3$r, t_corr3$p)
vi. Creating a Copy of “teacher_data”, Running Missing Data Analysis, and Imputing Data Using the “MICE” Package
library(mice)
library(ggmice)
# summary(teacher_data)
# class(teacher_data)
<- teacher_data
lit_data
# Patterns of Missing Data Variable-wise
plot_pattern(lit_data)
# Or
::aggr(lit_data, col = c("dodgerblue", "maroon"), numbers = TRUE, sortVars = TRUE, labels = names(lit_data), cex.axis = .7, gap = 3, ylab = c("Histogram of missing data", "Pattern")) VIM
Variables sorted by number of missings:
Variable Count
tc_training 0.5065230
tc_workshop 0.4994328
tc_tcexp 0.2101531
tc_sex 0.2050482
tc_pd 0.2047646
tc_age 0.1985252
tc_tedu 0.1979580
tc_initqual 0.1973908
empltim 0.1965400
tc_emplst 0.1965400
teacher_type 0.1885990
school_id 0.0000000
teacher_id 0.0000000
reading_score 0.0000000
# MissMech::TestMCARNormality(data = teacher_data, alpha = 0.05)
# naniar package tests for the MCAR Normality
::mcar_test(lit_data) naniar
# A tibble: 1 × 4
statistic df p.value missing.patterns
<dbl> <dbl> <dbl> <int>
1 3347. 293 0 29
Use Little’s (1988) test statistic to assess if data is missing completely at random (MCAR). The null hypothesis for this test is: the data is MCAR, and the test statistic is a chi-squared (mcar_test
of the lit_data
. Given the high statistic value and low p-value, i.e., lit_data
data is not missing completely at random.
h.vii. Imputing the Missing Data Using Multivariate Imputation by Chained Equations (MICE) Package
General Structure:
Where,
- m = number of desired imputed data (default
) - seed = offsetting the random number generator
- method = imputation method to be used for each column in data
- predictorMatrix = data specifying the set of predictors to be used for each target column
- ignore = A logical vector of nrow(data) elements indicating which rows are ignored when creating the imputation model. The default NULL includes all rows that have an observed value of the variable to imputed
- where = A data frame or matrix with logicals of the same dimensions as data indicating where in the data the imputations should be created. The default, where = is.na(data), specifies that the missing data should be imputed.
- maxit = the number of iterations. The default is 5.
- printFlag = If TRUE, mice prints history on console
- data.init = A data frame of the same size and type as data, without missing data, used to initialize imputations before the start of the iterative process.
<- mice::mice(lit_data, m = 5, maxit = 50, method = "pmm", seed = 0922, printFlag = FALSE)
imp_data summary(imp_data)
Class: mids
Number of multiple imputations: 5
Imputation methods:
school_id teacher_id tc_sex tc_age teacher_type
"" "" "pmm" "pmm" "pmm"
empltim tc_emplst tc_tcexp tc_tedu tc_initqual
"pmm" "pmm" "pmm" "pmm" "pmm"
tc_training tc_workshop tc_pd reading_score
"pmm" "pmm" "pmm" ""
PredictorMatrix:
school_id teacher_id tc_sex tc_age teacher_type empltim tc_emplst
school_id 0 0 1 1 1 1 1
teacher_id 0 0 1 1 1 1 1
tc_sex 0 0 0 1 1 1 1
tc_age 0 0 1 0 1 1 1
teacher_type 0 0 1 1 0 1 1
empltim 0 0 1 1 1 0 1
tc_tcexp tc_tedu tc_initqual tc_training tc_workshop tc_pd
school_id 1 1 1 1 1 1
teacher_id 1 1 1 1 1 1
tc_sex 1 1 1 1 1 1
tc_age 1 1 1 1 1 1
teacher_type 1 1 1 1 1 1
empltim 1 1 1 1 1 1
reading_score
school_id 1
teacher_id 1
tc_sex 1
tc_age 1
teacher_type 1
empltim 1
Number of logged events: 2247
it im dep meth out
1 0 0 constant school_id
2 0 0 constant teacher_id
3 1 1 tc_sex pmm empltim2
4 1 1 tc_age pmm empltim2
5 1 1 tc_tcexp pmm empltim2
6 1 1 tc_tedu pmm empltim2
# checking the imputation method
A new data set is created, i.e., imp_data
and it has stored all five newly imputed data set. We can check the values simply by using imp$variable_name
function in the {mice} package. There are 3526 rows, thus, I just want to show top 6 and bottom 6 values to same some space.
rbind(head(imp_data$imp$tc_training), tail(imp_data$imp$tc_training))
1 2 3 4 5
6 1 2 2 2 2
7 1 2 1 2 1
13 2 2 2 2 2
15 2 2 2 2 2
19 2 1 2 2 2
20 1 2 2 2 2
3520 1 2 2 2 2
3521 2 2 2 2 2
3522 2 1 2 2 2
3523 2 2 2 1 1
3525 1 1 1 2 2
3526 2 2 2 2 2
I used the predicted mean matching (PMM) method to complete the multiple imputation in my data. It is one of the multivariate imputation methods for multiple imputation, in which the missing data are imputed conditional on all other variables [for further information: Austin, P. C.,White, I. R., Lee, D. S., & Buuren, S. (2021). Missing data in clinical research: A tutorial on multiple imputation. Canadian Journal of Cardiology, 37(9), 1322-1331. https://doi.org/10.1016/j.cjca.2020.11.010]. The predictive-mean matching imputation process randomly selects a value from a pool of closest values based on the fitted regression coefficients, conducted using the observed empirical distribution. The general recommendation is that identifying 10 closest values should perform well (Morris, White, & Royston, 2014). Checking, if the requested imputation method is really implemented.
$meth imp_data
school_id teacher_id tc_sex tc_age teacher_type
"" "" "pmm" "pmm" "pmm"
empltim tc_emplst tc_tcexp tc_tedu tc_initqual
"pmm" "pmm" "pmm" "pmm" "pmm"
tc_training tc_workshop tc_pd reading_score
"pmm" "pmm" "pmm" ""
h.viii. Comparing Imputed Values with the Original Values Using Plots
It is essential to compare the imputed data with the original data set. There are many things we could do, but I would like to check how the distribution of variables compare (imputed vs. original) while predicting reading_scores
using all variables.
# Original Mice Plot (blue = original data, red = imputed data)
densityplot(imp_data, ~tc_sex)
# GGmice Option
::plot_trace(imp_data, "tc_tcexp") ggmice
# GGmice Option 2
::ggmice(
ggmice
imp_data,::aes(x = tc_workshop, y = reading_score)
ggplot2+
) ::geom_jitter(width = 0.25) +
ggplot2::labs(x = "Teacher Workshop Status") ggplot2
# GG Mice option 3
# ggmice::plot_flux(imp_data$tc_age)
Looking at the plots above, the imputed data seem to match with the original data and the original distribution patterns. The next task is to check the linear regression estimates using the pooled imputed data.
<- with(imp_data, lm(reading_score ~ tc_sex + tc_age + teacher_type + empltim + tc_tcexp + tc_tedu + tc_initqual + tc_training + tc_workshop + tc_pd))
impute_lm # Saving the statistics in a vector
<- summary(impute_lm)
individual_lm # Changing the vector to a data.frame and printing the output
data.frame(individual_lm)
term estimate std.error statistic p.value nobs
1 (Intercept) 511.90885857 4.3406695 117.93315743 0.000000e+00 3526
2 tc_sex2 2.08453457 1.7306907 1.20445243 2.284959e-01 3526
3 tc_age -0.37613349 0.1142005 -3.29362502 9.988484e-04 3526
4 teacher_type5 -2.80749273 1.7220308 -1.63033831 1.031197e-01 3526
5 empltim2 32.29931951 5.9756111 5.40519099 6.905485e-08 3526
6 tc_tcexp 0.75608654 0.1379961 5.47904413 4.577232e-08 3526
7 tc_tedu2 -2.09500619 2.1547498 -0.97227352 3.309815e-01 3526
8 tc_tedu3 -12.72077831 3.5556947 -3.57757893 3.514742e-04 3526
9 tc_initqual2 -6.40761795 4.2536070 -1.50639631 1.320554e-01 3526
10 tc_initqual3 -2.14040770 3.0444017 -0.70306350 4.820628e-01 3526
11 tc_initqual4 -0.72309445 8.1976233 -0.08820782 9.297165e-01 3526
12 tc_initqual5 5.55895551 4.2313425 1.31375691 1.890139e-01 3526
13 tc_training2 1.83338002 1.9278483 0.95099808 3.416708e-01 3526
14 tc_workshop2 -4.09149985 3.3048286 -1.23803691 2.157852e-01 3526
15 tc_pd2 8.66980014 6.3688724 1.36127710 1.735135e-01 3526
16 (Intercept) 514.89379018 4.4201864 116.48689472 0.000000e+00 3526
17 tc_sex2 3.17516043 1.7485779 1.81585297 6.947819e-02 3526
18 tc_age -0.44330092 0.1180818 -3.75418535 1.767225e-04 3526
19 teacher_type5 -1.69532989 1.7387403 -0.97503340 3.296109e-01 3526
20 empltim2 31.28540036 5.8108266 5.38398451 7.763567e-08 3526
21 tc_tcexp 0.80183953 0.1412179 5.67803122 1.473182e-08 3526
22 tc_tedu2 -1.81389918 2.1894399 -0.82847635 4.074571e-01 3526
23 tc_tedu3 -11.66316587 3.6147970 -3.22650642 1.264624e-03 3526
24 tc_initqual2 -4.22227672 4.4019825 -0.95917618 3.375360e-01 3526
25 tc_initqual3 -2.44219160 3.1308915 -0.78003074 4.354254e-01 3526
26 tc_initqual4 -2.74618000 8.2950914 -0.33106085 7.406183e-01 3526
27 tc_initqual5 -2.27118396 4.2522523 -0.53411317 5.932971e-01 3526
28 tc_training2 -1.05433291 1.8742520 -0.56253529 5.737873e-01 3526
29 tc_workshop2 -0.02593619 3.3177096 -0.00781750 9.937630e-01 3526
30 tc_pd2 8.65706297 6.3252586 1.36864965 1.711964e-01 3526
31 (Intercept) 510.76693745 4.3879418 116.40239669 0.000000e+00 3526
32 tc_sex2 3.55720955 1.7428283 2.04105561 4.131993e-02 3526
33 tc_age -0.30515132 0.1178016 -2.59038327 9.626556e-03 3526
34 teacher_type5 -2.71490897 1.7394124 -1.56081964 1.186564e-01 3526
35 empltim2 26.82895259 5.9968094 4.47387116 7.925127e-06 3526
36 tc_tcexp 0.64971318 0.1406016 4.62095020 3.957176e-06 3526
37 tc_tedu2 -1.65550326 2.1531696 -0.76886803 4.420234e-01 3526
38 tc_tedu3 -13.10850548 3.6170790 -3.62405834 2.941271e-04 3526
39 tc_initqual2 -11.00395055 4.4099914 -2.49523174 1.263307e-02 3526
40 tc_initqual3 -4.97746026 3.0950802 -1.60818457 1.078847e-01 3526
41 tc_initqual4 -8.16596544 8.0702978 -1.01185429 3.116775e-01 3526
42 tc_initqual5 4.54703352 4.3065120 1.05585067 2.911091e-01 3526
43 tc_training2 0.66146186 1.9089991 0.34649668 7.289902e-01 3526
44 tc_workshop2 -0.90841135 3.3026833 -0.27505252 7.832921e-01 3526
45 tc_pd2 9.83807800 6.1367162 1.60315022 1.089914e-01 3526
46 (Intercept) 512.27121626 4.4491614 115.13882500 0.000000e+00 3526
47 tc_sex2 1.73967465 1.7426714 0.99828036 3.182122e-01 3526
48 tc_age -0.35608005 0.1180540 -3.01624774 2.577582e-03 3526
49 teacher_type5 -1.41101371 1.7408511 -0.81053096 4.176900e-01 3526
50 empltim2 28.63788398 5.9136090 4.84270841 1.336104e-06 3526
51 tc_tcexp 0.80671212 0.1415576 5.69882572 1.305769e-08 3526
52 tc_tedu2 -1.99106737 2.1816628 -0.91263752 3.614959e-01 3526
53 tc_tedu3 -8.05202480 3.5765285 -2.25135203 2.442484e-02 3526
54 tc_initqual2 -10.18334214 4.4045564 -2.31200174 2.083518e-02 3526
55 tc_initqual3 -3.07394846 3.1169252 -0.98621183 3.240971e-01 3526
56 tc_initqual4 -9.22684503 8.7602841 -1.05325866 2.922949e-01 3526
57 tc_initqual5 -1.11919303 4.2635028 -0.26250552 7.929471e-01 3526
58 tc_training2 -1.19757269 1.9518245 -0.61356575 5.395421e-01 3526
59 tc_workshop2 -7.63274943 3.1759668 -2.40328375 1.630013e-02 3526
60 tc_pd2 13.61656887 6.2092629 2.19294450 2.837676e-02 3526
61 (Intercept) 512.21821972 4.4474926 115.17011229 0.000000e+00 3526
62 tc_sex2 2.40201028 1.7417580 1.37907237 1.679603e-01 3526
63 tc_age -0.29579032 0.1159539 -2.55093030 1.078565e-02 3526
64 teacher_type5 -1.77156099 1.7344805 -1.02137846 3.071455e-01 3526
65 empltim2 31.84410927 5.7974957 5.49273534 4.238855e-08 3526
66 tc_tcexp 0.70113390 0.1392521 5.03499801 5.018951e-07 3526
67 tc_tedu2 -3.72343937 2.1742933 -1.71248253 8.689608e-02 3526
68 tc_tedu3 -13.82443024 3.6750950 -3.76165245 1.715486e-04 3526
69 tc_initqual2 -10.52442473 4.1514416 -2.53512529 1.128376e-02 3526
70 tc_initqual3 -3.58855440 3.1282773 -1.14713438 2.514043e-01 3526
71 tc_initqual4 -9.94930433 8.1830466 -1.21584354 2.241263e-01 3526
72 tc_initqual5 2.25413948 4.3995203 0.51236029 6.084311e-01 3526
73 tc_training2 -0.61834373 1.9514078 -0.31687058 7.513606e-01 3526
74 tc_workshop2 -7.18946375 3.4280260 -2.09726056 3.604186e-02 3526
75 tc_pd2 9.87447139 6.1155833 1.61464096 1.064783e-01 3526
The above output shows the linear regression estimates of all imputed data sets (1:5). There were 5-imputed data sets and each data had 15-rows of output. The reason of printing them is to compare them with the pooled statistics, and the original data statistics and finding the best to replace missing values in the original data set.
h.vix. Checking the Pooled Statistics Based on All of the Above Statistics
summary(pool(impute_lm))
term estimate std.error statistic df p.value
1 (Intercept) 512.41180444 4.7111097 108.76668926 239.73637 5.055838e-206
2 tc_sex2 2.59171789 1.9286556 1.34379510 112.47803 1.817192e-01
3 tc_age -0.35529122 0.1338633 -2.65413385 68.61951 9.873717e-03
4 teacher_type5 -2.08006126 1.8701686 -1.11223193 193.19485 2.674205e-01
5 empltim2 30.17913314 6.4366539 4.68863693 148.46050 6.177823e-06
6 tc_tcexp 0.74309705 0.1583701 4.69215351 82.34731 1.062729e-05
7 tc_tedu2 -2.25578307 2.3566180 -0.95721203 164.53298 3.398646e-01
8 tc_tedu3 -11.87378094 4.3848969 -2.70788144 37.74662 1.011717e-02
9 tc_initqual2 -8.46832242 5.4288808 -1.55986524 29.60041 1.294208e-01
10 tc_initqual3 -3.24451249 3.3368165 -0.97233769 204.45842 3.320315e-01
11 tc_initqual4 -6.16227785 9.4693994 -0.65075699 73.02423 5.172458e-01
12 tc_initqual5 1.79395030 5.7014477 0.31464821 21.05426 7.561257e-01
13 tc_training2 -0.07508149 2.3891200 -0.03142642 31.81691 9.751258e-01
14 tc_workshop2 -3.96961211 5.0549291 -0.78529531 12.12450 4.473548e-01
15 tc_pd2 10.13119627 6.6196468 1.53047385 281.39327 1.270229e-01
h.vxi. Replacing the Missing Values and Running the Same Regression to Compare Values and Running the Same Regression
<- complete(imp_data, 1)
complete_data_1 <- complete(imp_data, 2)
complete_data_2 <- complete(imp_data, 3)
complete_data_3 <- complete(imp_data, 4)
complete_data_4 <- complete(imp_data, 5)
complete_data_5 # write.csv(complete_data_1, "imputed_teacher_data.csv", row.names = FALSE)
h.vxi.a. Based on Complete Data 1
# First Complete Dataset
<- with(complete_data_1, lm(reading_score ~ tc_sex + tc_age + teacher_type + empltim + tc_tcexp + tc_tedu + tc_initqual + tc_training + tc_workshop + tc_pd))
complete_lm_1 summary(complete_lm_1)
Call:
lm(formula = reading_score ~ tc_sex + tc_age + teacher_type +
empltim + tc_tcexp + tc_tedu + tc_initqual + tc_training +
tc_workshop + tc_pd)
Residuals:
Min 1Q Median 3Q Max
-257.176 -34.577 -0.439 32.648 113.452
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 511.9089 4.3407 117.933 < 2e-16 ***
tc_sex2 2.0845 1.7307 1.204 0.228496
tc_age -0.3761 0.1142 -3.294 0.000999 ***
teacher_type5 -2.8075 1.7220 -1.630 0.103120
empltim2 32.2993 5.9756 5.405 6.91e-08 ***
tc_tcexp 0.7561 0.1380 5.479 4.58e-08 ***
tc_tedu2 -2.0950 2.1547 -0.972 0.330981
tc_tedu3 -12.7208 3.5557 -3.578 0.000351 ***
tc_initqual2 -6.4076 4.2536 -1.506 0.132055
tc_initqual3 -2.1404 3.0444 -0.703 0.482063
tc_initqual4 -0.7231 8.1976 -0.088 0.929717
tc_initqual5 5.5590 4.2313 1.314 0.189014
tc_training2 1.8334 1.9278 0.951 0.341671
tc_workshop2 -4.0915 3.3048 -1.238 0.215785
tc_pd2 8.6698 6.3689 1.361 0.173513
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 48.51 on 3511 degrees of freedom
Multiple R-squared: 0.02575, Adjusted R-squared: 0.02186
F-statistic: 6.628 on 14 and 3511 DF, p-value: 1.776e-13
h.vxi.b. Based on Complete Data 2
# Second Complete Dataset
<- with(complete_data_2, lm(reading_score ~ tc_sex + tc_age + teacher_type + empltim + tc_tcexp + tc_tedu + tc_initqual + tc_training + tc_workshop + tc_pd))
complete_lm_2 summary(complete_lm_2)
Call:
lm(formula = reading_score ~ tc_sex + tc_age + teacher_type +
empltim + tc_tcexp + tc_tedu + tc_initqual + tc_training +
tc_workshop + tc_pd)
Residuals:
Min 1Q Median 3Q Max
-256.072 -35.040 -1.115 32.738 116.559
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 514.89379 4.42019 116.487 < 2e-16 ***
tc_sex2 3.17516 1.74858 1.816 0.069478 .
tc_age -0.44330 0.11808 -3.754 0.000177 ***
teacher_type5 -1.69533 1.73874 -0.975 0.329611
empltim2 31.28540 5.81083 5.384 7.76e-08 ***
tc_tcexp 0.80184 0.14122 5.678 1.47e-08 ***
tc_tedu2 -1.81390 2.18944 -0.828 0.407457
tc_tedu3 -11.66317 3.61480 -3.227 0.001265 **
tc_initqual2 -4.22228 4.40198 -0.959 0.337536
tc_initqual3 -2.44219 3.13089 -0.780 0.435425
tc_initqual4 -2.74618 8.29509 -0.331 0.740618
tc_initqual5 -2.27118 4.25225 -0.534 0.593297
tc_training2 -1.05433 1.87425 -0.563 0.573787
tc_workshop2 -0.02594 3.31771 -0.008 0.993763
tc_pd2 8.65706 6.32526 1.369 0.171196
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 48.5 on 3511 degrees of freedom
Multiple R-squared: 0.02641, Adjusted R-squared: 0.02253
F-statistic: 6.803 on 14 and 3511 DF, p-value: 6.236e-14
h.vxi.c. Based on Complete Data 3
# Third Complete Dataset
<- with(complete_data_3, lm(reading_score ~ tc_sex + tc_age + teacher_type + empltim + tc_tcexp + tc_tedu + tc_initqual + tc_training + tc_workshop + tc_pd))
complete_lm_3 summary(complete_lm_3)
Call:
lm(formula = reading_score ~ tc_sex + tc_age + teacher_type +
empltim + tc_tcexp + tc_tedu + tc_initqual + tc_training +
tc_workshop + tc_pd)
Residuals:
Min 1Q Median 3Q Max
-258.11 -34.89 -0.86 33.15 116.40
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 510.7669 4.3879 116.402 < 2e-16 ***
tc_sex2 3.5572 1.7428 2.041 0.041320 *
tc_age -0.3052 0.1178 -2.590 0.009627 **
teacher_type5 -2.7149 1.7394 -1.561 0.118656
empltim2 26.8290 5.9968 4.474 7.93e-06 ***
tc_tcexp 0.6497 0.1406 4.621 3.96e-06 ***
tc_tedu2 -1.6555 2.1532 -0.769 0.442023
tc_tedu3 -13.1085 3.6171 -3.624 0.000294 ***
tc_initqual2 -11.0040 4.4100 -2.495 0.012633 *
tc_initqual3 -4.9775 3.0951 -1.608 0.107885
tc_initqual4 -8.1660 8.0703 -1.012 0.311677
tc_initqual5 4.5470 4.3065 1.056 0.291109
tc_training2 0.6615 1.9090 0.346 0.728990
tc_workshop2 -0.9084 3.3027 -0.275 0.783292
tc_pd2 9.8381 6.1367 1.603 0.108991
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 48.52 on 3511 degrees of freedom
Multiple R-squared: 0.02556, Adjusted R-squared: 0.02168
F-statistic: 6.579 on 14 and 3511 DF, p-value: 2.377e-13
h.vxi.d. Based on Complete Data 4
# Fourth Complete Dataset
<- with(complete_data_4, lm(reading_score ~ tc_sex + tc_age + teacher_type + empltim + tc_tcexp + tc_tedu + tc_initqual + tc_training + tc_workshop + tc_pd))
complete_lm_4 summary(complete_lm_4)
Call:
lm(formula = reading_score ~ tc_sex + tc_age + teacher_type +
empltim + tc_tcexp + tc_tedu + tc_initqual + tc_training +
tc_workshop + tc_pd)
Residuals:
Min 1Q Median 3Q Max
-257.939 -34.470 -0.812 32.666 116.960
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 512.2712 4.4492 115.139 < 2e-16 ***
tc_sex2 1.7397 1.7427 0.998 0.31821
tc_age -0.3561 0.1181 -3.016 0.00258 **
teacher_type5 -1.4110 1.7409 -0.811 0.41769
empltim2 28.6379 5.9136 4.843 1.34e-06 ***
tc_tcexp 0.8067 0.1416 5.699 1.31e-08 ***
tc_tedu2 -1.9911 2.1817 -0.913 0.36150
tc_tedu3 -8.0520 3.5765 -2.251 0.02442 *
tc_initqual2 -10.1833 4.4046 -2.312 0.02084 *
tc_initqual3 -3.0739 3.1169 -0.986 0.32410
tc_initqual4 -9.2268 8.7603 -1.053 0.29229
tc_initqual5 -1.1192 4.2635 -0.263 0.79295
tc_training2 -1.1976 1.9518 -0.614 0.53954
tc_workshop2 -7.6327 3.1760 -2.403 0.01630 *
tc_pd2 13.6166 6.2093 2.193 0.02838 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 48.5 on 3511 degrees of freedom
Multiple R-squared: 0.02618, Adjusted R-squared: 0.0223
F-statistic: 6.743 on 14 and 3511 DF, p-value: 8.963e-14
h.vxi.e. Based on Complete Data 5
# Fifth Complete Dataset
<- with(complete_data_5, lm(reading_score ~ tc_sex + tc_age + teacher_type + empltim + tc_tcexp + tc_tedu + tc_initqual + tc_training + tc_workshop + tc_pd))
complete_lm_5 summary(complete_lm_5)
Call:
lm(formula = reading_score ~ tc_sex + tc_age + teacher_type +
empltim + tc_tcexp + tc_tedu + tc_initqual + tc_training +
tc_workshop + tc_pd)
Residuals:
Min 1Q Median 3Q Max
-256.429 -34.919 -0.523 33.261 115.690
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 512.2182 4.4475 115.170 < 2e-16 ***
tc_sex2 2.4020 1.7418 1.379 0.167960
tc_age -0.2958 0.1160 -2.551 0.010786 *
teacher_type5 -1.7716 1.7345 -1.021 0.307146
empltim2 31.8441 5.7975 5.493 4.24e-08 ***
tc_tcexp 0.7011 0.1393 5.035 5.02e-07 ***
tc_tedu2 -3.7234 2.1743 -1.712 0.086896 .
tc_tedu3 -13.8244 3.6751 -3.762 0.000172 ***
tc_initqual2 -10.5244 4.1514 -2.535 0.011284 *
tc_initqual3 -3.5886 3.1283 -1.147 0.251404
tc_initqual4 -9.9493 8.1830 -1.216 0.224126
tc_initqual5 2.2541 4.3995 0.512 0.608431
tc_training2 -0.6183 1.9514 -0.317 0.751361
tc_workshop2 -7.1895 3.4280 -2.097 0.036042 *
tc_pd2 9.8745 6.1156 1.615 0.106478
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 48.45 on 3511 degrees of freedom
Multiple R-squared: 0.02833, Adjusted R-squared: 0.02446
F-statistic: 7.313 on 14 and 3511 DF, p-value: 2.936e-15
Observation Based on Above Outputs:
- The model fit statistics are pretty much identical in all of the models.
- SE ~ 48.5
- DF ~ (14 - 3511)
- F ~ (6.28 - 7.31)
- R-Squared ~ (0.025 - 0.028)
- Adjusted R-Squared ~ (0.021 - 0.022)
- Comparison with the Pooled Model, which shows that four variables (e.g., teacher age, employment type, teacher experience, and teacher education) including intercept were statistically predicting students’ reading scores:
- Complete Data 1 : Same variables are statistically significant but their level of significance are more severe; the
estimates are comparable - Complete Data 2 : All four variables are statistically significant at somewhat similar level. However, teachers’ sex seem to be almost statistically significant unlike the pooled and the complete data 1 models.
- Complete Data 3 : The teachers’ sex is statistically significant at 5% level, plus teachers’ initial certification type 2 has been added into the statistically significant column.
- Complete Data 4 : More extreme departure from previous models, all variables in the pooled model are statistically significant but somewhat similar different levels. Teachers’ sex is no longer statistically significant. However, teacher certification type 2, including teacher workshop and professional development opportunities are statistically significant at 5% level.
- Complete Data 5 : It looks like a little improvement over 4th model. Teacher Education type two is almost statistically significant, the professional development has is not significant anymore.
- Complete Data 1 : Same variables are statistically significant but their level of significance are more severe; the
i. Error Message and Checking for Correction
summary(teacher_data)
school_id teacher_id tc_sex tc_age teacher_type
Length:3526 Length:3526 1 :1784 Min. :20.00 4 :1049
Class :character Class :character 2 :1019 1st Qu.:34.00 5 :1812
Mode :character Mode :character NA's: 723 Median :42.00 NA's: 665
Mean :42.87
3rd Qu.:51.00
Max. :70.00
NA's :700
empltim tc_emplst tc_tcexp tc_tedu tc_initqual tc_training
1 :2773 1 :2773 Min. : 0.00 1 : 577 1 :2303 1 : 429
2 : 60 2 : 22 1st Qu.: 7.00 2 :2022 2 : 113 2 :1311
NA's: 693 3 : 21 Median :14.00 3 : 229 3 : 241 NA's:1786
4 : 17 Mean :14.95 NA's: 698 4 : 31
NA's: 693 3rd Qu.:21.00 5 : 142
Max. :50.00 NA's: 696
NA's :741
tc_workshop tc_pd reading_score
1 :1637 1 :2753 Min. :260.3
2 : 128 2 : 51 1st Qu.:468.2
NA's:1761 NA's: 722 Median :502.7
Mean :505.1
3rd Qu.:537.1
Max. :608.0
table(teacher_data$teacher_type, teacher_data$empltim)
1 2
4 1022 17
5 1751 43
table(teacher_data$teacher_type, teacher_data$tc_tedu)
1 2 3
4 216 759 65
5 361 1263 164
table(teacher_data$empltim, teacher_data$tc_tedu)
1 2 3
1 562 1983 221
2 15 37 8
table(teacher_data$teacher_type, teacher_data$empltim, teacher_data$tc_tedu)
, , = 1
1 2
4 212 4
5 350 11
, , = 2
1 2
4 745 13
5 1238 24
, , = 3
1 2
4 65 0
5 156 8
Because the model below did not converge, the Lavaan error report suggested that one of the combinations had the total Zero values. The above outcomes show cross tab between/among variables in an attempt to find out the troubling variable.
Structural Modeling
<- "
lat_mod # Measurement Model
demography =~ tc_sex + tc_age
education =~ tc_tedu + tc_initqual
training =~ tc_training + tc_workshop + tc_pd
# Regression Model
reading_score ~ demography + education + training
education ~ demography + training
training ~ demography
"
<- sem(lat_mod,
tech_model data = teacher_data,
ordered = c("tc_sex", "tc_tedu", "tc_initqual", "tc_training", "tc_workshop", "tc_pd")
)summary(tech_model, standardized = TRUE, rsquare = TRUE, fit.measures = TRUE, modindices = TRUE)
lavaan 0.6.14 ended normally after 625 iterations
Estimator DWLS
Optimization method NLMINB
Number of model parameters 27
Used Total
Number of observations 1709 3526
Model Test User Model:
Standard Scaled
Test Statistic 22.441 23.614
Degrees of freedom 15 15
P-value (Chi-square) 0.097 0.072
Scaling correction factor 0.976
Shift parameter 0.627
simple second-order correction
Model Test Baseline Model:
Test statistic 112.688 105.734
Degrees of freedom 28 28
P-value 0.000 0.000
Scaling correction factor 1.089
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.912 0.889
Tucker-Lewis Index (TLI) 0.836 0.793
Robust Comparative Fit Index (CFI) NA
Robust Tucker-Lewis Index (TLI) NA
Root Mean Square Error of Approximation:
RMSEA 0.017 0.018
90 Percent confidence interval - lower 0.000 0.000
90 Percent confidence interval - upper 0.031 0.032
P-value H_0: RMSEA <= 0.050 1.000 1.000
P-value H_0: RMSEA >= 0.080 0.000 0.000
Robust RMSEA NA
90 Percent confidence interval - lower NA
90 Percent confidence interval - upper NA
P-value H_0: Robust RMSEA <= 0.050 NA
P-value H_0: Robust RMSEA >= 0.080 NA
Standardized Root Mean Square Residual:
SRMR 0.039 0.039
Parameter Estimates:
Standard errors Robust.sem
Information Expected
Information saturated (h1) model Unstructured
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
demography =~
tc_sex 1.000 0.198 0.198
tc_age 22.359 12.966 1.724 0.085 4.428 0.393
education =~
tc_tedu 1.000 0.160 0.160
tc_initqual 3.211 2.072 1.550 0.121 0.513 0.513
training =~
tc_training 1.000 0.276 0.276
tc_workshop 1.251 0.522 2.393 0.017 0.345 0.345
tc_pd 2.641 0.875 3.019 0.003 0.729 0.729
Regressions:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
reading_score ~
demography 84.407 132.183 0.639 0.523 16.717 0.353
education -111.557 120.322 -0.927 0.354 -17.823 -0.376
training -55.526 123.695 -0.449 0.654 -15.330 -0.324
education ~
demography 0.473 0.561 0.844 0.399 0.587 0.587
training -0.519 0.507 -1.024 0.306 -0.897 -0.897
training ~
demography 0.938 0.467 2.009 0.045 0.673 0.673
Intercepts:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.tc_sex 0.000 0.000 0.000
.tc_age 43.504 0.275 157.919 0.000 43.504 3.856
.tc_tedu 0.000 0.000 0.000
.tc_initqual 0.000 0.000 0.000
.tc_training 0.000 0.000 0.000
.tc_workshop 0.000 0.000 0.000
.tc_pd 0.000 0.000 0.000
.reading_score 505.190 1.171 431.485 0.000 505.190 10.661
demography 0.000 0.000 0.000
.education 0.000 0.000 0.000
.training 0.000 0.000 0.000
Thresholds:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
tc_sex|t1 0.166 0.030 5.439 0.000 0.166 0.166
tc_tedu|t1 -0.839 0.035 -24.291 0.000 -0.839 -0.839
tc_tedu|t2 1.351 0.043 31.510 0.000 1.351 1.351
tc_initqual|t1 0.852 0.035 24.557 0.000 0.852 0.852
tc_initqual|t2 1.035 0.037 27.943 0.000 1.035 1.035
tc_initqual|t3 1.510 0.047 32.176 0.000 1.510 1.510
tc_initqual|t4 1.631 0.051 32.193 0.000 1.631 1.631
tc_training|t1 -0.699 0.033 -21.070 0.000 -0.699 -0.699
tc_workshop|t1 1.449 0.045 32.008 0.000 1.449 1.449
tc_pd|t1 2.081 0.072 29.045 0.000 2.081 2.081
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.tc_sex 0.961 0.961 0.961
.tc_age 107.675 14.021 7.680 0.000 107.675 0.846
.tc_tedu 0.974 0.974 0.974
.tc_initqual 0.737 0.737 0.737
.tc_training 0.924 0.924 0.924
.tc_workshop 0.881 0.881 0.881
.tc_pd 0.468 0.468 0.468
.reading_score 2022.595 302.966 6.676 0.000 2022.595 0.901
demography 0.039 0.028 1.386 0.166 1.000 1.000
.education 0.014 0.013 1.086 0.278 0.560 0.560
.training 0.042 0.032 1.294 0.196 0.547 0.547
Scales y*:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
tc_sex 1.000 1.000 1.000
tc_tedu 1.000 1.000 1.000
tc_initqual 1.000 1.000 1.000
tc_training 1.000 1.000 1.000
tc_workshop 1.000 1.000 1.000
tc_pd 1.000 1.000 1.000
R-Square:
Estimate
tc_sex 0.039
tc_age 0.154
tc_tedu 0.026
tc_initqual 0.263
tc_training 0.076
tc_workshop 0.119
tc_pd 0.532
reading_score 0.099
education 0.440
training 0.453
Modification Indices:
lhs op rhs mi epc sepc.lv sepc.all sepc.nox
1 demography =~ tc_tedu 0.820 0.317 0.063 0.063 0.063
2 demography =~ tc_initqual 0.820 -1.018 -0.202 -0.202 -0.202
3 demography =~ tc_training 1.398 -0.915 -0.181 -0.181 -0.181
4 demography =~ tc_workshop 0.255 0.494 0.098 0.098 0.098
5 demography =~ tc_pd 0.379 1.134 0.225 0.225 0.225
6 education =~ tc_sex 3.308 0.917 0.147 0.147 0.147
7 education =~ tc_training 1.191 -0.730 -0.117 -0.117 -0.117
8 education =~ tc_workshop 1.254 0.930 0.149 0.149 0.149
9 education =~ tc_pd 0.001 -0.050 -0.008 -0.008 -0.008
10 training =~ tc_sex 2.419 -0.864 -0.239 -0.239 -0.239
11 training =~ tc_tedu 1.609 0.391 0.108 0.108 0.108
12 training =~ tc_initqual 1.609 -1.256 -0.347 -0.347 -0.347
13 tc_sex ~~ tc_tedu 0.008 -0.003 -0.003 -0.003 -0.003
14 tc_sex ~~ tc_initqual 3.844 0.087 0.087 0.103 0.103
15 tc_sex ~~ tc_training 6.453 -0.124 -0.124 -0.132 -0.132
16 tc_sex ~~ tc_workshop 5.557 0.151 0.151 0.164 0.164
17 tc_sex ~~ tc_pd 0.245 0.056 0.056 0.084 0.084
18 tc_age ~~ tc_tedu 0.516 0.238 0.238 0.023 0.023
19 tc_age ~~ tc_initqual 3.883 -1.481 -1.481 -0.166 -0.166
20 tc_age ~~ tc_training 0.033 0.099 0.099 0.010 0.010
21 tc_age ~~ tc_workshop 0.792 -0.609 -0.609 -0.062 -0.062
22 tc_age ~~ tc_pd 0.252 0.639 0.639 0.090 0.090
23 tc_tedu ~~ tc_training 3.029 0.073 0.073 0.076 0.076
24 tc_tedu ~~ tc_workshop 0.022 -0.008 -0.008 -0.009 -0.009
25 tc_tedu ~~ tc_pd 0.020 0.016 0.016 0.023 0.023
26 tc_initqual ~~ tc_training 3.279 -0.104 -0.104 -0.126 -0.126
27 tc_initqual ~~ tc_workshop 1.463 0.088 0.088 0.109 0.109
28 tc_initqual ~~ tc_pd 0.014 -0.016 -0.016 -0.028 -0.028
29 tc_training ~~ tc_workshop 1.982 0.110 0.110 0.122 0.122
30 tc_training ~~ tc_pd 1.174 -0.163 -0.163 -0.248 -0.248
31 tc_workshop ~~ tc_pd 0.037 -0.035 -0.035 -0.054 -0.054
lavaanPlot(model = tech_model, coefs = TRUE, sig = .05, stand = TRUE, stars = "latent")
Same Model Using the Complete Data 5
<- "
lat_mod_2 # Measurement Model
demography =~ tc_sex + tc_age
education =~ tc_tedu + tc_initqual
training =~ tc_training + tc_workshop + tc_pd
type =~ teacher_type + empltim
# Regression Model
reading_score ~ demography + education + training + type
education ~ demography + training + type
training ~ demography + type
demography ~ type
## Correlation
#tc_initqual ~~ tc_training
#tc_pd ~~ teacher_type
#tc_workshop ~~ empltim
"
<- sem(lat_mod_2,
tech_model_2 data = complete_data_5,
ordered = c("tc_sex", "tc_tedu", "tc_initqual", "tc_training", "tc_workshop", "tc_pd", "teacher_type", "empltim", "tc_tedu")
)summary(tech_model_2, standardized = TRUE, rsquare = TRUE, fit.measures = TRUE, modindices = TRUE)
lavaan 0.6.14 ended normally after 467 iterations
Estimator DWLS
Optimization method NLMINB
Number of model parameters 35
Number of observations 3526
Model Test User Model:
Standard Scaled
Test Statistic 180.119 192.769
Degrees of freedom 26 26
P-value (Chi-square) 0.000 0.000
Scaling correction factor 0.940
Shift parameter 1.078
simple second-order correction
Model Test Baseline Model:
Test statistic 490.942 452.505
Degrees of freedom 45 45
P-value 0.000 0.000
Scaling correction factor 1.094
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.654 0.591
Tucker-Lewis Index (TLI) 0.402 0.292
Robust Comparative Fit Index (CFI) NA
Robust Tucker-Lewis Index (TLI) NA
Root Mean Square Error of Approximation:
RMSEA 0.041 0.043
90 Percent confidence interval - lower 0.035 0.037
90 Percent confidence interval - upper 0.047 0.048
P-value H_0: RMSEA <= 0.050 0.995 0.983
P-value H_0: RMSEA >= 0.080 0.000 0.000
Robust RMSEA NA
90 Percent confidence interval - lower NA
90 Percent confidence interval - upper NA
P-value H_0: Robust RMSEA <= 0.050 NA
P-value H_0: Robust RMSEA >= 0.080 NA
Standardized Root Mean Square Residual:
SRMR 0.092 0.092
Parameter Estimates:
Standard errors Robust.sem
Information Expected
Information saturated (h1) model Unstructured
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
demography =~
tc_sex 1.000 0.412 0.412
tc_age 5.534 0.947 5.844 0.000 2.279 0.202
education =~
tc_tedu 1.000 0.180 0.180
tc_initqual 2.012 0.810 2.485 0.013 0.363 0.363
training =~
tc_training 1.000 0.043 0.043
tc_workshop 12.796 15.448 0.828 0.408 0.549 0.549
tc_pd 15.965 19.260 0.829 0.407 0.684 0.684
type =~
teacher_type 1.000 0.318 0.318
empltim 1.075 0.221 4.857 0.000 0.342 0.342
Regressions:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
reading_score ~
demography 16.740 10.938 1.530 0.126 6.895 0.141
education -72.104 26.703 -2.700 0.007 -13.007 -0.265
training 5.614 137.491 0.041 0.967 0.241 0.005
type 1.308 9.950 0.131 0.895 0.416 0.008
education ~
demography 0.182 0.114 1.592 0.111 0.416 0.416
training -0.454 1.552 -0.293 0.770 -0.108 -0.108
type -0.047 0.115 -0.413 0.679 -0.084 -0.084
training ~
demography 0.044 0.054 0.806 0.420 0.422 0.422
type 0.017 0.029 0.583 0.560 0.126 0.126
demography ~
type 2.343 1.335 1.756 0.079 1.809 1.809
Intercepts:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.tc_sex 0.000 0.000 0.000
.tc_age 42.918 0.193 222.279 0.000 42.918 3.809
.tc_tedu 0.000 0.000 0.000
.tc_initqual 0.000 0.000 0.000
.tc_training 0.000 0.000 0.000
.tc_workshop 0.000 0.000 0.000
.tc_pd 0.000 0.000 0.000
.teacher_type 0.000 0.000 0.000
.empltim 0.000 0.000 0.000
.reading_score 505.100 0.848 595.959 0.000 505.100 10.299
.demography 0.000 0.000 0.000
.education 0.000 0.000 0.000
.training 0.000 0.000 0.000
type 0.000 0.000 0.000
Thresholds:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
tc_sex|t1 0.351 0.022 16.265 0.000 0.351 0.351
tc_tedu|t1 -0.827 0.024 -34.515 0.000 -0.827 -0.827
tc_tedu|t2 1.411 0.031 45.755 0.000 1.411 1.411
tc_initqual|t1 0.886 0.024 36.296 0.000 0.886 0.886
tc_initqual|t2 1.066 0.026 40.837 0.000 1.066 1.066
tc_initqual|t3 1.561 0.034 46.307 0.000 1.561 1.561
tc_initqual|t4 1.657 0.036 46.178 0.000 1.657 1.657
tc_training|t1 -0.734 0.023 -31.492 0.000 -0.734 -0.734
tc_workshop|t1 1.530 0.033 46.270 0.000 1.530 1.530
tc_pd|t1 2.081 0.050 41.724 0.000 2.081 2.081
teacher_typ|t1 -0.348 0.022 -16.132 0.000 -0.348 -0.348
empltim|t1 2.034 0.048 42.486 0.000 2.034 2.034
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.tc_sex 0.830 0.830 0.830
.tc_age 121.730 4.048 30.072 0.000 121.730 0.959
.tc_tedu 0.967 0.967 0.967
.tc_initqual 0.868 0.868 0.868
.tc_training 0.998 0.998 0.998
.tc_workshop 0.699 0.699 0.699
.tc_pd 0.532 0.532 0.532
.teacher_type 0.899 0.899 0.899
.empltim 0.883 0.883 0.883
.reading_score 2217.227 104.899 21.137 0.000 2217.227 0.922
.demography -0.385 0.323 -1.194 0.232 -2.272 -2.272
.education 0.032 0.016 2.010 0.044 0.976 0.976
.training 0.001 0.003 0.418 0.676 0.614 0.614
type 0.101 0.058 1.756 0.079 1.000 1.000
Scales y*:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
tc_sex 1.000 1.000 1.000
tc_tedu 1.000 1.000 1.000
tc_initqual 1.000 1.000 1.000
tc_training 1.000 1.000 1.000
tc_workshop 1.000 1.000 1.000
tc_pd 1.000 1.000 1.000
teacher_type 1.000 1.000 1.000
empltim 1.000 1.000 1.000
R-Square:
Estimate
tc_sex 0.170
tc_age 0.041
tc_tedu 0.033
tc_initqual 0.132
tc_training 0.002
tc_workshop 0.301
tc_pd 0.468
teacher_type 0.101
empltim 0.117
reading_score 0.078
demography NA
education 0.024
training 0.386
Modification Indices:
lhs op rhs mi epc sepc.lv sepc.all sepc.nox
1 demography =~ tc_tedu 0.841 -0.082 -0.034 -0.034 -0.034
2 demography =~ tc_initqual 0.842 0.166 0.068 0.068 0.068
3 demography =~ tc_training 8.755 -0.578 -0.238 -0.238 -0.238
4 demography =~ tc_workshop 4.185 2.311 0.952 0.952 0.952
5 demography =~ tc_pd 0.481 -0.956 -0.394 -0.394 -0.394
6 demography =~ teacher_type 60.451 -10.096 -4.158 -4.158 -4.158
7 demography =~ empltim 60.442 10.849 4.468 4.468 4.468
8 education =~ tc_sex 5.201 1.322 0.239 0.239 0.239
9 education =~ tc_age 5.196 -7.314 -1.319 -0.117 -0.117
10 education =~ tc_training 11.217 -1.076 -0.194 -0.194 -0.194
11 education =~ tc_workshop 6.008 1.527 0.275 0.275 0.275
12 education =~ tc_pd 3.792 -1.516 -0.274 -0.274 -0.274
13 education =~ teacher_type 13.524 2.489 0.449 0.449 0.449
14 education =~ empltim 13.523 -2.675 -0.483 -0.483 -0.483
15 training =~ tc_sex 0.003 -0.219 -0.009 -0.009 -0.009
16 training =~ tc_tedu 0.565 -0.789 -0.034 -0.034 -0.034
17 training =~ tc_initqual 0.364 1.273 0.055 0.055 0.055
18 training =~ teacher_type 53.833 -28.330 -1.214 -1.214 -1.214
19 training =~ empltim 53.978 30.487 1.307 1.307 1.307
20 type =~ tc_sex 0.659 0.448 0.142 0.142 0.142
21 type =~ tc_age 0.660 -2.480 -0.788 -0.070 -0.070
22 type =~ tc_tedu 1.655 -0.118 -0.037 -0.037 -0.037
23 type =~ tc_initqual 1.656 0.237 0.075 0.075 0.075
24 type =~ tc_training 12.360 -0.622 -0.198 -0.198 -0.198
25 type =~ tc_workshop 0.599 0.358 0.114 0.114 0.114
26 type =~ tc_pd 0.003 -0.031 -0.010 -0.010 -0.010
27 tc_sex ~~ tc_tedu 1.798 -0.038 -0.038 -0.043 -0.043
28 tc_sex ~~ tc_initqual 7.647 0.123 0.123 0.145 0.145
29 tc_sex ~~ tc_training 15.348 -0.128 -0.128 -0.141 -0.141
30 tc_sex ~~ tc_workshop 1.442 0.075 0.075 0.098 0.098
31 tc_sex ~~ tc_pd 0.285 -0.045 -0.045 -0.068 -0.068
32 tc_sex ~~ teacher_type 48.373 0.482 0.482 0.558 0.558
33 tc_sex ~~ empltim 71.019 -0.768 -0.768 -0.896 -0.896
34 tc_age ~~ tc_tedu 0.142 -0.089 -0.089 -0.008 -0.008
35 tc_age ~~ tc_initqual 2.827 -0.501 -0.501 -0.049 -0.049
36 tc_age ~~ tc_training 5.465 0.631 0.631 0.057 0.057
37 tc_age ~~ tc_workshop 0.843 -0.383 -0.383 -0.042 -0.042
38 tc_age ~~ tc_pd 0.497 0.397 0.397 0.049 0.049
39 tc_age ~~ teacher_type 0.661 -0.355 -0.355 -0.034 -0.034
40 tc_age ~~ empltim 0.797 0.542 0.542 0.052 0.052
41 tc_tedu ~~ tc_training 0.001 0.001 0.001 0.001 0.001
42 tc_tedu ~~ tc_workshop 0.282 -0.020 -0.020 -0.024 -0.024
43 tc_tedu ~~ tc_pd 0.424 0.040 0.040 0.055 0.055
44 tc_tedu ~~ teacher_type 0.065 -0.008 -0.008 -0.009 -0.009
45 tc_tedu ~~ empltim 0.190 0.022 0.022 0.024 0.024
46 tc_initqual ~~ tc_training 18.435 -0.137 -0.137 -0.147 -0.147
47 tc_initqual ~~ tc_workshop 1.868 0.075 0.075 0.096 0.096
48 tc_initqual ~~ tc_pd 1.241 -0.082 -0.082 -0.120 -0.120
49 tc_initqual ~~ teacher_type 0.000 -0.001 -0.001 -0.001 -0.001
50 tc_initqual ~~ empltim 0.001 0.002 0.002 0.003 0.003
51 tc_training ~~ tc_workshop 1.675 0.073 0.073 0.087 0.087
52 tc_training ~~ tc_pd 6.289 0.210 0.210 0.289 0.289
53 tc_training ~~ teacher_type 5.794 -0.081 -0.081 -0.085 -0.085
54 tc_training ~~ empltim 6.956 0.195 0.195 0.207 0.207
55 tc_workshop ~~ tc_pd 9.527 -2.553 -2.553 -4.187 -4.187
56 tc_workshop ~~ teacher_type 7.249 -0.176 -0.176 -0.222 -0.222
57 tc_workshop ~~ empltim 16.391 0.329 0.329 0.419 0.419
58 tc_pd ~~ teacher_type 17.021 -0.360 -0.360 -0.521 -0.521
59 tc_pd ~~ empltim 14.435 0.391 0.391 0.571 0.571
lavaanPlot(model = tech_model_2, coefs = TRUE, sig = .05, stand = TRUE, stars = "latent")
j. Baseline Model
<- "
base_1 tc_age ~~ tc_age
tc_tcexp ~~ tc_tcexp
tc_tedu ~~ tc_tedu
tc_initqual ~~ tc_initqual
tc_training ~~ tc_training
tc_workshop ~~ tc_workshop
tc_pd ~~ tc_pd
reading_score ~~ reading_score
"
<- sem(base_1, data = teacher_data, ordered = c("tc_sex", "tc_tedu", "tc_initqual", "tc_training", "tc_workshop", "tc_pd"))
base_model # summary(base_model, fit.measures = TRUE)
fitmeasures(base_model)
npar fmin
20.000 0.197
chisq df
666.371 23.000
pvalue chisq.scaled
0.000 666.371
df.scaled pvalue.scaled
23.000 0.000
chisq.scaling.factor baseline.chisq
NA 666.371
baseline.df baseline.pvalue
28.000 0.000
baseline.chisq.scaled baseline.df.scaled
552.322 28.000
baseline.pvalue.scaled baseline.chisq.scaling.factor
0.000 1.218
cfi tli
0.000 -0.227
cfi.scaled tli.scaled
0.000 -0.494
cfi.robust tli.robust
NA NA
nnfi rfi
-0.227 1.000
nfi pnfi
0.000 0.000
ifi rni
0.000 -0.008
nnfi.scaled rfi.scaled
-0.494 1.000
nfi.scaled pnfi.scaled
-0.206 -0.170
ifi.scaled rni.scaled
-0.215 -0.227
nnfi.robust rni.robust
NA NA
rmsea rmsea.ci.lower
0.129 0.120
rmsea.ci.upper rmsea.ci.level
0.137 0.900
rmsea.pvalue rmsea.close.h0
0.000 0.050
rmsea.notclose.pvalue rmsea.notclose.h0
1.000 0.080
rmsea.scaled rmsea.ci.lower.scaled
0.129 0.120
rmsea.ci.upper.scaled rmsea.pvalue.scaled
0.137 0.000
rmsea.notclose.pvalue.scaled rmsea.robust
1.000 NA
rmsea.ci.lower.robust rmsea.ci.upper.robust
NA NA
rmsea.pvalue.robust rmsea.notclose.pvalue.robust
NA NA
rmr rmr_nomean
14.681 16.414
srmr srmr_bentler
0.166 0.148
srmr_bentler_nomean crmr
0.166 0.164
crmr_nomean srmr_mplus
0.188 NA
srmr_mplus_nomean cn_05
NA 90.202
cn_01 gfi
106.600 0.997
agfi pgfi
0.994 0.533
mfi wrmr
0.827 3.937
k. Regression Model
# Teacher Type Model
<- "
reg_mod1 reading_score ~ teacher_type
"
<- sem(reg_mod1, data = teacher_data, ordered = c("teacher_type"))
techr_type parameterEstimates(techr_type)
lhs op rhs est se z pvalue ci.lower
1 reading_score ~ teacher_type -1.685 1.846 -0.912 0.362 -5.304
2 reading_score ~~ reading_score 2275.776 72.731 31.290 0.000 2133.224
3 teacher_type ~~ teacher_type 0.232 0.000 NA NA 0.232
4 reading_score ~1 508.371 3.138 162.017 0.000 502.221
5 teacher_type ~1 1.633 0.000 NA NA 1.633
ci.upper
1 1.934
2 2418.327
3 0.232
4 514.521
5 1.633
# Employment Type
<- "
reg_mod2 reading_score ~ empltim
"
<- sem(reg_mod2, data = teacher_data, ordered = "empltim")
empl_type parameterEstimates(empl_type)
lhs op rhs est se z pvalue ci.lower
1 reading_score ~ empltim 35.776 6.002 5.961 0 24.013
2 reading_score ~~ reading_score 2246.062 72.877 30.820 0 2103.226
3 empltim ~~ empltim 0.021 0.000 NA NA 0.021
4 reading_score ~1 469.030 6.201 75.636 0 456.876
5 empltim ~1 1.021 0.000 NA NA 1.021
ci.upper
1 47.539
2 2388.898
3 0.021
4 481.184
5 1.021
k. Regression Model Complete
# Teacher Type Model
<- "
reg_mod_complete reading_score ~ tc_sex + tc_age + empltim + tc_tcexp + tc_tedu + tc_initqual + tc_training + tc_workshop + tc_pd
"
<- sem(reg_mod_complete, data = complete_data_5, ordered = c("tc_sex", "tc_tedu", "tc_initqual", "tc_training", "tc_workshop", "tc_pd", "empltim", "tc_tedu"))
complete_mod
summary(complete_mod, standardized = TRUE, rsquare = TRUE)
lavaan 0.6.14 ended normally after 114 iterations
Estimator DWLS
Optimization method NLMINB
Number of model parameters 11
Number of observations 3526
Model Test User Model:
Standard Scaled
Test Statistic 0.000 0.000
Degrees of freedom 0 0
Parameter Estimates:
Standard errors Robust.sem
Information Expected
Information saturated (h1) model Unstructured
Regressions:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
reading_score ~
tc_sex 1.835 1.697 1.081 0.280 1.835 0.018
tc_age -0.331 0.115 -2.881 0.004 -0.331 -0.076
empltim 31.467 5.643 5.576 0.000 31.467 0.092
tc_tcexp 0.756 0.145 5.222 0.000 0.756 0.147
tc_tedu -4.938 1.670 -2.957 0.003 -4.938 -0.052
tc_initqual -1.195 0.798 -1.497 0.134 -1.195 -0.025
tc_training -0.613 1.900 -0.323 0.747 -0.613 -0.005
tc_workshop -6.628 3.872 -1.712 0.087 -6.628 -0.033
tc_pd 10.863 7.178 1.513 0.130 10.863 0.030
Intercepts:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.reading_score 481.473 10.495 45.877 0.000 481.473 9.816
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.reading_score 2346.301 68.475 34.265 0.000 2346.301 0.975
R-Square:
Estimate
reading_score 0.025
# Checking the Parameter Estimates
# parameterEstimates(complete_mod)
# Ordering covariacne matrix
# fitted(complete_mod)
# Conversting covariance matrix to correlation matrix
# cov_m <- fitted(complete_mod)$cov
# cor_cov <- cov2cor(complete_mod)
# cor_cov
# Obtaining residual correlation matrix
# residuals(complete_mod, type = "cor")
# Obtaining the residual covariance matrix
# residuals(complete_mod, type = "raw")
Comments
Post a Comment