# Supplementary material (R code) for "Marengo, D., Montag, C. Digital
# Phenotyping of Big Five Personality Traits via Facebook Data Mining:
# A Meta-Analysis."
#
# The code was adapted from: Assink, M., & Wibbelink, C. J. (2016). Fitting
# three-level meta-analytic models in R: A step-by-step tutorial.
# The Quantitative Methods for Psychology, 12(3), 154-174.

# Setup ----

# Install metafor only when it is missing, then load it.
if (!requireNamespace("metafor", quietly = TRUE)) {
  install.packages("metafor")
}
library(metafor)

# Ask for the folder containing the data and use it as working directory.
# NOTE: choose.dir() is only available in R for Windows; on other platforms
# set the working directory manually before running the rest of the script.
data_dir <- choose.dir(default = "", caption = "Select folder")
if (is.na(data_dir)) {
  # choose.dir() returns NA when the dialog is cancelled.
  stop("No folder selected; cannot locate data.csv.", call. = FALSE)
}
setwd(data_dir)

# Load the data.
data <- read.csv("data.csv")

# Helper functions ----

# Distribution of the total variance over the four levels of the model.
#
# Arguments:
#   data  data frame with a `samplevar` column (sampling variance of each
#         effect size).
#   m     fitted model object whose `sigma2` element holds (at least) three
#         variance components, in the order of the random-effects terms.
#
# Returns a data frame with columns "Level" and "% of total variance".
# Level 1 is the "typical" sampling variance estimated from `samplevar`;
# levels 2, 3 and 4 correspond to m$sigma2[1], m$sigma2[2] and m$sigma2[3].
variance.distribution.4lm <- function(data, m) {
  inv_var <- 1 / data$samplevar
  n <- length(inv_var)

  # Typical within-study sampling variance:
  # (n - 1) * sum(1 / v) / (sum(1 / v)^2 - sum(1 / v^2))
  est_samp_var <- ((n - 1) * sum(inv_var)) /
    (sum(inv_var)^2 - sum(inv_var^2))

  components <- c(est_samp_var, unname(m$sigma2[1:3]))

  data.frame(
    Level = c("level 1", "level 2", "level 3", "level 4"),
    `% of total variance` = components / sum(components) * 100,
    check.names = FALSE
  )
}

# Proportional reduction in the summed variance components of `model`
# relative to the reference model `baseline` (i.e. explained variance).
explained_variance <- function(model, baseline) {
  (sum(baseline$sigma2) - sum(model$sigma2)) / sum(baseline$sigma2)
}

# Overall effect ----

# Estimation of the overall meta-analytical correlation using the four-level
# multilevel model (random effects for id, study and dataset).
overall <- rma.mv(
  correlation, samplevar,
  random = list(~ 1 | id, ~ 1 | study, ~ 1 | dataset),
  tdist = TRUE, data = data, method = "REML"
)
summary(overall, digits = 3)

# Percentage of variance per level.
variance.distribution.4lm(data = data, m = overall)

# Model comparison ----

# Estimating and comparing the fit of competing models. The full model is
# `overall` (fitted above); each reduced model fixes one variance component
# to zero via the `sigma2` argument.
overall_1 <- rma.mv(
  correlation, samplevar,
  random = list(~ 1 | id, ~ 1 | study, ~ 1 | dataset),
  tdist = TRUE, data = data, method = "REML",
  sigma2 = c(0, NA, NA)
)
summary(overall_1, digits = 3)

overall_2 <- rma.mv(
  correlation, samplevar,
  random = list(~ 1 | id, ~ 1 | study, ~ 1 | dataset),
  tdist = TRUE, data = data, method = "REML",
  sigma2 = c(NA, 0, NA)
)
summary(overall_2, digits = 3)

overall_3 <- rma.mv(
  correlation, samplevar,
  random = list(~ 1 | id, ~ 1 | study, ~ 1 | dataset),
  tdist = TRUE, data = data, method = "REML",
  sigma2 = c(NA, NA, 0)
)
summary(overall_3, digits = 3)

# Likelihood ratio tests.
anova(overall, overall_1)
anova(overall, overall_2)
anova(overall, overall_3)

# Trait-specific effects ----

# Computing the meta-analytical correlation for specific traits
# (1 = Agreeableness; 2 = Conscientiousness; 3 = Extraversion;
#  4 = Neuroticism; 5 = Openness). The intercept is removed so that each
# coefficient is the estimate for one trait.
mod0 <- rma.mv(
  correlation, samplevar,
  mods = ~ factor(trait) - 1,
  random = list(~ 1 | id, ~ 1 | study, ~ 1 | dataset),
  tdist = TRUE, data = data, method = "REML"
)
summary(mod0, digits = 3)
# Explained variance
explained_variance(mod0, overall)

# Generating unadjusted pair-wise contrasts for personality traits.
# multcomp is optional: it is only needed to build the contrast matrix.
if (requireNamespace("multcomp", quietly = TRUE)) {
  contr <- multcomp::contrMat(
    setNames(rep(1, mod0$p), colnames(mod0$X)),
    type = "Tukey"
  )
  sav <- predict(mod0, newmods = contr)
  sav[["slab"]] <- rownames(contr)
  # NOTE(review): recent metafor versions name this argument `X`; `L` is
  # kept here as in the original script -- confirm against the installed
  # metafor version.
  sav$pval <- anova(mod0, L = contr)$pval
  sav
} else {
  message("Package 'multcomp' is not installed; skipping pair-wise contrasts.")
}

# Moderation analyses ----

# Use of multiple digital footprints
mod1 <- rma.mv(
  correlation, samplevar,
  mods = ~ multiple,
  random = list(~ 1 | id, ~ 1 | study, ~ 1 | dataset),
  tdist = TRUE, data = data, method = "REML"
)
summary(mod1, digits = 3)
# Explained variance
explained_variance(mod1, overall)

# Use of cross-validation
mod2 <- rma.mv(
  correlation, samplevar,
  mods = ~ validation,
  random = list(~ 1 | id, ~ 1 | study, ~ 1 | dataset),
  tdist = TRUE, data = data, method = "REML"
)
summary(mod2, digits = 3)
# Explained variance
explained_variance(mod2, overall)

# Use of demographics
mod3 <- rma.mv(
  correlation, samplevar,
  mods = ~ demos,
  random = list(~ 1 | id, ~ 1 | study, ~ 1 | dataset),
  tdist = TRUE, data = data, method = "REML"
)
summary(mod3, digits = 3)
# Explained variance
explained_variance(mod3, overall)

# Use of activity statistics
mod4 <- rma.mv(
  correlation, samplevar,
  mods = ~ stats,
  random = list(~ 1 | id, ~ 1 | study, ~ 1 | dataset),
  tdist = TRUE, data = data, method = "REML"
)
summary(mod4, digits = 3)
# Explained variance
explained_variance(mod4, overall)

# Use of language features
mod5 <- rma.mv(
  correlation, samplevar,
  mods = ~ language,
  random = list(~ 1 | id, ~ 1 | study, ~ 1 | dataset),
  tdist = TRUE, data = data, method = "REML"
)
summary(mod5, digits = 3)
# Explained variance
explained_variance(mod5, overall)

# Use of likes
mod6 <- rma.mv(
  correlation, samplevar,
  mods = ~ likes,
  random = list(~ 1 | id, ~ 1 | study, ~ 1 | dataset),
  tdist = TRUE, data = data, method = "REML"
)
summary(mod6, digits = 3)
# Explained variance
explained_variance(mod6, overall)

# Publication bias analysis ----

# Generating the funnel plot
funnel(overall)

# Egger's regression test: the `stander` column (presumably the standard
# error of each effect size -- confirm against data.csv) is entered as a
# moderator. Written as a formula for consistency with the models above.
egger <- rma.mv(
  correlation, samplevar,
  mods = ~ stander,
  random = list(~ 1 | id, ~ 1 | study, ~ 1 | dataset),
  tdist = TRUE, data = data, method = "REML"
)
summary(egger, digits = 3)
# Explained variance (standard error)
explained_variance(egger, overall)