\[ E(y | x_1, x_2, ..., x_q) = \beta_0 + \beta_1x_1 + \beta_2x_2 + ... + \beta_qx_q \]
bzw.
\[ y = \beta_0 + \beta_1x_1 + \beta_2x_2 + ... + \beta_qx_q + \varepsilon \]
Dargestellt werden Varianzanalytische Designs mit Focus auf das Allgemeine Lineare Modell.
Es geht um den regressionsanalytischen Ansatz varianzanalytischer Designs.
Vorhersagevariablen sind kategorial, Reaktionsvariablen sind stetig (intervallskaliert).
In R haben kategoriale Vorhersagevariablen den Typ
factor()
Faktoren kodieren Gruppenzugehörigkeit.
Faktorstufen haben eine Reihenfolge, die ggf. verändert werden kann. Default ist alphabetisch nach den Namen der Faktorstufen.
Faktoren können in Zahlenwerte umgewandelt werden mit
as.numeric(factor.variable)
.
Zahlenwerte können in Faktoren umgewandelt werden mit
factor(variable)
bzw. as.factor(variable)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.5 ✔ purrr 0.3.4
## ✔ tibble 3.1.6 ✔ dplyr 1.0.8
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
dd <- read.delim("https://md.psych.bio.uni-goettingen.de/mv/data/virt/v_stait_exam_wide.txt")[c('subj','gender','neutr', 'exam1', 'exam2')]
# we rename neutr to anx_neutr, it's state anxiety in neutral situation
dd <- dd %>% dplyr::rename(anx_neutr = neutr)
# add a numeric var
dd$n1.group <- c(rep(1, 90), rep(2, 90))
dd$n2.group <- c(rep(1:3, 60))
dd$fn1.group <- factor(dd$n1.group, levels=c(1,2), labels=c('one', 'two'))
# if we don't use codes, they are set to NA
dd$fn2.group.miss <- factor(dd$n2.group, levels=c(1,2), labels=c('one', 'two'))
dd$fn2.group <- factor(dd$n2.group, levels=c(1,2,3), labels=c('one', 'two', 'three'))
head(dd)
## subj gender anx_neutr exam1 exam2 n1.group n2.group fn1.group fn2.group.miss
## 1 1 f 42 52 48 1 1 one one
## 2 2 f 59 68 65 1 2 one two
## 3 3 f 49 59 55 1 3 one <NA>
## 4 4 f 20 31 27 1 1 one one
## 5 5 f 45 55 51 1 2 one two
## 6 6 f 39 49 45 1 3 one <NA>
## fn2.group
## 1 one
## 2 two
## 3 three
## 4 one
## 5 two
## 6 three
as.numeric(dd$fn2.group)
## [1] 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1
## [38] 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2
## [75] 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3
## [112] 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1
## [149] 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3
Ein Spezifikum von R ist das sog. ‘long format’ beim Aufbau von Datensätzen. Im Kern bedeutet das, dass bei R im Gegensatz zu vielen anderen bekannten Statistikpaketen Messwiederholungsdesigns (within subjects designs) keine andere Anordnung der Variablen erzwingen als Zwischengruppen-Designs (between subjects designs).
Unit long_wide html zum Umbau von Datensätzen.
Der Befehl t.test()
ist sehr flexibel und kann sowohl
mit dem Long- als auch mit dem Wide-Format umgehen.
# comparison of different ways to calculate t.test()
dd <- read.delim("https://md.psych.bio.uni-goettingen.de/mv/data/virt/v_stait_exam_wide.txt")[c('subj','gender','neutr', 'exam1', 'exam2')]
# we rename neutr to anx_neutr, it's state anxiety in neutral situation
dd <- dd %>% dplyr::rename(anx_neutr = neutr)
# alternatives to get the same t-test
# default is t-test for independent groups (paired=FALSE)
t.test(dd$anx_neutr[dd$gender == "f"], dd$anx_neutr[dd$gender == "m"])
##
## Welch Two Sample t-test
##
## data: dd$anx_neutr[dd$gender == "f"] and dd$anx_neutr[dd$gender == "m"]
## t = 3.3486, df = 157.68, p-value = 0.001016
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.980472 11.552862
## sample estimates:
## mean of x mean of y
## 47.47778 40.21111
t.test(dd$anx_neutr ~ dd$gender)
##
## Welch Two Sample t-test
##
## data: dd$anx_neutr by dd$gender
## t = 3.3486, df = 157.68, p-value = 0.001016
## alternative hypothesis: true difference in means between group f and group m is not equal to 0
## 95 percent confidence interval:
## 2.980472 11.552862
## sample estimates:
## mean in group f mean in group m
## 47.47778 40.21111
# create wide format on the fly
dd.t <- data.frame(
women = dd$anx_neutr[dd$gender == "f"],
men = dd$anx_neutr[dd$gender == "m"]
)
head(dd.t)
## women men
## 1 42 20
## 2 59 42
## 3 49 31
## 4 20 31
## 5 45 32
## 6 39 63
# same test, now data in wide format but t.test is for independent groups
t.test(dd.t$women, dd.t$men)
##
## Welch Two Sample t-test
##
## data: dd.t$women and dd.t$men
## t = 3.3486, df = 157.68, p-value = 0.001016
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.980472 11.552862
## sample estimates:
## mean of x mean of y
## 47.47778 40.21111
# we can also get the repeated measures t-test
t.test(dd.t$women, dd.t$men, paired=TRUE)
##
## Paired t-test
##
## data: dd.t$women and dd.t$men
## t = 4.2139, df = 89, p-value = 6.004e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 3.840218 10.693115
## sample estimates:
## mean of the differences
## 7.266667
Die Regression kann mit dem lm()
Befehl angefordert
werden.
dd <- read.delim("https://md.psych.bio.uni-goettingen.de/mv/data/virt/v_stait_exam_classic.txt")[c('nr','gender','neutr', 'exam1', 'exam2')]
# we rename neutr to anx_neutr, it's state anxiety in neutral situation
dd <- dd %>% dplyr::rename(anx_neutr = neutr)
t.test(anx_neutr ~ gender, data=dd)
##
## Welch Two Sample t-test
##
## data: anx_neutr by gender
## t = 4.5749, df = 173.17, p-value = 9.052e-06
## alternative hypothesis: true difference in means between group f and group m is not equal to 0
## 95 percent confidence interval:
## 4.586424 11.546910
## sample estimates:
## mean in group f mean in group m
## 50.05556 41.98889
summary(lm(anx_neutr ~ gender, data=dd))
##
## Call:
## lm(formula = anx_neutr ~ gender, data = dd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.0556 -8.9889 0.9444 7.9444 30.9444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.056 1.247 40.147 < 2e-16 ***
## genderm -8.067 1.763 -4.575 8.91e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.83 on 178 degrees of freedom
## Multiple R-squared: 0.1052, Adjusted R-squared: 0.1002
## F-statistic: 20.93 on 1 and 178 DF, p-value: 8.907e-06
Das geht auch mit Dummy-Codierung. Je nach Dummy-Codierungsart müssen die Koeffizienten anders interpretiert werden.
df <- read.delim("https://md.psych.bio.uni-goettingen.de/mv/data/virt/v_stait_exam_classic.txt")[c('nr','gender','neutr', 'exam1', 'exam2')]
# we rename neutr to anx_neutr, it's state anxiety in neutral situation
df <- df %>% dplyr::rename(anx_neutr = neutr)
t.test(anx_neutr ~ gender, data=df)
##
## Welch Two Sample t-test
##
## data: anx_neutr by gender
## t = 4.5749, df = 173.17, p-value = 9.052e-06
## alternative hypothesis: true difference in means between group f and group m is not equal to 0
## 95 percent confidence interval:
## 4.586424 11.546910
## sample estimates:
## mean in group f mean in group m
## 50.05556 41.98889
summary(lm(anx_neutr ~ gender, data=df))
##
## Call:
## lm(formula = anx_neutr ~ gender, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.0556 -8.9889 0.9444 7.9444 30.9444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.056 1.247 40.147 < 2e-16 ***
## genderm -8.067 1.763 -4.575 8.91e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.83 on 178 degrees of freedom
## Multiple R-squared: 0.1052, Adjusted R-squared: 0.1002
## F-statistic: 20.93 on 1 and 178 DF, p-value: 8.907e-06
# dummy coded - reference coding - women are reference group
dummy_ref <- rep(0, length(df$gender))
dummy_ref[df$gender == 'm'] <- 1
dummy_ref
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1
## [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [112] 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [149] 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
summary(lm(df$anx_neutr ~ dummy_ref))
##
## Call:
## lm(formula = df$anx_neutr ~ dummy_ref)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.0556 -8.9889 0.9444 7.9444 30.9444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.056 1.247 40.147 < 2e-16 ***
## dummy_ref -8.067 1.763 -4.575 8.91e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.83 on 178 degrees of freedom
## Multiple R-squared: 0.1052, Adjusted R-squared: 0.1002
## F-statistic: 20.93 on 1 and 178 DF, p-value: 8.907e-06
unique(model.matrix(lm(df$anx_neutr ~ dummy_ref)))
## (Intercept) dummy_ref
## 1 1 0
## 31 1 1
# dummy coded, contrast coding
dummy_effect <- rep(-1, length(df$gender))
dummy_effect[df$gender == 'm'] <- 1
dummy_effect
## [1] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## [26] -1 -1 -1 -1 -1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [51] 1 1 1 1 1 1 1 1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## [76] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 1 1 1 1 1 1 1 1 1
## [101] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -1 -1 -1 -1 -1
## [126] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## [151] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [176] 1 1 1 1 1
summary(lm(df$anx_neutr ~ dummy_effect))
##
## Call:
## lm(formula = df$anx_neutr ~ dummy_effect)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.0556 -8.9889 0.9444 7.9444 30.9444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46.0222 0.8816 52.201 < 2e-16 ***
## dummy_effect -4.0333 0.8816 -4.575 8.91e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.83 on 178 degrees of freedom
## Multiple R-squared: 0.1052, Adjusted R-squared: 0.1002
## F-statistic: 20.93 on 1 and 178 DF, p-value: 8.907e-06
#head(model.matrix(lm(df$anx_neutr ~ dummy_ref)))
unique(model.matrix(lm(df$anx_neutr ~ dummy_ref)))
## (Intercept) dummy_ref
## 1 1 0
## 31 1 1
t.test()
benutzt im Hintergrund also eine
Referenzcodierung.
Je nach Codierung müssen die Parameter anders interpretiert werden.
dd <- read.delim("https://md.psych.bio.uni-goettingen.de/mv/data/virt/v_stait_exam_classic.txt")[c('nr','gender','neutr', 'exam1', 'exam2')]
# we rename neutr to anx_neutr, it's state anxiety in neutral situation
dd <- dd %>% dplyr::rename(anx_neutr = neutr)
require(ggplot2)
# create a plot object, define dataframe to use, add gender to differ in colour already in base plot
#ggplot(dd, x=gender, y=av_anx_neutr, aes(nation, bmi, fill = gender)) +
ggplot(dd, x=gender, y=anx_neutr, aes(gender, anx_neutr, fill = gender)) +
stat_summary(fun.y=mean, geom="bar", position=position_dodge()) +
stat_summary(fun.data = mean_cl_normal, geom = "errorbar", position=position_dodge(width=0.9), width=0.2)
## Warning: `fun.y` is deprecated. Use `fun` instead.
Kleinstes varianzanalytisches Messwiederholungsdesign. Erreicht über
paired=TRUE
in t.test()
.
Hier im Beispiel wird die neutrale Situation mit der Examenssituation verglichen.
# comparison of different ways to calculate t.test()
dd <- read.delim("https://md.psych.bio.uni-goettingen.de/mv/data/virt/v_stait_exam_classic.txt")[c('nr','gender','neutr', 'exam1', 'exam2')]
# we rename neutr to anx_neutr, it's state anxiety in neutral situation
dd <- dd %>% dplyr::rename(anx_neutr = neutr)
# alternatives to get the same t-test
t.test(dd$anx_neutr, dd$exam1, paired=TRUE)
##
## Paired t-test
##
## data: dd$anx_neutr and dd$exam1
## t = -24.749, df = 179, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.820321 -5.813012
## sample estimates:
## mean of the differences
## -6.316667
# create long format on the fly
dd.t <- data.frame(
nr = 1:2*length(dd$nr),
gender = factor(c(dd$gender, dd$gender), levels = c(1,2), labels = c("f", "m")),
sit = c(rep('anx_neutr', length(dd$nr)), rep('exam1', length(dd$nr))),
anx = c(dd$anx_neutr, dd$exam1)
)
head(dd.t)
## nr gender sit anx
## 1 180 <NA> anx_neutr 31
## 2 360 <NA> anx_neutr 52
## 3 180 <NA> anx_neutr 39
## 4 360 <NA> anx_neutr 59
## 5 180 <NA> anx_neutr 61
## 6 360 <NA> anx_neutr 36
# same test, now data in long format but t.test is for dependent groups
t.test(dd.t$anx[dd.t$sit == 'anx_neutr'], dd.t$anx[dd.t$sit == 'exam1'], paired=TRUE)
##
## Paired t-test
##
## data: dd.t$anx[dd.t$sit == "anx_neutr"] and dd.t$anx[dd.t$sit == "exam1"]
## t = -24.749, df = 179, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.820321 -5.813012
## sample estimates:
## mean of the differences
## -6.316667
t.test(anx ~ sit, data=dd.t, paired=TRUE)
##
## Paired t-test
##
## data: anx by sit
## t = -24.749, df = 179, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.820321 -5.813012
## sample estimates:
## mean of the differences
## -6.316667
# visualization
require(ggplot2)
# create a plot object, define dataframe to use, add gender to differ in colour already in base plot
#ggplot(dd, x=gender, y=av_neutr, aes(nation, bmi, fill = gender)) +
ggplot(dd.t, x=sit, y=anx, aes(sit, anx, fill = sit)) +
stat_summary(fun.y=mean, geom="bar", position=position_dodge()) +
stat_summary(fun.data = mean_cl_normal, geom = "errorbar", position=position_dodge(width=0.9), width=0.2)
## Warning: `fun.y` is deprecated. Use `fun` instead.
Drei Gruppen miteinander vergleichen hinsichtlich der State-Angst in einer neutralen Situation (Statistik-Tutorium):
Kategoriale Variablen sind in R vom Variablentyp factor
.
Umwandlung ist möglich durch den Befehl as.factor()
. Mehr
als zweistufige Faktoren werden bei varianzanalytischen Modellen
implizit in n-1 Dummy-Variablen umgewandelt. Voreinstellung ist die
Referenz-Kodierung. Referenz ist Faktorstufe mit dem alphabetisch ersten
Namen (Klassen- bzw. Subgruppen-Label). Mit `model.matrix()´ können wir
uns die Designmatrix ansehen, die implizit generiert und verwendet
wird.
# get data
dd <- read.delim("https://md.psych.bio.uni-goettingen.de/mv/data/virt/v_stait_exam_wide.txt")[c('subj','gender','group', 'neutr')]
# we rename neutr to anx_neutr, it's state anxiety in neutral situation
dd <- dd %>% dplyr::rename(anx_neutr = neutr)
head(dd)
## subj gender group anx_neutr
## 1 1 f av 42
## 2 2 f av 59
## 3 3 f av 49
## 4 4 f av 20
## 5 5 f av 45
## 6 6 f av 39
# descriptives using summarySE
require(Rmisc)
## Lade nötiges Paket: Rmisc
## Lade nötiges Paket: lattice
## Lade nötiges Paket: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attache Paket: 'plyr'
## Die folgenden Objekte sind maskiert von 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## Das folgende Objekt ist maskiert 'package:purrr':
##
## compact
Rmisc::summarySE(data=dd, "anx_neutr")
## .id N anx_neutr sd se ci
## 1 <NA> 180 43.84444 14.96694 1.11557 2.20136
Rmisc::summarySE(data=dd, "anx_neutr", groupvars="group")
## group N anx_neutr sd se ci
## 1 av 60 43.48333 12.34324 1.593506 3.188598
## 2 hi 60 54.51667 13.18897 1.702688 3.407072
## 3 lo 60 33.53333 11.36821 1.467629 2.936720
# grouping variables in R must be of class factor. We check that:
class(dd$group)
## [1] "character"
# aov() is a wrapper function for lm(), it's summary is more comparable to the 'usual' informations on ANOVAs
m.anx <- aov(anx_neutr ~ group, data=dd)
m.anx
## Call:
## aov(formula = anx_neutr ~ group, data = dd)
##
## Terms:
## group Residuals
## Sum of Squares 13220.74 26876.90
## Deg. of Freedom 2 177
##
## Residual standard error: 12.32262
## Estimated effects may be unbalanced
# get the global F-Test
summary(m.anx)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 13221 6610 43.53 4.21e-16 ***
## Residuals 177 26877 152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# we can see that aov() actually is a wrapper for lm()
summary(aov(lm(anx_neutr ~ group, data=dd)))
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 13221 6610 43.53 4.21e-16 ***
## Residuals 177 26877 152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# check means
require(effects)
## Lade nötiges Paket: effects
## Lade nötiges Paket: carData
## Use the command
## lattice::trellis.par.set(effectsTheme())
## to customize lattice options for effects plots.
## See ?effectsTheme for details.
effects::allEffects(m.anx)
## model: anx_neutr ~ group
##
## group effect
## group
## av hi lo
## 43.48333 54.51667 33.53333
# weights or coefficients, express the distance from reference group
m.anx$coefficients
## (Intercept) grouphi grouplo
## 43.48333 11.03333 -9.95000
# two coefficients for group because it has 3 levels due to dummy coding
# we can check dummy coding by inspecting the model.matrix()
unique(model.matrix(m.anx))
## (Intercept) grouphi grouplo
## 1 1 0 0
## 61 1 0 1
## 121 1 1 0
# visualisation via plot
plot(allEffects(m.anx), ask=FALSE)
# visualization ggplot
require(ggplot2)
ggplot(dd, x=group, y=anx_neutr, aes(group, anx_neutr, fill = group)) +
#scale_fill_manual(values=c_colors) +
stat_summary(fun.y=mean, geom="bar") +
stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width=0.2) + # width refers to whiskers
# put points on top of bars
stat_summary(fun.data = mean_cl_normal, geom = "point", shape=21, size=5) +
# add verbose axis labels
labs(x = "anxiety group", y = "STAI state anxiety")
## Warning: `fun.y` is deprecated. Use `fun` instead.
# we may also use predefined custom colors using scale_fill_manual()
c_colors <- c("#66ff66", "#ff6666", "#6666ff")
ggplot(dd, x=group, y=anx_neutr, aes(group, anx_neutr, fill = group)) +
scale_fill_manual(values=c_colors) +
stat_summary(fun.y=mean, geom="bar") +
stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width=0.2) + # width refers to whiskers
# put points on top of bars
stat_summary(fun.data = mean_cl_normal, geom = "point", shape=21, size=5) +
# add verbose axis labels
labs(x = "anxiety group", y = "STAI state anxiety")
## Warning: `fun.y` is deprecated. Use `fun` instead.
detach("package:Rmisc", unload=TRUE)
Der Levene-Test prüft die Homogenität der Varianzen über die
Faktorstufen. Bei Verletzung dieser Voraussetzung hilft Welch’s F ratio
oneway.test()
# get data
dd <- read.delim("https://md.psych.bio.uni-goettingen.de/mv/data/virt/v_stait_exam_wide.txt")[c('subj','gender','group', 'neutr')]
# we rename neutr to anx_neutr, it's state anxiety in neutral situation
dd <- dd %>% dplyr::rename(anx_neutr = neutr)
# require(psych)
# psych::describe(dd$anx_neutr)
# by(dd$anx_neutr, list(dd$group), psych::describe)
# descriptives using summarySE
require(Rmisc)
## Lade nötiges Paket: Rmisc
Rmisc::summarySE(data=dd, "anx_neutr")
## .id N anx_neutr sd se ci
## 1 <NA> 180 43.84444 14.96694 1.11557 2.20136
Rmisc::summarySE(data=dd, "anx_neutr", groupvars="group")
## group N anx_neutr sd se ci
## 1 av 60 43.48333 12.34324 1.593506 3.188598
## 2 hi 60 54.51667 13.18897 1.702688 3.407072
## 3 lo 60 33.53333 11.36821 1.467629 2.936720
# test the differences in variability
require(car)
## Lade nötiges Paket: car
##
## Attache Paket: 'car'
## Das folgende Objekt ist maskiert 'package:dplyr':
##
## recode
## Das folgende Objekt ist maskiert 'package:purrr':
##
## some
car::leveneTest(dd$anx_neutr, dd$group)
## Warning in leveneTest.default(dd$anx_neutr, dd$group): dd$group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 0.7889 0.4559
## 177
# if there are problems with heterogenity of variances
# compare to 'normal' test
summary(aov(anx_neutr ~ group, data=dd))
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 13221 6610 43.53 4.21e-16 ***
## Residuals 177 26877 152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(aov(anx_neutr ~ group, data=dd))
## Analysis of Variance Table
##
## Response: anx_neutr
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 13221 6610.4 43.533 4.208e-16 ***
## Residuals 177 26877 151.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# use oneway - Welch's F ratio
oneway.test(anx_neutr ~group, data=dd)
##
## One-way analysis of means (not assuming equal variances)
##
## data: anx_neutr and group
## F = 43.455, num df = 2.00, denom df = 117.56, p-value = 7.43e-15
Problem multiplen Testens und damit Alpha-Inflation. Korrektur des Alpha-Niveaus nach verschiedenen Methoden:
Bonferroni, Holm, Benjamini-Hochberg etc. help(p.adjust)
Verschiedene Methoden unterscheiden sich in Bezug auf die Menge der
durchzuführenden Post-Hoc-Tests, Power, wie konservativ sie sind
etc.
# get data
# dd <- read.delim("https://md.psych.bio.uni-goettingen.de/mv/data/virt/v_stait_exam_wide.txt")[c('subj','gender','group', 'neutr')]
dd <- readr::read_tsv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/v_stait_exam_wide.txt")[c('subj','gender','group', 'neutr')]
## Rows: 180 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): gender, group
## dbl (4): subj, neutr, exam1, exam2
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# we rename neutr to anx_neutr, it's state anxiety in neutral situation
dd <- dd %>% dplyr::rename(anx_neutr = neutr)
dd$group <- factor(dd$group)
# do multiple comparisons with bonferroni correction of alpha
pairwise.t.test(dd$anx_neutr, dd$group, p.adjust.method = "bonferroni")
##
## Pairwise comparisons using t tests with pooled SD
##
## data: dd$anx_neutr and dd$group
##
## av hi
## hi 6.3e-06 -
## lo 5.1e-05 < 2e-16
##
## P value adjustment method: bonferroni
# Tukey and Dunnett
require(multcomp)
## Lade nötiges Paket: multcomp
## Lade nötiges Paket: mvtnorm
## Lade nötiges Paket: survival
## Lade nötiges Paket: TH.data
## Lade nötiges Paket: MASS
##
## Attache Paket: 'MASS'
##
## Das folgende Objekt ist maskiert 'package:dplyr':
##
## select
##
##
## Attache Paket: 'TH.data'
##
## Das folgende Objekt ist maskiert 'package:MASS':
##
## geyser
m.anx <- aov(anx_neutr ~ group, data=dd)
post.hocs <- multcomp::glht(m.anx, linfct = mcp(group = "Tukey"))
summary(post.hocs)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = anx_neutr ~ group, data = dd)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## hi - av == 0 11.03 2.25 4.904 < 1e-05 ***
## lo - av == 0 -9.95 2.25 -4.423 4.97e-05 ***
## lo - hi == 0 -20.98 2.25 -9.327 < 1e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
confint(post.hocs)
##
## Simultaneous Confidence Intervals
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = anx_neutr ~ group, data = dd)
##
## Quantile = 2.3634
## 95% family-wise confidence level
##
##
## Linear Hypotheses:
## Estimate lwr upr
## hi - av == 0 11.0333 5.7162 16.3505
## lo - av == 0 -9.9500 -15.2671 -4.6329
## lo - hi == 0 -20.9833 -26.3005 -15.6662
# Dunnett, base specifies group to compare with
# post.hocs <- glht(m.anx, linfct = mcp(group = "Dunnett"), base=1)
post.hocs <- glht(m.anx, linfct = mcp(group = "Dunnett"))
summary(post.hocs)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Dunnett Contrasts
##
##
## Fit: aov(formula = anx_neutr ~ group, data = dd)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## hi - av == 0 11.03 2.25 4.904 4.22e-06 ***
## lo - av == 0 -9.95 2.25 -4.423 3.38e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
confint(post.hocs)
##
## Simultaneous Confidence Intervals
##
## Multiple Comparisons of Means: Dunnett Contrasts
##
##
## Fit: aov(formula = anx_neutr ~ group, data = dd)
##
## Quantile = 2.23
## 95% family-wise confidence level
##
##
## Linear Hypotheses:
## Estimate lwr upr
## hi - av == 0 11.0333 6.0164 16.0503
## lo - av == 0 -9.9500 -14.9669 -4.9331
Zur Vergleichbarkeit des varianzanalytischen mit dem regressionsanalytischen Ansatzes braucht man die Dummy-Codierungen. Es werden immer Faktorstufen - 1 Dummy-Variablen gebraucht, um einen Faktor zu kodieren.
dd <- read.delim("https://md.psych.bio.uni-goettingen.de/mv/data/virt/v_stait_exam_wide.txt")[c('subj','gender','group', 'neutr')]
# we rename neutr to anx_neutr, it's state anxiety in neutral situation
dd <- dd %>% dplyr::rename(anx_neutr = neutr)
dd.t <- dd
# do reference coding. reference group is 'normal students'. we need 2 dummy variables d.hi and d.lo
dd.t$d.hi <- 0
dd.t$d.lo <- 0
# low group contrasted in dummy var 2
dd.t$d.lo[dd.t$group == 'lo'] <- 1
# hi group contrasted in dummy var 1
dd.t$d.hi[dd.t$group == 'hi'] <- 1
# the default model
m.d1 <- aov(anx_neutr ~ group, data=dd.t)
# the reference contrast model
m.d2 <- aov(anx_neutr ~ d.hi + d.lo, data=dd.t)
# now we do it with lm()
m.d3 <- lm(anx_neutr ~ d.hi + d.lo, data=dd.t)
# get the global F-Test
summary(m.d1)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 13221 6610 43.53 4.21e-16 ***
## Residuals 177 26877 152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m.d2)
## Df Sum Sq Mean Sq F value Pr(>F)
## d.hi 1 10251 10251 67.51 4.29e-14 ***
## d.lo 1 2970 2970 19.56 1.70e-05 ***
## Residuals 177 26877 152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m.d3)
##
## Call:
## lm(formula = anx_neutr ~ d.hi + d.lo, data = dd.t)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.533 -7.779 0.483 7.754 34.483
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43.483 1.591 27.334 < 2e-16 ***
## d.hi 11.033 2.250 4.904 2.11e-06 ***
## d.lo -9.950 2.250 -4.423 1.70e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.32 on 177 degrees of freedom
## Multiple R-squared: 0.3297, Adjusted R-squared: 0.3221
## F-statistic: 43.53 on 2 and 177 DF, p-value: 4.208e-16
coefficients(m.d1)
## (Intercept) grouphi grouplo
## 43.48333 11.03333 -9.95000
coefficients(m.d2)
## (Intercept) d.hi d.lo
## 43.48333 11.03333 -9.95000
coefficients(m.d3)
## (Intercept) d.hi d.lo
## 43.48333 11.03333 -9.95000
Faktoren haben in R spezielle Eigenschaften und werden in
statistischen Modellen entsprechend behandelt. Faktoren haben Stufen
(level
), also eine bestimmte Anzahl unterschiedlicher
Ausprägungen, die sich über die Beobachtungen weg wiederholen.
Faktorstufen ordnen Beobachtungen Untergruppen zu.
read.table()
mit seinen Unterbefehlen
read.delim()
oder read.csv()
machen beim
Einlesen einen “best guess”, wenn sie Spalten vorfinden, die
Zeichenketten (str
) enthalten. Wiederholen sich
Ausprägungen, wird direkt der Datentyp factor
angenommen
und gesetzt. Tibbles, eingelesen via read_tsv()
,
read_csv()
aus der library(tidyverse)
setzen
bei Spalten mit Zeichenketten immer den Datentyp str
. Die
ggf. notwendige Umwandlung in den Typ factor
muss explizit
erfolgen.
Faktorstufen haben eine Reihenfolge. Wenn nicht näher angegeben, ordnet R die Faktorstufen alphabetisch. Die Reihenfolge der Faktorstufen hat Auswirkungen auf die Schätzung der Parameter in statistischen Modellen.
Die Reihenfolge kann explizit gesetzt werden über den Parameter
levels = c(...)
bei der Umwandlung in Faktoren. Mit dem
Parameter labels = c(...)
können wir die Faktorstufen auch
neu benennen. levels
kann auch einfach einen Zahlenvektor
bekommen und richtet sich dann nach der impliziten alphabetischen
Reihenfolge.
Faktoren können auch ordered sein, was durch den Parameter
ordered = ...
gesetzt wird. Dies gibt den Faktorstufen eine
Reihenfolge, die Rangskalenniveau impliziert. Bei der Ausgabe eines
ordered Faktors werden dann auch <
zwischen die
Faktorstufen gesetzt.
require(tidyverse)
dd <- read_tsv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/v_stait_exam_wide.txt")[c('subj','gender','group', 'neutr')]
## Rows: 180 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): gender, group
## dbl (4): subj, neutr, exam1, exam2
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# we rename neutr to anx_neutr, it's state anxiety in neutral situation
dd <- dd %>% dplyr::rename(anx_neutr = neutr)
dd <- dd %>% arrange(desc(anx_neutr))
# we inspect group
str(dd)
## tibble [180 × 4] (S3: tbl_df/tbl/data.frame)
## $ subj : num [1:180] 143 141 23 133 21 147 122 139 156 132 ...
## $ gender : chr [1:180] "f" "f" "f" "f" ...
## $ group : chr [1:180] "hi" "hi" "av" "hi" ...
## $ anx_neutr: num [1:180] 89 86 76 75 73 73 72 72 72 68 ...
head(dd$group)
## [1] "hi" "hi" "av" "hi" "av" "hi"
# obviously, read_tsv sets gender and group to type chr
# if we want to use these in models, they have to be of type factor
# let's experiment with group
# default conversion, alphabetical order of factor levels
dd$group.fu <- factor(dd$group)
# if we want to have specific order of factor levels, we can set that using ordered
dd$group.f1 <- factor(dd$group, levels = c('lo', 'av', 'hi'))
# we can also change an existing factor's order and label it more verbos
dd$group.f2 <- factor(dd$group.f1, levels = c('av', 'lo', 'hi'), labels = c('average', 'low', 'high'))
# we take a look at the results
head(dd)
## # A tibble: 6 × 7
## subj gender group anx_neutr group.fu group.f1 group.f2
## <dbl> <chr> <chr> <dbl> <fct> <fct> <fct>
## 1 143 f hi 89 hi hi high
## 2 141 f hi 86 hi hi high
## 3 23 f av 76 av av average
## 4 133 f hi 75 hi hi high
## 5 21 f av 73 av av average
## 6 147 f hi 73 hi hi high
# we see <ord> as column type in our tibble
head(dd$group)
## [1] "hi" "hi" "av" "hi" "av" "hi"
head(dd$group.f1, n=20)
## [1] hi hi av hi av hi hi hi hi hi hi hi hi hi hi hi hi hi av hi
## Levels: lo av hi
head(dd$group.f2, n=10)
## [1] high high average high average high high high high
## [10] high
## Levels: average low high
# the changed order is also reflected in
as.numeric(head(dd$group.fu, n=30))
## [1] 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 3 1 2 1 1 2 2
as.numeric(head(dd$group.f1, n=30))
## [1] 3 3 2 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 2 3 3 1 2 3 2 2 3 3
as.numeric(head(dd$group.f2, n=30))
## [1] 3 3 1 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 1 3 3 2 1 3 1 1 3 3
# if we use these factors now in linear models, they differ
summary(lm(anx_neutr ~ group.fu, data = dd))
##
## Call:
## lm(formula = anx_neutr ~ group.fu, data = dd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.533 -7.779 0.483 7.754 34.483
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43.483 1.591 27.334 < 2e-16 ***
## group.fuhi 11.033 2.250 4.904 2.11e-06 ***
## group.fulo -9.950 2.250 -4.423 1.70e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.32 on 177 degrees of freedom
## Multiple R-squared: 0.3297, Adjusted R-squared: 0.3221
## F-statistic: 43.53 on 2 and 177 DF, p-value: 4.208e-16
summary(lm(anx_neutr ~ group.f1, data = dd))
##
## Call:
## lm(formula = anx_neutr ~ group.f1, data = dd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.533 -7.779 0.483 7.754 34.483
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.533 1.591 21.079 < 2e-16 ***
## group.f1av 9.950 2.250 4.423 1.7e-05 ***
## group.f1hi 20.983 2.250 9.327 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.32 on 177 degrees of freedom
## Multiple R-squared: 0.3297, Adjusted R-squared: 0.3221
## F-statistic: 43.53 on 2 and 177 DF, p-value: 4.208e-16
summary(lm(anx_neutr ~ group.f2, data = dd))
##
## Call:
## lm(formula = anx_neutr ~ group.f2, data = dd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.533 -7.779 0.483 7.754 34.483
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43.483 1.591 27.334 < 2e-16 ***
## group.f2low -9.950 2.250 -4.423 1.70e-05 ***
## group.f2high 11.033 2.250 4.904 2.11e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.32 on 177 degrees of freedom
## Multiple R-squared: 0.3297, Adjusted R-squared: 0.3221
## F-statistic: 43.53 on 2 and 177 DF, p-value: 4.208e-16
# coefficients reflect the different sequence, the reference group is different
lm(anx_neutr ~ group.fu, data = dd)$coefficients
## (Intercept) group.fuhi group.fulo
## 43.48333 11.03333 -9.95000
lm(anx_neutr ~ group.f1, data = dd)$coefficients
## (Intercept) group.f1av group.f1hi
## 33.53333 9.95000 20.98333
lm(anx_neutr ~ group.f2, data = dd)$coefficients
## (Intercept) group.f2low group.f2high
## 43.48333 -9.95000 11.03333
# we also see the differences in the model.matrix()
head(model.matrix(lm(anx_neutr ~ group.fu, data = dd)))
## (Intercept) group.fuhi group.fulo
## 1 1 1 0
## 2 1 1 0
## 3 1 0 0
## 4 1 1 0
## 5 1 0 0
## 6 1 1 0
head(model.matrix(lm(anx_neutr ~ group.f1, data = dd)))
## (Intercept) group.f1av group.f1hi
## 1 1 0 1
## 2 1 0 1
## 3 1 1 0
## 4 1 0 1
## 5 1 1 0
## 6 1 0 1
head(model.matrix(lm(anx_neutr ~ group.f2, data = dd)))
## (Intercept) group.f2low group.f2high
## 1 1 0 1
## 2 1 0 1
## 3 1 0 0
## 4 1 0 1
## 5 1 0 0
## 6 1 0 1
require(psych)
## Lade nötiges Paket: psych
##
## Attache Paket: 'psych'
## Das folgende Objekt ist maskiert 'package:car':
##
## logit
## Die folgenden Objekte sind maskiert von 'package:ggplot2':
##
## %+%, alpha
require(effects)
# get data
dd <- read.delim("https://md.psych.bio.uni-goettingen.de/mv/data/virt/v_stait_exam_wide.txt")[c('subj','gender','group', 'neutr')]
# we rename neutr to anx_neutr, it's state anxiety in neutral situation
dd <- dd %>% dplyr::rename(anx_neutr = neutr)
#psych::describe(dd)
dd.t <- dd
# do reference coding. reference group is 'normal students'. we need 2 dummy variables dr1 and dr2
dd.t$dr1 <- 0
dd.t$dr2 <- 0
# low group contrasted in dummy var 2
dd.t$dr2[dd.t$group == 'lo'] <- 1
# hi group contrasted in dummy var 1
dd.t$dr1[dd.t$group == 'hi'] <- 1
# the default model
m.anx <- aov(anx_neutr ~ group, data=dd.t)
# contrast matrix to see the dummy coding
unique(model.matrix(m.anx))
## (Intercept) grouphi grouplo
## 1 1 0 0
## 61 1 0 1
## 121 1 1 0
# the reference contrast model
m.anx.r <- aov(anx_neutr ~ dr1 + dr2, data=dd.t)
unique(model.matrix(m.anx.r))
## (Intercept) dr1 dr2
## 1 1 0 0
## 61 1 0 1
## 121 1 1 0
# get the global F-Test
summary(m.anx)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 13221 6610 43.53 4.21e-16 ***
## Residuals 177 26877 152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m.anx.r)
## Df Sum Sq Mean Sq F value Pr(>F)
## dr1 1 10251 10251 67.51 4.29e-14 ***
## dr2 1 2970 2970 19.56 1.70e-05 ***
## Residuals 177 26877 152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coefficients(m.anx)
## (Intercept) grouphi grouplo
## 43.48333 11.03333 -9.95000
coefficients(m.anx.r)
## (Intercept) dr1 dr2
## 43.48333 11.03333 -9.95000
# compare coefficients with means to see their meaning
allEffects(m.anx)
## model: anx_neutr ~ group
##
## group effect
## group
## av hi lo
## 43.48333 54.51667 33.53333
# show it
require(ggplot2)
ggplot(dd.t, x=group, y=anx_neutr, aes(group, anx_neutr, fill = group)) +
stat_summary(fun.y=mean, geom="bar") +
stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width=0.2) +
geom_hline(yintercept = c(coefficients(m.anx.r)[1]))
## Warning: `fun.y` is deprecated. Use `fun` instead.
# effect coding in contrasts
dd.t$de1 <- 0
dd.t$de2 <- 0
# one group gets -1 for all dummy variables, here group 'av'
dd.t$de1[dd.t$group == 'av'] <- -1
dd.t$de2[dd.t$group == 'av'] <- -1
# low group contrasted in dummy var 2
dd.t$de2[dd.t$group == 'lo'] <- 1
# hi group contrasted in dummy var 1
dd.t$de1[dd.t$group == 'hi'] <- 1
# the common model
m.anx <- aov(anx_neutr ~ group, data=dd.t)
unique(model.matrix(m.anx))
## (Intercept) grouphi grouplo
## 1 1 0 0
## 61 1 0 1
## 121 1 1 0
# the reference contrast model
m.anx.e <- aov(anx_neutr ~ de1 + de2, data=dd.t)
unique(model.matrix(m.anx.e))
## (Intercept) de1 de2
## 1 1 -1 -1
## 61 1 0 1
## 121 1 1 0
# get the global F-Test
summary(m.anx)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 13221 6610 43.53 4.21e-16 ***
## Residuals 177 26877 152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m.anx.e)
## Df Sum Sq Mean Sq F value Pr(>F)
## de1 1 3652 3652 24.05 2.11e-06 ***
## de2 1 9569 9569 63.02 2.27e-13 ***
## Residuals 177 26877 152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# show it
ggplot(dd.t, x=group, y=anx_neutr, aes(group, anx_neutr, fill = group)) +
stat_summary(fun.y=mean, geom="bar") +
stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width=0.2) +
geom_hline(yintercept = c(coefficients(m.anx.e)[1]))
## Warning: `fun.y` is deprecated. Use `fun` instead.
# the three models
coefficients(m.anx)
## (Intercept) grouphi grouplo
## 43.48333 11.03333 -9.95000
coefficients(m.anx.r)
## (Intercept) dr1 dr2
## 43.48333 11.03333 -9.95000
coefficients(m.anx.e)
## (Intercept) de1 de2
## 43.84444 10.67222 -10.31111
# compare coefficients of both, reference and effect dummy coding with means to see that they code means
allEffects(m.anx)
## model: anx_neutr ~ group
##
## group effect
## group
## av hi lo
## 43.48333 54.51667 33.53333
psych::describe(dd$anx_neutr)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 180 43.84 14.97 42 43.44 15.57 5 89 84 0.25 -0.06 1.12
# rm(dd.t)
Reference-Coding ist Default in R. Die Referenzgruppe wird in Abhängigkeit von der Benennung der Faktorstufen gewählt (alphabetisch geordnet). Dies hat Auswirkungen auf die Interpretation der Koeffizienten. Die Ordnung von Faktorstufen kann aber festgelegt werden, alphabetische Sortierung ist nur Default. Beispiele unten.
require(psych)
require(effects)
# get data
dd <- read.delim("https://md.psych.bio.uni-goettingen.de/mv/data/virt/v_stait_exam_wide.txt")[c('subj','gender','group', 'neutr')]
# we rename neutr to anx_neutr, it's state anxiety in neutral situation
dd <- dd %>% dplyr::rename(anx_neutr = neutr)
dd$group <- factor(dd$group)
dd$n_group <- factor(c(rep('mm', 60), rep('lo',60), rep('hi',60)))
# we use the alphabetic sorting and call levels accordingly
dd$g_group <- factor(c(rep('zz', 60), rep('alo',60), rep('zhi',60)))
m.1 <- aov(anx_neutr ~ group, data=dd)
m.2 <- aov(anx_neutr ~ n_group, data=dd)
m.3 <- aov(anx_neutr ~ g_group, data=dd)
m.1$coefficients
## (Intercept) grouphi grouplo
## 43.48333 11.03333 -9.95000
m.2$coefficients
## (Intercept) n_grouplo n_groupmm
## 54.51667 -20.98333 -11.03333
m.3$coefficients
## (Intercept) g_groupzhi g_groupzz
## 33.53333 20.98333 9.95000
# default coding in aov() is reference coding against the first group name using alphabetical order
summary(m.1)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 13221 6610 43.53 4.21e-16 ***
## Residuals 177 26877 152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m.2)
## Df Sum Sq Mean Sq F value Pr(>F)
## n_group 2 13221 6610 43.53 4.21e-16 ***
## Residuals 177 26877 152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m.3)
## Df Sum Sq Mean Sq F value Pr(>F)
## g_group 2 13221 6610 43.53 4.21e-16 ***
## Residuals 177 26877 152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# show it
require(ggplot2)
ggplot(dd, x=group, y=anx_neutr, aes(group, anx_neutr, fill = group)) +
stat_summary(fun.y=mean, geom="bar", position=position_dodge()) +
stat_summary(fun.data = mean_cl_normal, geom = "errorbar", position=position_dodge(width=0.9), width=0.2) +
geom_hline(yintercept = c(coefficients(m.1)[1]))
## Warning: `fun.y` is deprecated. Use `fun` instead.
# n_group
ggplot(dd, x=n_group, y=anx_neutr, aes(n_group, anx_neutr, fill = n_group)) +
stat_summary(fun.y=mean, geom="bar", position=position_dodge()) +
stat_summary(fun.data = mean_cl_normal, geom = "errorbar", position=position_dodge(width=0.9), width=0.2) +
geom_hline(yintercept = c(coefficients(m.2)[1]))
## Warning: `fun.y` is deprecated. Use `fun` instead.
# g_group
ggplot(dd, x=g_group, y=anx_neutr, aes(g_group, anx_neutr, fill = g_group)) +
stat_summary(fun.y=mean, geom="bar", position=position_dodge()) +
stat_summary(fun.data = mean_cl_normal, geom = "errorbar", position=position_dodge(width=0.9), width=0.2) +
geom_hline(yintercept = c(coefficients(m.3)[1]))
## Warning: `fun.y` is deprecated. Use `fun` instead.
# we can redefine factor levels of existing factors to order levels due to our wishes
gr <- dd$group
# we have default alphabetical order
levels(gr)
## [1] "av" "hi" "lo"
# we can define one level to be the first by using command `relevel()`
gr <- relevel(gr, "lo")
levels(gr)
## [1] "lo" "av" "hi"
# we can also entirely define order of levels explicitly
levels(gr) <- c("hi", "lo", "av")
levels(gr)
## [1] "hi" "lo" "av"
Ein Paket zum komfortablen Berechnen von varianzanalytischen Modellen. Default-Einstellungen sind ähnlich zu denen anderer Statistikprogramme, z. B. Quadratsummen Typ III. Einfache Grafiken mit Streuungsinformationen zu den Gruppenmittelwerten. Achtung: Im Gegensatz zu anderen Statistikprogrammen braucht ´library(ez)` für Messwiederholungsmodelle das auch ansonsten übliche Long-Format. Siehe Unit long_wide
# get data
dd <- read.delim("https://md.psych.bio.uni-goettingen.de/mv/data/virt/v_stait_exam_wide.txt")[c('subj','gender','group', 'neutr')]
# we rename neutr to anx_neutr, it's state anxiety in neutral situation
dd <- dd %>% dplyr::rename(anx_neutr = neutr)
#head(dd)
require(ez)
## Lade nötiges Paket: ez
rr <- ezANOVA(dd, anx_neutr, subj, between=group)
## Warning: Converting "subj" to factor for ANOVA.
## Warning: Converting "group" to factor for ANOVA.
## Coefficient covariances computed by hccm()
# clearer with parameter names
ezANOVA(data=dd, dv=anx_neutr, wid=subj, between=group)
## Warning: Converting "subj" to factor for ANOVA.
## Warning: Converting "group" to factor for ANOVA.
## Coefficient covariances computed by hccm()
## $ANOVA
## Effect DFn DFd F p p<.05 ges
## 1 group 2 177 43.53314 4.207633e-16 * 0.3297137
##
## $`Levene's Test for Homogeneity of Variance`
## DFn DFd SSn SSd F p p<.05
## 1 2 177 88.41111 9918.167 0.7888941 0.4559379
# we can also get a plot using
ezPlot(dd, dv=anx_neutr, x=group, wid=subj, between = .(group))
## Warning: Converting "subj" to factor for ANOVA.
## Warning: Converting "group" to factor for ANOVA.
## Coefficient covariances computed by hccm()
detach("package:ez", unload=TRUE)
ges Generalized Eta-Squared measure of effect size (see in references below: Bakeman, 2005).
Effect Size Anova η2
Small | Medium | Large |
---|---|---|
0.01 | 0.06 | 0.14 |
siehe html
siehe lm_cat_factorial html
{} todo
siehe lm_cat_repeated html
siehe lm_cat_mixed html
siehe lm_cat_unbalanced html
Grundlagen finden sich in der Unit zu unbalanced designs. [lm_cat_unbal.html]
Rcmdr unterstützt einfaktorielle ANOVA unter ‘Statistik | Mittelwertvergleiche’ ‘Statistik | Mittelwerte vergleichen | Einfaktorielle Varianzanalyse’
Grafische Darstellung hiervon in Rcmdr nach Auswahl des berechneten Modells: ‘Modelle | wähle aktives Modell aus’ ‘Modelle | Grafiken | Effect plots’
Beispiele und Übungen finden sich bei den speziellen Unter-Units.
Eine Einführung
unter Nutzung des Befehls aov()
(base).
Viele Anova Designs incl. R-Code und Beispieldaten.
[Field (2012)][field] Kapitel comparing means
[field]: https://www.uk.sagepub.com/dsur/main.htm “Field et al (2012). Discovering Statistics using R”
lm_cat_factorial lm_cat_unbalanced lm_cat_repeated lm_cat_mixed
Sammlung R und Anova, Einstiegspunkte hier
continuous reaction variable, categorial explanatory variable(s)
group membership is the predictive variable, it has to be of type
factor()
(Sub-)Group mean is estimated reaction variable
parameters reflect group means or differences of group means or distances of group means to some other value f. e. grand mean
factors and models
factor(..., levels = c(...))
factor()
: in reference
coding the first level is the reference group“classical” ANOVA designs report sums of squares (SS) and omnibus tests with more than 2 levels in an explanatory variable, we don’t know which sub-differences are significant
aov()
and car::Anova()
applied to a
lm()
result object provide ANOVA summary tables with SS and
omnibus-tests for factors
more than 2 levels of an eplanatory factor variable are split in dummy variable (levels - 1)
using model.matrix()
, even better combined with
unique()
shows us the dummy variables and their
coding
this is, why we have p-1 or q-1 df when we look at significance tests in categorial explanatory variables designs
cell size differences matter unbalanced designs
Meta-Packages oder Wrapper-Packages Eine komfortable Art für
“klassische” varianzanalytische Messwiederholungs- und Mixed-Designs
bieten Meta-Packages wie library(ez)
, die pipe-friendly
library(rstatx)
oder `library(afex) mit lme4 Inclusion und
flexiblen Effektstärken etc.
dd <- readr::read_tsv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/v_stait_exam_wide.txt")
## Rows: 180 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): gender, group
## dbl (4): subj, neutr, exam1, exam2
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# we rename neutr to anx_neutr, it's state anxiety in neutral situation
dd <- dd %>% dplyr::rename(anx_neutr = neutr)
s.subj <- sample(dd$subj)
ggplot2::ggplot(dd, aes(x=s.subj, y=anx_neutr)) + geom_point() + geom_hline(yintercept=mean(dd$anx_neutr))
ggplot2::ggplot(dd, aes(x=s.subj, y=anx_neutr, color = group)) + geom_point() +
stat_summary(fun = mean, aes(x = 1, yintercept = ..y.., group = group), geom = "hline")
dd$f.group <- factor(dd$group, levels=c("lo", "av", "hi"))
mm <- lm(anx_neutr ~ f.group, data=dd)
unique(model.matrix(mm))
## (Intercept) f.groupav f.grouphi
## 1 1 1 0
## 61 1 0 0
## 121 1 0 1
summary(mm)
##
## Call:
## lm(formula = anx_neutr ~ f.group, data = dd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.533 -7.779 0.483 7.754 34.483
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.533 1.591 21.079 < 2e-16 ***
## f.groupav 9.950 2.250 4.423 1.7e-05 ***
## f.grouphi 20.983 2.250 9.327 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.32 on 177 degrees of freedom
## Multiple R-squared: 0.3297, Adjusted R-squared: 0.3221
## F-statistic: 43.53 on 2 and 177 DF, p-value: 4.208e-16
# aov gives us the SS
aov(mm)
## Call:
## aov(formula = mm)
##
## Terms:
## f.group Residuals
## Sum of Squares 13220.74 26876.90
## Deg. of Freedom 2 177
##
## Residual standard error: 12.32262
## Estimated effects may be unbalanced
# package car's Anova gives us the classical SS omnibus tests and moreover Types of SS if we want
car::Anova(mm, type=3)
## Anova Table (Type III tests)
##
## Response: anx_neutr
## Sum Sq Df F value Pr(>F)
## (Intercept) 67469 1 444.323 < 2.2e-16 ***
## f.group 13221 2 43.533 4.208e-16 ***
## Residuals 26877 177
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1