Chapter 13 Formatumwandlung long_wide
Zur Behandlung von Messwiederholungsdaten. Auf dem Weg zu “tidy data”.
Es gibt mehrere Ansätze zur Umwandlung zwischen long und wide Data.
Für das Tidyverse konzentrieren wir uns hier auf library(tidyr)
und hier auf die Befehle tidyr::pivot_longer()
und tidyr::pivot_wider()
.
13.1 Grundidee
subj: Versuchsperson gender: ‘f’, ‘m’ t1: AV Zeitpunkt 1 t2: AV Zeitpunkt 2
# wide format
<- data.frame(
dd.w subj = c(1,2,3,4),
gender = c('f', 'f', 'm', 'm'),
v_t1 = c(4, 5, 7, 6),
v_t2 = c(6, 5, 9, 8)
)getwd()
## [1] "/Users/pzezula/ownCloud/lehre_ss_2022/unit/b_data_mangling"
::write_csv(dd, "long_wide_v.csv")
readr<- readr::read_csv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/long_wide_v.csv") dd
## Rows: 12 Columns: 7
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gender
## dbl (6): subj, age, grp, v1, v2, v3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# conversion to long format
require(tidyr)
<- dd.w %>% tidyr::gather(key=time, value=v, v_t1, v_t2)
df.l # or even shorter: key and value can be used as positional parameters too
<- dd.w %>% tidyr::gather(time, v, v_t1, v_t2)
df.l # we want to have all repeated measures for each person in consecutive lines
%>% dplyr::arrange(subj, time) -> df.l
df.l
# data in wide format
dd.w
## subj gender v_t1 v_t2
## 1 1 f 4 6
## 2 2 f 5 5
## 3 3 m 7 9
## 4 4 m 6 8
# same data in long format
df.l
## subj gender time v
## 1 1 f v_t1 4
## 2 1 f v_t2 6
## 3 2 f v_t1 5
## 4 2 f v_t2 5
## 5 3 m v_t1 7
## 6 3 m v_t2 9
## 7 4 m v_t1 6
## 8 4 m v_t2 8
# ... we notice that variables subj and gender are kept and adequately repeated
# a dataframe with two repeated measures variables
<- data.frame(
dd.w subj = c(1,2,3,4),
gender = c('f', 'f', 'm', 'm'),
v_t1 = c(4, 5, 7, 6),
v_t2 = c(6, 5, 9, 8),
w_t1 = c(5, 6, 8, 7),
w_t2 = c(3, 4, 6, 5)
)getwd()
## [1] "/Users/pzezula/ownCloud/lehre_ss_2022/unit/b_data_mangling"
::write_csv(dd.w, "long_wide_vw.csv")
readr# can be read as
<- readr::read_csv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/long_wide_vw.csv") dd.w
## Rows: 4 Columns: 6
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gender
## dbl (5): subj, v_t1, v_t2, w_t1, w_t2
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
13.2 Base R
13.2.1 Wide zu long mit Slicing-Techniken
require(tidyverse)
<- readr::read_csv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/long_wide_v.csv") dd
## Rows: 12 Columns: 7
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gender
## dbl (6): subj, age, grp, v1, v2, v3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# we create a dataframe of t1 data
.1 <- dd[, c('subj', 'gender', 'v1')]
dd# we create column y with outcome values
.1['y'] <- dd.1$v1
dd# ... and erase old column t1
.1$v1 <- NULL
dd# and add t1 info
.1['time'] <- 't1'
dd
# the same with t2 data
.2 <- dd[, c('subj', 'gender', 'v2')]
dd# and add t2 info
.2['time'] <- 't2'
dd# we create column y with outcome values
.2['y'] <- dd.2$v2
dd# we delete column t2
.2$v2 <- NULL
dd
# we now merge the two parts
<- rbind(dd.1, dd.2)
dd.l
# wide
dd
## # A tibble: 12 × 7
## subj gender age grp v1 v2 v3
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 f 17 1 9 16 8
## 2 2 f 33 2 67 73 66
## 3 3 f 47 3 86 87 91
## 4 4 f 10 1 64 68 67
## 5 5 f 21 2 40 46 44
## 6 6 f 30 3 26 34 28
## 7 7 m 51 1 64 66 64
## 8 8 m 13 2 61 66 64
## 9 9 m 17 3 67 67 67
## 10 10 m 25 1 38 36 35
## 11 11 m 33 2 22 25 21
## 12 12 m 27 3 81 86 81
# long
dd.l
## # A tibble: 24 × 4
## subj gender y time
## <dbl> <chr> <dbl> <chr>
## 1 1 f 9 t1
## 2 2 f 67 t1
## 3 3 f 86 t1
## 4 4 f 64 t1
## 5 5 f 40 t1
## 6 6 f 26 t1
## 7 7 m 64 t1
## 8 8 m 61 t1
## 9 9 m 67 t1
## 10 10 m 38 t1
## # … with 14 more rows
# we delete temporary data
rm(dd.1, dd.2)
# {} todo two repeated measures variables, long_wide_vw.csv
13.3 Tidyverse: tidyr::pivot_longer()
und tidyr::pivot_wider()
require(tidyverse)
<- readr::read_csv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/long_wide_vw.csv") dd.w
## Rows: 4 Columns: 6
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gender
## dbl (5): subj, v_t1, v_t2, w_t1, w_t2
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# we convert v values only
%>% tidyr::pivot_longer(
dd.w cols = starts_with("v_"),
names_to = "tv",
values_to = "vl"
)
## # A tibble: 8 × 6
## subj gender w_t1 w_t2 tv vl
## <dbl> <chr> <dbl> <dbl> <chr> <dbl>
## 1 1 f 5 3 v_t1 4
## 2 1 f 5 3 v_t2 6
## 3 2 f 6 4 v_t1 5
## 4 2 f 6 4 v_t2 5
## 5 3 m 8 6 v_t1 7
## 6 3 m 8 6 v_t2 9
## 7 4 m 7 5 v_t1 6
## 8 4 m 7 5 v_t2 8
# we convert v and w values
%>%
dd.w ::pivot_longer(
tidyrcols = starts_with("v_"),
names_to = "tv",
values_to = "vl"
%>%
) ::pivot_longer(
tidyrcols = starts_with("w_"),
names_to = "tw",
values_to = "wl"
)
## # A tibble: 16 × 6
## subj gender tv vl tw wl
## <dbl> <chr> <chr> <dbl> <chr> <dbl>
## 1 1 f v_t1 4 w_t1 5
## 2 1 f v_t1 4 w_t2 3
## 3 1 f v_t2 6 w_t1 5
## 4 1 f v_t2 6 w_t2 3
## 5 2 f v_t1 5 w_t1 6
## 6 2 f v_t1 5 w_t2 4
## 7 2 f v_t2 5 w_t1 6
## 8 2 f v_t2 5 w_t2 4
## 9 3 m v_t1 7 w_t1 8
## 10 3 m v_t1 7 w_t2 6
## 11 3 m v_t2 9 w_t1 8
## 12 3 m v_t2 9 w_t2 6
## 13 4 m v_t1 6 w_t1 7
## 14 4 m v_t1 6 w_t2 5
## 15 4 m v_t2 8 w_t1 7
## 16 4 m v_t2 8 w_t2 5
# ... every v is repeated for w values
# if v and w were the same value from different occasions and 1 or 2 specify time
%>%
dd.w ::pivot_longer(
tidyrcols = v_t1:w_t2,
names_to = c("occasion", "time"),
names_sep = "_",
values_to = "score"
)
## # A tibble: 16 × 5
## subj gender occasion time score
## <dbl> <chr> <chr> <chr> <dbl>
## 1 1 f v t1 4
## 2 1 f v t2 6
## 3 1 f w t1 5
## 4 1 f w t2 3
## 5 2 f v t1 5
## 6 2 f v t2 5
## 7 2 f w t1 6
## 8 2 f w t2 4
## 9 3 m v t1 7
## 10 3 m v t2 9
## 11 3 m w t1 8
## 12 3 m w t2 6
## 13 4 m v t1 6
## 14 4 m v t2 8
## 15 4 m w t1 7
## 16 4 m w t2 5
13.4 Referenzen
Vignette Pivoting
Beispiele und Erklärungen: Unit long wide
Beispiele und Erklärungen: Unit transformation base
Beispiele und Erklärungen: Unit transformation dplyr