Chapter 13 Formatumwandlung long_wide
Zur Behandlung von Messwiederholungsdaten. Auf dem Weg zu “tidy data”.
Es gibt mehrere Ansätze zur Umwandlung zwischen long und wide Data.
Für das Tidyverse konzentrieren wir uns hier auf library(tidyr) und hier auf die Befehle tidyr::pivot_longer() und tidyr::pivot_wider().
13.1 Grundidee
subj: Versuchsperson gender: ‘f’, ‘m’ t1: AV Zeitpunkt 1 t2: AV Zeitpunkt 2
# wide format
dd.w <- data.frame(
subj = c(1,2,3,4),
gender = c('f', 'f', 'm', 'm'),
v_t1 = c(4, 5, 7, 6),
v_t2 = c(6, 5, 9, 8)
)
getwd()## [1] "/Users/pzezula/ownCloud/lehre_ss_2022/unit/b_data_mangling"
readr::write_csv(dd, "long_wide_v.csv")
dd <- readr::read_csv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/long_wide_v.csv")## Rows: 12 Columns: 7
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gender
## dbl (6): subj, age, grp, v1, v2, v3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# conversion to long format
require(tidyr)
df.l <- dd.w %>% tidyr::gather(key=time, value=v, v_t1, v_t2)
# or even shorter: key and value can be used as positional parameters too
df.l <- dd.w %>% tidyr::gather(time, v, v_t1, v_t2)
# we want to have all repeated measures for each person in consecutive lines
df.l %>% dplyr::arrange(subj, time) -> df.l
# data in wide format
dd.w## subj gender v_t1 v_t2
## 1 1 f 4 6
## 2 2 f 5 5
## 3 3 m 7 9
## 4 4 m 6 8
# same data in long format
df.l## subj gender time v
## 1 1 f v_t1 4
## 2 1 f v_t2 6
## 3 2 f v_t1 5
## 4 2 f v_t2 5
## 5 3 m v_t1 7
## 6 3 m v_t2 9
## 7 4 m v_t1 6
## 8 4 m v_t2 8
# ... we notice that variables subj and gender are kept and adequately repeated
# a dataframe with two repeated measures variables
dd.w <- data.frame(
subj = c(1,2,3,4),
gender = c('f', 'f', 'm', 'm'),
v_t1 = c(4, 5, 7, 6),
v_t2 = c(6, 5, 9, 8),
w_t1 = c(5, 6, 8, 7),
w_t2 = c(3, 4, 6, 5)
)
getwd()## [1] "/Users/pzezula/ownCloud/lehre_ss_2022/unit/b_data_mangling"
readr::write_csv(dd.w, "long_wide_vw.csv")
# can be read as
dd.w <- readr::read_csv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/long_wide_vw.csv")## Rows: 4 Columns: 6
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gender
## dbl (5): subj, v_t1, v_t2, w_t1, w_t2
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
13.2 Base R
13.2.1 Wide zu long mit Slicing-Techniken
require(tidyverse)
dd <- readr::read_csv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/long_wide_v.csv")## Rows: 12 Columns: 7
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gender
## dbl (6): subj, age, grp, v1, v2, v3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# we create a dataframe of t1 data
dd.1 <- dd[, c('subj', 'gender', 'v1')]
# we create column y with outcome values
dd.1['y'] <- dd.1$v1
# ... and erase old column t1
dd.1$v1 <- NULL
# and add t1 info
dd.1['time'] <- 't1'
# the same with t2 data
dd.2 <- dd[, c('subj', 'gender', 'v2')]
# and add t2 info
dd.2['time'] <- 't2'
# we create column y with outcome values
dd.2['y'] <- dd.2$v2
# we delete column t2
dd.2$v2 <- NULL
# we now merge the two parts
dd.l <- rbind(dd.1, dd.2)
# wide
dd## # A tibble: 12 × 7
## subj gender age grp v1 v2 v3
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 f 17 1 9 16 8
## 2 2 f 33 2 67 73 66
## 3 3 f 47 3 86 87 91
## 4 4 f 10 1 64 68 67
## 5 5 f 21 2 40 46 44
## 6 6 f 30 3 26 34 28
## 7 7 m 51 1 64 66 64
## 8 8 m 13 2 61 66 64
## 9 9 m 17 3 67 67 67
## 10 10 m 25 1 38 36 35
## 11 11 m 33 2 22 25 21
## 12 12 m 27 3 81 86 81
# long
dd.l## # A tibble: 24 × 4
## subj gender y time
## <dbl> <chr> <dbl> <chr>
## 1 1 f 9 t1
## 2 2 f 67 t1
## 3 3 f 86 t1
## 4 4 f 64 t1
## 5 5 f 40 t1
## 6 6 f 26 t1
## 7 7 m 64 t1
## 8 8 m 61 t1
## 9 9 m 67 t1
## 10 10 m 38 t1
## # … with 14 more rows
# we delete temporary data
rm(dd.1, dd.2)
# {} todo two repeated measures variables, long_wide_vw.csv13.3 Tidyverse: tidyr::pivot_longer() und tidyr::pivot_wider()
require(tidyverse)
dd.w <- readr::read_csv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/long_wide_vw.csv")## Rows: 4 Columns: 6
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gender
## dbl (5): subj, v_t1, v_t2, w_t1, w_t2
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# we convert v values only
dd.w %>% tidyr::pivot_longer(
cols = starts_with("v_"),
names_to = "tv",
values_to = "vl"
) ## # A tibble: 8 × 6
## subj gender w_t1 w_t2 tv vl
## <dbl> <chr> <dbl> <dbl> <chr> <dbl>
## 1 1 f 5 3 v_t1 4
## 2 1 f 5 3 v_t2 6
## 3 2 f 6 4 v_t1 5
## 4 2 f 6 4 v_t2 5
## 5 3 m 8 6 v_t1 7
## 6 3 m 8 6 v_t2 9
## 7 4 m 7 5 v_t1 6
## 8 4 m 7 5 v_t2 8
# we convert v and w values
dd.w %>%
tidyr::pivot_longer(
cols = starts_with("v_"),
names_to = "tv",
values_to = "vl"
) %>%
tidyr::pivot_longer(
cols = starts_with("w_"),
names_to = "tw",
values_to = "wl"
) ## # A tibble: 16 × 6
## subj gender tv vl tw wl
## <dbl> <chr> <chr> <dbl> <chr> <dbl>
## 1 1 f v_t1 4 w_t1 5
## 2 1 f v_t1 4 w_t2 3
## 3 1 f v_t2 6 w_t1 5
## 4 1 f v_t2 6 w_t2 3
## 5 2 f v_t1 5 w_t1 6
## 6 2 f v_t1 5 w_t2 4
## 7 2 f v_t2 5 w_t1 6
## 8 2 f v_t2 5 w_t2 4
## 9 3 m v_t1 7 w_t1 8
## 10 3 m v_t1 7 w_t2 6
## 11 3 m v_t2 9 w_t1 8
## 12 3 m v_t2 9 w_t2 6
## 13 4 m v_t1 6 w_t1 7
## 14 4 m v_t1 6 w_t2 5
## 15 4 m v_t2 8 w_t1 7
## 16 4 m v_t2 8 w_t2 5
# ... every v is repeated for w values
# if v and w were the same value from different occasions and 1 or 2 specify time
dd.w %>%
tidyr::pivot_longer(
cols = v_t1:w_t2,
names_to = c("occasion", "time"),
names_sep = "_",
values_to = "score"
)## # A tibble: 16 × 5
## subj gender occasion time score
## <dbl> <chr> <chr> <chr> <dbl>
## 1 1 f v t1 4
## 2 1 f v t2 6
## 3 1 f w t1 5
## 4 1 f w t2 3
## 5 2 f v t1 5
## 6 2 f v t2 5
## 7 2 f w t1 6
## 8 2 f w t2 4
## 9 3 m v t1 7
## 10 3 m v t2 9
## 11 3 m w t1 8
## 12 3 m w t2 6
## 13 4 m v t1 6
## 14 4 m v t2 8
## 15 4 m w t1 7
## 16 4 m w t2 5
13.4 Referenzen
Vignette Pivoting
Beispiele und Erklärungen: Unit long wide
Beispiele und Erklärungen: Unit transformation base
Beispiele und Erklärungen: Unit transformation dplyr