Chapter 13 Formatumwandlung long_wide

Zur Behandlung von Messwiederholungsdaten. Auf dem Weg zu “tidy data”.

Es gibt mehrere Ansätze zur Umwandlung zwischen long und wide Data. Für das Tidyverse konzentrieren wir uns hier auf library(tidyr) und hier auf die Befehle tidyr::pivot_longer() und tidyr::pivot_wider().

13.1 Grundidee

subj: Versuchsperson gender: ‘f’, ‘m’ t1: AV Zeitpunkt 1 t2: AV Zeitpunkt 2

# wide format
dd.w <- data.frame(
  subj   = c(1,2,3,4),
  gender = c('f', 'f', 'm', 'm'),
  v_t1     = c(4, 5, 7, 6),
  v_t2     = c(6, 5, 9, 8)
  )
getwd()
## [1] "/Users/pzezula/ownCloud/lehre_ss_2022/unit/b_data_mangling"
readr::write_csv(dd, "long_wide_v.csv")
dd <- readr::read_csv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/long_wide_v.csv")
## Rows: 12 Columns: 7
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gender
## dbl (6): subj, age, grp, v1, v2, v3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# conversion to long format
require(tidyr)
df.l <- dd.w %>% tidyr::gather(key=time, value=v, v_t1, v_t2)
# or even shorter: key and value can be used as positional parameters too
df.l <- dd.w %>% tidyr::gather(time, v, v_t1, v_t2)
# we want to have all repeated measures for each person in consecutive lines
df.l %>% dplyr::arrange(subj, time) -> df.l

# data in wide format
dd.w
##   subj gender v_t1 v_t2
## 1    1      f    4    6
## 2    2      f    5    5
## 3    3      m    7    9
## 4    4      m    6    8
# same data in long format
df.l
##   subj gender time v
## 1    1      f v_t1 4
## 2    1      f v_t2 6
## 3    2      f v_t1 5
## 4    2      f v_t2 5
## 5    3      m v_t1 7
## 6    3      m v_t2 9
## 7    4      m v_t1 6
## 8    4      m v_t2 8
# ... we notice that variables subj and gender are kept and adequately repeated

# a dataframe with two repeated measures variables
dd.w <- data.frame(
  subj   = c(1,2,3,4),
  gender = c('f', 'f', 'm', 'm'),
  v_t1     = c(4, 5, 7, 6),
  v_t2     = c(6, 5, 9, 8),
  w_t1     = c(5, 6, 8, 7),
  w_t2     = c(3, 4, 6, 5)
  )
getwd()
## [1] "/Users/pzezula/ownCloud/lehre_ss_2022/unit/b_data_mangling"
readr::write_csv(dd.w, "long_wide_vw.csv")
# can be read as
dd.w <- readr::read_csv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/long_wide_vw.csv")
## Rows: 4 Columns: 6
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gender
## dbl (5): subj, v_t1, v_t2, w_t1, w_t2
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

13.2 Base R

13.2.1 Wide zu long mit Slicing-Techniken

require(tidyverse)
dd <- readr::read_csv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/long_wide_v.csv")
## Rows: 12 Columns: 7
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gender
## dbl (6): subj, age, grp, v1, v2, v3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# we create a dataframe of t1 data
dd.1 <- dd[, c('subj', 'gender', 'v1')]
# we create column y with outcome values
dd.1['y'] <- dd.1$v1
# ... and erase old column t1
dd.1$v1 <- NULL
# and add t1 info
dd.1['time'] <- 't1'

# the same with t2 data
dd.2 <- dd[, c('subj', 'gender', 'v2')]
# and add t2 info
dd.2['time'] <- 't2'
# we create column y with outcome values
dd.2['y'] <- dd.2$v2
# we delete column t2
dd.2$v2 <- NULL

# we now merge the two parts
dd.l <- rbind(dd.1, dd.2)

# wide
dd
## # A tibble: 12 × 7
##     subj gender   age   grp    v1    v2    v3
##    <dbl> <chr>  <dbl> <dbl> <dbl> <dbl> <dbl>
##  1     1 f         17     1     9    16     8
##  2     2 f         33     2    67    73    66
##  3     3 f         47     3    86    87    91
##  4     4 f         10     1    64    68    67
##  5     5 f         21     2    40    46    44
##  6     6 f         30     3    26    34    28
##  7     7 m         51     1    64    66    64
##  8     8 m         13     2    61    66    64
##  9     9 m         17     3    67    67    67
## 10    10 m         25     1    38    36    35
## 11    11 m         33     2    22    25    21
## 12    12 m         27     3    81    86    81
# long
dd.l
## # A tibble: 24 × 4
##     subj gender     y time 
##    <dbl> <chr>  <dbl> <chr>
##  1     1 f          9 t1   
##  2     2 f         67 t1   
##  3     3 f         86 t1   
##  4     4 f         64 t1   
##  5     5 f         40 t1   
##  6     6 f         26 t1   
##  7     7 m         64 t1   
##  8     8 m         61 t1   
##  9     9 m         67 t1   
## 10    10 m         38 t1   
## # … with 14 more rows
# we delete temporary data
rm(dd.1, dd.2)

# {} todo two repeated measures variables, long_wide_vw.csv

13.3 Tidyverse: tidyr::pivot_longer() und tidyr::pivot_wider()

require(tidyverse)
dd.w <- readr::read_csv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/long_wide_vw.csv")
## Rows: 4 Columns: 6
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gender
## dbl (5): subj, v_t1, v_t2, w_t1, w_t2
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# we convert v values only
dd.w %>% tidyr::pivot_longer(
  cols = starts_with("v_"), 
    names_to = "tv", 
    values_to = "vl"
  ) 
## # A tibble: 8 × 6
##    subj gender  w_t1  w_t2 tv       vl
##   <dbl> <chr>  <dbl> <dbl> <chr> <dbl>
## 1     1 f          5     3 v_t1      4
## 2     1 f          5     3 v_t2      6
## 3     2 f          6     4 v_t1      5
## 4     2 f          6     4 v_t2      5
## 5     3 m          8     6 v_t1      7
## 6     3 m          8     6 v_t2      9
## 7     4 m          7     5 v_t1      6
## 8     4 m          7     5 v_t2      8
# we convert v and w values 
dd.w %>% 
tidyr::pivot_longer(
  cols = starts_with("v_"), 
    names_to = "tv", 
    values_to = "vl"
  )  %>%
tidyr::pivot_longer(
  cols = starts_with("w_"), 
    names_to = "tw", 
    values_to = "wl"
  ) 
## # A tibble: 16 × 6
##     subj gender tv       vl tw       wl
##    <dbl> <chr>  <chr> <dbl> <chr> <dbl>
##  1     1 f      v_t1      4 w_t1      5
##  2     1 f      v_t1      4 w_t2      3
##  3     1 f      v_t2      6 w_t1      5
##  4     1 f      v_t2      6 w_t2      3
##  5     2 f      v_t1      5 w_t1      6
##  6     2 f      v_t1      5 w_t2      4
##  7     2 f      v_t2      5 w_t1      6
##  8     2 f      v_t2      5 w_t2      4
##  9     3 m      v_t1      7 w_t1      8
## 10     3 m      v_t1      7 w_t2      6
## 11     3 m      v_t2      9 w_t1      8
## 12     3 m      v_t2      9 w_t2      6
## 13     4 m      v_t1      6 w_t1      7
## 14     4 m      v_t1      6 w_t2      5
## 15     4 m      v_t2      8 w_t1      7
## 16     4 m      v_t2      8 w_t2      5
# ... every v is repeated for w values

# if v and w were the same value from different occasions and 1 or 2 specify time
dd.w %>% 
  tidyr::pivot_longer(
    cols = v_t1:w_t2,
    names_to = c("occasion", "time"),
    names_sep = "_",
    values_to = "score"
  )
## # A tibble: 16 × 5
##     subj gender occasion time  score
##    <dbl> <chr>  <chr>    <chr> <dbl>
##  1     1 f      v        t1        4
##  2     1 f      v        t2        6
##  3     1 f      w        t1        5
##  4     1 f      w        t2        3
##  5     2 f      v        t1        5
##  6     2 f      v        t2        5
##  7     2 f      w        t1        6
##  8     2 f      w        t2        4
##  9     3 m      v        t1        7
## 10     3 m      v        t2        9
## 11     3 m      w        t1        8
## 12     3 m      w        t2        6
## 13     4 m      v        t1        6
## 14     4 m      v        t2        8
## 15     4 m      w        t1        7
## 16     4 m      w        t2        5

13.4 Referenzen