Chapter 14 Datum und Zeit

Datums- und Zeitangaben, Zeitpunkte als Kombination aus beiden, Zeiträume.

14.1 Formate

Zeitpunkte und Zeiträume sind ein eigenes Datenformat in R bzw. package(lubridate).

Empfehlung für Strings mit Zeitangaben (in dieser Form werden sie häufig in Datenbanken verwendet):

  • internationale Datumsangabe in der Form JJJJ-MM-TT, z. B. “2022-04-21”, “1945-05-08”
  • internationale Angabe eines Zeitpunkts in der Form JJJJ-MM-TT HH:MM:SS, z. B. “2022-04-21 10:15:00”, “2022-04-21 11:44:59”

14.1.1 Datum

14.1.2 Zeit

14.1.3 Zeitpunkt

datetime (auch: timestamp) - ein aus Datum und Zeit zusammengesetzter Zeitpunkt

14.1.4 jetzt

# Current date-time with base R
print(Sys.time())
## [1] "2022-06-16 13:06:10 CEST"
# Current date-time with lubridate
print(lubridate::now())
## [1] "2022-06-16 13:06:10 CEST"

14.1.5 Dauer Zeitraum

Period, lubridate::interval()

14.2 Dataframe mit Zeit- und Datumsangaben

# Example data in wide format, one row per subject: birth date-times of the
# subject, the mother and the child, a numeric value `v`, and two clock
# readings with milliseconds. All date/time columns are plain strings
# (suffix `_s`) and deliberately differ in precision.
dd.w <- local({
  self_birth <- c(
    "1955-04-01 07:14", "1980-01-14 03:22:10", "2000-01-01 18:45",
    "1965-02-18 06:26", "1989-04-01 12:21:11", "2003-08-16 19:10"
  )
  mother_birth <- c(
    "1933-09-28", "1953-11-06", "1976-02-28",
    "1939-12-15", "1966-01-12", "1974-07-15"
  )
  child_birth <- c(
    "1990-12-24 18", "2001-10-14 01:10", "2022-03-15 23:55",
    "2005-03-16 17", "2010-09-17 08:12", "2022-04-12 14:55"
  )
  first_reading <- c(
    "15:34:51.134", "17:39:42.789", "19:56:32.018",
    "10:16:14.102", "18:38:49.614", "19:06:12.018"
  )
  second_reading <- c(
    "15:49:11.764", "18:02:36.254", "20:21:14.912",
    "10:16:27.684", "18:39:04.835", "19:07:14.039"
  )
  data.frame(
    subj = c(1, 2, 3, 4, 5, 6),
    b_self_s = self_birth,
    b_mother_s = mother_birth,
    b_child_s = child_birth,
    v = c(99, 117, 109, 102, 118, 108),
    rt1_s = first_reading,
    rt2_s = second_reading
  )
})

# Show the working directory; the local files below are written there.
getwd()
## [1] "/Users/pzezula/ownCloud/lehre_ss_2022/unit/b_data_mangling"
readr::write_csv(dd.w, "dt-1.csv")
# can be read as
# The import goes into its own object: assigning it back to `dd.w` would
# replace the original string columns by readr's automatically converted
# columns, and the RDS file written below would then store the converted
# values instead of the strings.
dd_csv <- readr::read_csv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/dt-1.csv")
## Rows: 6 Columns: 7
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (2): subj, v
## dttm (2): b_self_s, b_child_s
## date (1): b_mother_s
## time (2): rt1_s, rt2_s
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# ... but automatic conversion "eats" the milliseconds

# we better go with RDS-Format
# (namespace-qualified like write_csv() above, so it does not depend on readr
# being attached at this point)
readr::write_rds(dd.w, "dt-1.rds")
# can be read via
dd <- readRDS("dt-1.rds")  # locally
# NOTE(review): this download uses plain http while the CSV above is fetched
# via https from the same host - consider switching once https is confirmed
# to work for the .rds file as well.
dd <- readRDS(gzcon(url("http://md.psych.bio.uni-goettingen.de/mv/data/virt/dt-1.rds")))  # after upload

14.3 Base R

The later date comes first in difftime(): difftime(time1, time2) returns time1 - time2.

# library() stops with an error when the package is missing; require() would
# only return FALSE and let the script fail somewhere further down.
library(tidyverse)
# print date-times with three fractional digits (milliseconds)
options(digits.secs = 3)
#dd <- readr::read_csv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/dt-1.csv")
# NOTE(review): plain http download; the CSV on the same host is served via
# https - consider switching once https is confirmed for the .rds file.
dd <- readRDS(gzcon(url("http://md.psych.bio.uni-goettingen.de/mv/data/virt/dt-1.rds")))

# Parse date-time strings of mixed precision.
#
# x:       vector of date / date-time strings (coerced with as.character())
# formats: strptime() formats, tried in the given order. Put the most specific
#          format first: strptime() ignores trailing characters, so a shorter
#          format listed earlier would match a longer string and drop its
#          time part.
# Returns a POSIXct vector in the local time zone; elements that match none of
# the formats stay NA.
parse_mixed_datetime <- function(x, formats) {
  x <- as.character(x)
  parsed <- as.POSIXct(strptime(x, format = formats[[1]]))
  for (fmt in formats[-1]) {
    # only retry the elements that no earlier format could read
    unparsed <- is.na(parsed) & !is.na(x)
    if (!any(unparsed)) {
      break
    }
    parsed[unparsed] <- as.POSIXct(strptime(x[unparsed], format = fmt))
  }
  parsed
}

# The birth strings come with seconds, without seconds and (for the children)
# partly with the hour only. A single "%Y-%m-%d %H:%M:%S" format turned every
# string without seconds into NA, so all layouts are tried in turn. The
# results are stored as POSIXct, the date-time class suited for data frame
# columns (strptime() itself returns POSIXlt).
birth_formats <- c(
  "%Y-%m-%d %H:%M:%S",
  "%Y-%m-%d %H:%M",
  "%Y-%m-%d %H",
  "%Y-%m-%d"
)
dd$b_self   <- parse_mixed_datetime(dd$b_self_s,   birth_formats)
dd$b_mother <- parse_mixed_datetime(dd$b_mother_s, "%Y-%m-%d")
dd$b_child  <- parse_mixed_datetime(dd$b_child_s,  birth_formats)
# we can use Sys.time() to calculate intervals with respect to the current
# datetime, e.g. to calculate the current age

# units are set explicitly; the default ("auto") picks a unit from the data
dd$a_self <- difftime(Sys.time(), dd$b_self, units = "days")
dd$a_self  # in days
## NOTE(review): recorded output removed - it showed NA for the four subjects
## whose birth string has no seconds; the values depend on the run date.
# expressed in years, easier to use lubridate
lubridate::time_length(dd$a_self, "years")
# age difference to mother and child (later date first)
dd$i_ms <- difftime(dd$b_self,  dd$b_mother, units = "days")
dd$i_sc <- difftime(dd$b_child, dd$b_self,   units = "days")
dd$i_mc <- difftime(dd$b_child, dd$b_mother, units = "days")

# we can use these intervals for calculations or comparisons
cor(as.numeric(dd$i_ms), dd$v)
cor(as.numeric(dd$i_sc), dd$v)
cor(as.numeric(dd$i_mc), dd$v)
## NOTE(review): the recorded results were all NA because of the unparsed
## birth date-times. Presumably the values are now close to those of the
## lubridate section (0.23, -0.77, -0.77) - confirm when re-knitting.
# some idea to have reaction times to the ms
# %OS (uppercase O) reads the seconds including their fractional part. The
# strings carry no date, so today is set as date. The readings are stored as
# POSIXct rather than the POSIXlt returned by strptime(), which is the class
# suited for data frame columns.
dd$rt1 <- as.POSIXct(strptime(dd$rt1_s, format = "%H:%M:%OS"))
dd$rt2 <- as.POSIXct(strptime(dd$rt2_s, format = "%H:%M:%OS"))
# second reading minus first reading; reuses the parsed columns instead of
# parsing both strings again, and fixes the unit instead of relying on "auto"
dd$rt <- difftime(dd$rt2, dd$rt1, units = "secs")

14.4 Tidyverse: library(lubridate)

# library() stops with an error when the package is missing; require() would
# only return FALSE and let the script fail somewhere further down.
library(tidyverse)
# Read every date/time column as the string stored in the file. The `_s`
# columns are parsed explicitly below, and readr's automatic conversion was
# observed to "eat" the milliseconds of the clock readings. Only `subj` and
# `v` are numeric. With explicit col_types readr prints no column
# specification message.
dd <- readr::read_csv(
  "https://md.psych.bio.uni-goettingen.de/mv/data/virt/dt-1.csv",
  col_types = readr::cols(
    subj = readr::col_double(),
    v = readr::col_double(),
    .default = readr::col_character()
  )
)
# Birth date-times: parse_date_time() tries each of the listed orders, so
# strings of different precision can share one column.
dd[["b_self"]] <- lubridate::parse_date_time(
  dd[["b_self_s"]],
  c("ymd", "ymd H:M", "ymd H:M:S")
)
dd[["b_mother"]] <- lubridate::parse_date_time(
  dd[["b_mother_s"]],
  c("ymd", "ymd H:M", "ymd H:M:S")
)
dd[["b_child"]] <- lubridate::parse_date_time(
  dd[["b_child_s"]],
  c("ymd", "ymd H", "ymd H:M", "ymd H:M:S")
)
# Current age: interval from birth up to now()
dd[["a_self"]] <- lubridate::interval(start = dd[["b_self"]], end = lubridate::now())
# ... expressed in years
as.numeric(dd[["a_self"]], "years")
## [1] 67.20920 42.42114 22.45498 57.32291 33.20725 18.83276
# Age differences: mother -> self, self -> child, mother -> child
dd[["i_ms"]] <- lubridate::interval(start = dd[["b_mother"]], end = dd[["b_self"]])
dd[["i_sc"]] <- lubridate::interval(start = dd[["b_self"]], end = dd[["b_child"]])
dd[["i_mc"]] <- lubridate::interval(start = dd[["b_mother"]], end = dd[["b_child"]])

# The intervals can be used directly in calculations or comparisons
cor(dd[["i_ms"]], dd[["v"]])
## [1] 0.2261904
cor(dd[["i_sc"]], dd[["v"]])
## [1] -0.767554
cor(dd[["i_mc"]], dd[["v"]])
## [1] -0.7745263
# Reaction times with millisecond resolution
# show three fractional digits when printing seconds
options(digits.secs = 3)
# hms() turns both clock readings into Periods; rt is their difference
dd <- dd %>%
  mutate(
    rt1 = lubridate::hms(rt1_s),
    rt2 = lubridate::hms(rt2_s),
    rt = rt2 - rt1
  )

14.6 Screencast(s)

{} todo