Chapter 14 Datum und Zeit
Datums- und Zeitangaben, Zeitpunkte als Kombination aus beiden, Zeiträume.
14.1 Formate
Zeitpunkte und Zeiträume sind in R bzw. im Paket lubridate ein eigenes Datenformat.
Empfehlung für Strings mit Zeitangaben (in dieser Form werden sie häufig in Datenbanken verwendet):
- internationale Datumsangabe in der Form JJJJ-MM-TT: “2022-04-21”, “1945-05-08”
- internationale Zeitpunktangabe in der Form JJJJ-MM-TT HH:MM:SS: “2022-04-21 10:15:00”, “2022-04-21 11:44:59”
14.2 Dataframe mit Zeit- und Datumsangaben
# Example data for six subjects: birth date/time of the subject (b_self_s),
# of the mother (b_mother_s) and of a child (b_child_s), a numeric value v,
# and two clock times with milliseconds (rt1_s, rt2_s).
# All date/time columns are stored as character strings (suffix "_s");
# note that the strings deliberately differ in precision (with/without
# minutes or seconds).
dd.w <- data.frame(
  subj = c(1, 2, 3, 4, 5, 6),
  b_self_s = c(
    "1955-04-01 07:14", "1980-01-14 03:22:10", "2000-01-01 18:45",
    "1965-02-18 06:26", "1989-04-01 12:21:11", "2003-08-16 19:10"
  ),
  b_mother_s = c(
    "1933-09-28", "1953-11-06", "1976-02-28",
    "1939-12-15", "1966-01-12", "1974-07-15"
  ),
  b_child_s = c(
    "1990-12-24 18", "2001-10-14 01:10", "2022-03-15 23:55",
    "2005-03-16 17", "2010-09-17 08:12", "2022-04-12 14:55"
  ),
  v = c(99, 117, 109, 102, 118, 108),
  rt1_s = c(
    "15:34:51.134", "17:39:42.789", "19:56:32.018",
    "10:16:14.102", "18:38:49.614", "19:06:12.018"
  ),
  rt2_s = c(
    "15:49:11.764", "18:02:36.254", "20:21:14.912",
    "10:16:27.684", "18:39:04.835", "19:07:14.039"
  )
)
# show the working directory the CSV file is written to
getwd()
## [1] "/Users/pzezula/ownCloud/lehre_ss_2022/unit/b_data_mangling"
# write the data frame as CSV into the working directory
readr::write_csv(dd.w, "dt-1.csv")
# can be read as
# NOTE(review): this reassigns dd.w to the re-read data, whose date/time
# columns are auto-converted by read_csv() (see the column specification
# printed below) -- confirm the overwrite is intended.
dd.w <- readr::read_csv(
  "https://md.psych.bio.uni-goettingen.de/mv/data/virt/dt-1.csv"
)
## Rows: 6 Columns: 7
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): subj, v
## dttm (2): b_self_s, b_child_s
## date (1): b_mother_s
## time (2): rt1_s, rt2_s
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# ... but automatic conversion "eats" the milliseconds,
# so we better go with the RDS format
# (readr:: is spelled out because tidyverse is not attached at this point)
readr::write_rds(dd.w, "dt-1.rds")
# can be read via
dd <- readRDS("dt-1.rds") # locally
# after upload: readRDS() needs a connection, so wrap the URL in gzcon()
dd <- readRDS(
  gzcon(url("http://md.psych.bio.uni-goettingen.de/mv/data/virt/dt-1.rds"))
)
14.3 Base R
The later date comes first in difftime(): difftime(time1, time2) computes time1 - time2.
library(tidyverse)
# print fractional seconds with up to 3 digits (milliseconds)
options(digits.secs = 3)
#dd <- readr::read_csv("https://md.psych.bio.uni-goettingen.de/mv/data/virt/dt-1.csv")
dd <- readRDS(
  gzcon(url("http://md.psych.bio.uni-goettingen.de/mv/data/virt/dt-1.rds"))
)
# convert the character columns to date-times with a fixed format;
# strptime() returns NA for every string that does not match the format
# (e.g. entries without seconds) -- see the NA values in the output below
dd$b_self <- strptime(dd$b_self_s, format = "%Y-%m-%d %H:%M:%S")
dd$b_mother <- strptime(dd$b_mother_s, format = "%Y-%m-%d")
dd$b_child <- strptime(dd$b_child_s, format = "%Y-%m-%d %H:%M:%S")
# we can use Sys.time() to calculate intervals with respect to the current
# datetime, e.g. to calculate the current age
dd$a_self <- difftime(Sys.time(), dd$b_self)
dd$a_self # default are days
## Time differences in days
## [1] NA 15494.36 NA NA 12129.03 NA
# expressed in years; easier to do with lubridate, which converts the
# difftime into the requested unit
lubridate::time_length(difftime(Sys.time(), dd$b_self), "years")
## [1] NA 42.42126 NA NA 33.20748 NA
# age difference to mother and child (later date first, so that the
# differences are positive)
dd$i_ms <- difftime(dd$b_self, dd$b_mother)
dd$i_sc <- difftime(dd$b_child, dd$b_self)
dd$i_mc <- difftime(dd$b_child, dd$b_mother)
# we can use these intervals for calculations or comparisons
# (the results are NA here: the intervals contain NA values from the
# date-times that could not be parsed, and cor() does not drop missing
# values by default)
cor(as.numeric(dd$i_ms), dd$v)
## [1] NA
cor(as.numeric(dd$i_sc), dd$v)
## [1] NA
cor(as.numeric(dd$i_mc), dd$v)
## [1] NA
# some idea to have reaction times to the ms
# %OS (uppercase letter O) reads seconds including the fractional part
dd$rt1 <- strptime(dd$rt1_s, format = "%H:%M:%OS") # today is set as date
dd$rt2 <- strptime(dd$rt2_s, format = "%H:%M:%OS")
#strptime(dd$rt1_s, format = "%H:%M:%OS") # uppercase o in %OS
# reaction time = end time minus start time
dd$rt <- difftime(
  strptime(dd$rt2_s, format = "%H:%M:%OS"),
  strptime(dd$rt1_s, format = "%H:%M:%OS")
)
14.4 Tidyverse: library(lubridate)
library(tidyverse)
# read the example data; read_csv() guesses the column types (see the
# column specification printed below)
dd <- readr::read_csv(
  "https://md.psych.bio.uni-goettingen.de/mv/data/virt/dt-1.csv"
)
## Rows: 6 Columns: 7
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): subj, v
## dttm (2): b_self_s, b_child_s
## date (1): b_mother_s
## time (2): rt1_s, rt2_s
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# parse_date_time() accepts several candidate formats ("orders"), so
# entries with and without hours/minutes/seconds can be mixed
dd$b_self <- lubridate::parse_date_time(
  dd$b_self_s, c("ymd", "ymd H:M", "ymd H:M:S")
)
dd$b_mother <- lubridate::parse_date_time(
  dd$b_mother_s, c("ymd", "ymd H:M", "ymd H:M:S")
)
dd$b_child <- lubridate::parse_date_time(
  dd$b_child_s, c("ymd", "ymd H", "ymd H:M", "ymd H:M:S")
)
# we can use now() to calculate intervals with respect to the current
# datetime, e.g. to calculate the current age
dd$a_self <- lubridate::interval(dd$b_self, lubridate::now())
# expressed in years
dd$a_self %>% as.numeric("years")
## [1] 67.20920 42.42114 22.45498 57.32291 33.20725 18.83276
# age difference to mother and child; interval(start, end) takes the
# earlier date first
dd$i_ms <- lubridate::interval(dd$b_mother, dd$b_self)
dd$i_sc <- lubridate::interval(dd$b_self, dd$b_child)
dd$i_mc <- lubridate::interval(dd$b_mother, dd$b_child)
# we can use these intervals for calculations or comparisons
cor(dd$i_ms, dd$v)
## [1] 0.2261904
cor(dd$i_sc, dd$v)
## [1] -0.767554
cor(dd$i_mc, dd$v)
## [1] -0.7745263
# some idea to have reaction times to the ms
options(digits.secs = 3)
# convert both clock times with lubridate::hms() and take the difference
# (end minus start) as reaction time
dd <- dd %>%
  mutate(rt1 = lubridate::hms(rt1_s)) %>%
  mutate(rt2 = lubridate::hms(rt2_s)) %>%
  mutate(rt = rt2 - rt1)