---
title: "Health stats"
output:
  html_document:
    toc: yes
  pdf_document: default
  html_notebook:
    code_folding: hide
    toc: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE)
```

# Summary

This document provides an overview of health data recorded by *Gadgetbridge* using a *Pebble* smartwatch.

# Preparation

Put the *Gadgetbridge* database (a file named `gadgetbridge`) in the current folder, then knit the current `.Rmd` file. You should also set your **timezone** in the corresponding variable.

* `PEBBLE_HEALTH_ACTIVITY_SAMPLE` is a simple table containing steps and timestamps.
* `PEBBLE_HEALTH_ACTIVITY_OVERLAY` is more complex. It tracks activities (sleep, deep sleep, nap…) with a start and an end date. Note that dates may overlap: you can be both in sleep and deep sleep.

I have decided that any event happening after 8pm is registered for the next day. For instance:

* if you go to bed at 9pm on Tuesday and wake up at 7am on Wednesday, the data will return 10 hours of sleep on Wednesday
* if you go to bed at 7pm on Tuesday, a part of your sleep will be added to Tuesday. The cutoff won't exactly be 8pm, it will depend on the duration of the first sleep session recorded by your watch.
```{r include=FALSE}
# Load data ------------------------------------------------------------------
# Reads the two Gadgetbridge tables used by this report from the SQLite file
# expected in the working directory.

library(DBI)
library(RSQLite)
library(tidyverse)
library(lubridate)
library(scales)

db_path <- "gadgetbridge"

# SQLite silently creates an empty database when the file is missing, which
# would only surface later as a confusing "no such table" error.
if (!file.exists(db_path)) {
  stop("Gadgetbridge database not found: ", db_path, call. = FALSE)
}

con <- DBI::dbConnect(RSQLite::SQLite(), dbname = db_path)
steps_data <- dbReadTable(con, "PEBBLE_HEALTH_ACTIVITY_SAMPLE")
sleep_data <- dbReadTable(con, "PEBBLE_HEALTH_ACTIVITY_OVERLAY")
dbDisconnect(con)

# Helpers ---------------------------------------------------------------------

# Labelled day of week, honouring the `lubridate.week.start` option
# (weeks start on Monday when the option is unset).
weekday_label <- function(x) {
  wday(x, label = TRUE, week_start = getOption("lubridate.week.start", 1))
}

# Earliest non-missing value of `x`; a typed NA (instead of Inf plus a
# warning) when nothing is left after dropping missing values.
earliest <- function(x) {
  x <- x[!is.na(x)]
  if (length(x) == 0) x[NA_integer_] else min(x)
}

# Latest non-missing value of `x`; a typed NA (instead of -Inf plus a
# warning) when nothing is left after dropping missing values.
latest <- function(x) {
  x <- x[!is.na(x)]
  if (length(x) == 0) x[NA_integer_] else max(x)
}

# Transform data -------------------------------------------------------------

## Interpret timestamps
## Every timestamp is expressed in the same local timezone so that days, months
## and hours of the day agree between the steps and the sleep figures.
timezone <- "Europe/Paris"

steps_data$datetime <- with_tz(as_datetime(steps_data$TIMESTAMP), timezone)
sleep_data$datetime_from <-
  with_tz(as_datetime(sleep_data$TIMESTAMP_FROM), timezone)
sleep_data$datetime_to <-
  with_tz(as_datetime(sleep_data$TIMESTAMP_TO), timezone)

## Calculate activity duration (in minutes) and summarise it per day
## Data recorded after 8pm is attached to the next day. The day is derived
## from the local clock hour rather than by adding a 4-hour period, because
## period arithmetic returns NA when the result lands in a DST gap.
## RAW_KIND mapping used below: 1 sleep, 2 deep sleep, 3 nap, 4 deep nap,
## 5 walk, 6 run. Summing per kind directly (instead of spreading RAW_KIND
## into columns) keeps the report working when a kind never occurs.
sleep_data <- sleep_data %>%
  mutate(
    day = date(datetime_from) + as.integer(hour(datetime_from) >= 20),
    duration = as.numeric(
      difftime(datetime_to, datetime_from, units = "mins")
    )
  ) %>%
  group_by(day) %>%
  summarise(
    sleep = sum(duration[RAW_KIND == 1], na.rm = TRUE),
    deep_sleep = sum(duration[RAW_KIND == 2], na.rm = TRUE),
    nap = sum(duration[RAW_KIND == 3], na.rm = TRUE),
    deep_nap = sum(duration[RAW_KIND == 4], na.rm = TRUE),
    walk = sum(duration[RAW_KIND == 5], na.rm = TRUE),
    run = sum(duration[RAW_KIND == 6], na.rm = TRUE),
    # Really not sure about this one: bed time is the start of the first
    # sleep record of the day, wake-up time the end of the last one.
    bed_time = earliest(datetime_from[RAW_KIND == 1]),
    wakeup_time = latest(datetime_to[RAW_KIND == 1])
  ) %>%
  ungroup()
```

# Visualisation

## Steps

### Distribution of steps

```{r}
# Total number of steps per calendar day.
data <- steps_data %>%
  mutate(date = date(datetime)) %>%
  group_by(date) %>%
  summarise(steps = sum(STEPS)) %>%
  ungroup()

ggplot(data, aes(steps)) +
  geom_histogram(binwidth = 500) +
  theme_minimal() +
  labs(x="Steps", y="Number of occurences")
```

### Distribution of steps per day of week

```{r}
# Daily totals, labelled with their day of week.
data <- steps_data %>%
  mutate(
    date = date(datetime),
    wday = weekday_label(datetime)
  ) %>%
  group_by(date, wday) %>%
  summarise(steps = sum(STEPS)) %>%
  ungroup()

ggplot(data, aes(x = wday, y = steps)) +
  geom_boxplot() +
  theme_minimal() +
  labs(x="Day of week", y="Number of steps")
```

### Number of steps per month

```{r}
data <- steps_data %>%
  mutate(month = floor_date(datetime, unit = "month")) %>%
  group_by(month) %>%
  summarise(steps = sum(STEPS)) %>%
  ungroup()

ggplot(data, aes(x = month, y = steps)) +
  geom_col() +
  # date_format() defaults to UTC; label in the data's timezone so a month
  # starting at local midnight is not shown as the previous month.
  scale_x_datetime(labels = date_format("%Y-%m", tz = timezone)) +
  scale_y_continuous(
    breaks = seq(0, 500000, 50000),
    labels = function(x) format(x, big.mark = " ", scientific = FALSE)
  ) +
  geom_smooth(method = lm) +
  theme_minimal() +
  labs(x="Month", y="Number of steps")
```

### Average number of steps per hour of the day, year after year

```{r}
# Cumulative steps since local midnight, averaged over all days of a year.
data <- steps_data %>%
  # cumsum() below relies on chronological order within each day.
  arrange(datetime) %>%
  mutate(
    date = date(datetime),
    # Wall-clock seconds since midnight: a plain number in a known unit,
    # matching the axis breaks defined in seconds below.
    time = hour(datetime) * 3600 + minute(datetime) * 60 + second(datetime),
    year = year(datetime)
  ) %>%
  group_by(date) %>%
  mutate(cumsteps = cumsum(STEPS)) %>%
  ungroup() %>%
  select(date, time, year, cumsteps) %>%
  group_by(time, year) %>%
  summarise(
    min = min(cumsteps),
    max = max(cumsteps),
    average = mean(cumsteps)
  ) %>%
  ungroup()

ggplot(data) +
  geom_step(aes(x = time, y = average)) +
  theme_minimal() +
  labs(title = "Average number of steps per hour of the day", x="Hour", y="Number of steps") +
  scale_x_continuous(
    breaks = seq(
      0,
      period_to_seconds(hours(24)),
      period_to_seconds(hours(1))
    ),
    labels = seq(0, 24, 1)
  ) +
  facet_wrap(vars(year))
```

### Active time per week

```{r}
# sleep_data holds one row per day, so no further aggregation is needed.
data <- sleep_data %>%
  mutate(
    wday = weekday_label(day),
    walk_time = walk / 60
  ) %>%
  select(day, wday, walk_time)

ggplot(data, aes(x = wday, y = walk_time)) +
  geom_boxplot() +
  theme_minimal() +
  labs(x="Day of week", y="Hours active")
```

## Sleep

### Distribution of sleep duration

```{r}
data <- sleep_data %>%
  mutate(sleep_duration = sleep / 60) %>%
  select(day, sleep_duration)

ggplot(data) +
  geom_histogram(aes(sleep_duration), bins = 50) +
  scale_x_continuous(breaks = seq(0, 12, 1)) +
  theme_minimal() +
  labs(x="Sleep duration (hours)", y="Number of occurences")
```

### Distribution of deep sleep duration

```{r}
data <- sleep_data %>%
  mutate(deep_sleep_duration = deep_sleep / 60) %>%
  select(day, deep_sleep_duration)

ggplot(data) +
  geom_histogram(aes(deep_sleep_duration), bins = 50) +
  scale_x_continuous(breaks = seq(0, 12, 1)) +
  theme_minimal() +
  labs(x="Deep sleep duration (hours)", y="Number of occurences")
```

### Distribution of sleep duration per day of week, year after year

```{r}
data <- sleep_data %>%
  mutate(
    wday = weekday_label(day),
    year = year(day),
    sleep_duration = sleep / 60
  ) %>%
  select(year, day, wday, sleep_duration)

ggplot(data, aes(x = wday, y = sleep_duration)) +
  geom_boxplot() +
  theme_minimal() +
  labs(x="Day of week", y="Sleep duration") +
  facet_grid(rows = vars(year))
```

### Distribution of nap duration

```{r}
# Only days with at least one recorded nap; durations stay in minutes.
data <- sleep_data %>%
  filter(nap > 0) %>%
  mutate(nap_time = nap) %>%
  select(day, nap_time)

ggplot(data) +
  geom_histogram(aes(nap_time), bins = 10) +
  scale_x_continuous(breaks = seq(0, 240, 15)) +
  theme_minimal() +
  labs(x="Nap duration (minutes)", y="Number of occurences")
```

### Time of bed and waking up by year

```{r}
# Bed and wake-up times reduced to a time of day (hour and minute only).
data <- sleep_data %>%
  mutate(
    year = year(day),
    month = floor_date(day, unit = "month"),
    bed_time_hms = hms::as_hms(
      hour(bed_time) * 3600 + minute(bed_time) * 60
    ),
    wakeup_time_hms = hms::as_hms(
      hour(wakeup_time) * 3600 + minute(wakeup_time) * 60
    )
  ) %>%
  # Days without a sleep record have no bed or wake-up time.
  drop_na(bed_time_hms, wakeup_time_hms)

ggplot(data) +
  geom_histogram(aes(bed_time_hms), fill="orange", alpha=0.5, bins=30) +
  geom_histogram(aes(wakeup_time_hms), fill="blue", alpha=0.5, bins=30) +
  scale_x_continuous(
    breaks = seq(
      0,
      period_to_seconds(hours(24)),
      period_to_seconds(hours(1))
    ),
    labels = seq(0, 24, 1)
  ) +
  theme_minimal() +
  labs(x="Bed time and wakeup time", y="Number of occurences") +
  facet_grid(rows = vars(year),scales="free_y")
```