Some Notes about Time Series

Author

EMW

Published

June 12, 2024

#load packages
library(tidyverse)
library(lubridate)
# Read the power readings from the project's data folder; the chunks below
# use its Date and power_kW columns.
power <- read.csv(file = "../data/power.csv")

Date Formats are IMPORTANT

# Plot the series while Date is still a character ("chr") column
ggplot(data = power, mapping = aes(x = Date, y = power_kW)) +
  geom_line() +
  labs(title = "Date not in date format (still chr)")

# Parse the Date strings (month-day-year hour:minute order) into date-times
power <- mutate(power, Date = mdy_hm(Date))

# Draw the same plot again now that Date has been parsed
ggplot(data = power, mapping = aes(x = Date, y = power_kW)) +
  geom_line() +
  labs(title = "Date in Date Format")

Should we summarize the data? How?

# No summary: one line through every reading at its original timestamp
ggplot(data = power, mapping = aes(x = Date, y = power_kW)) +
  geom_line() +
  labs(title = "By minute: no summarizing")

# Round each timestamp down to its day but keep every reading (points)
mutate(power, Date_round = floor_date(Date, unit = "day")) %>%
  ggplot(mapping = aes(x = Date_round, y = power_kW)) +
  geom_point() +
  labs(title = "By date: all minute data kept (points) :(")

# Same rounded data drawn as a line instead of points
mutate(power, Date_round = floor_date(Date, unit = "day")) %>%
  ggplot(mapping = aes(x = Date_round, y = power_kW)) +
  geom_line() +
  labs(title = "By date: all minute data kept (lines) :(")

Over what time period should we summarize?

# Keep in Minutes: plot every reading at its original resolution
power %>%
  ggplot(aes(x = Date, y = power_kW)) +
  geom_line() +
  ggtitle("By minute")

# Sum by Hour
# floor_date() truncates each timestamp to the start of its hour, so a single
# grouping column replaces splitting Date into Year/Month/Day/Hour and
# rebuilding it with make_datetime(). `.groups = "drop"` returns an ungrouped
# result and avoids the "grouped output by" message from summarize().
power %>%
  group_by(Date_round = floor_date(Date, unit = "hour")) %>%
  summarize(Total_power_kw = sum(power_kW), .groups = "drop") %>%
  ggplot(aes(x = Date_round, y = Total_power_kw)) +
  geom_line() +
  ggtitle("By hour: summarize by adding")

# Sum by Day
# Same approach at day resolution; as_date() converts the floored date-time
# to a Date so the x axis stays on a date scale.
power %>%
  group_by(Date_round = as_date(floor_date(Date, unit = "day"))) %>%
  summarize(Total_power_kw = sum(power_kW), .groups = "drop") %>%
  ggplot(aes(x = Date_round, y = Total_power_kw)) +
  geom_line() +
  ggtitle("By date: summarize by adding")