#load packages
library(tidyverse)
library(lubridate)
# Some notes about time series ----
# Load data
# Read the power readings from CSV into the `power` data frame.
power <- read.csv("../data/power.csv")
# Date formats are IMPORTANT ----
# Plot the series while the Date column is still a "chr" value
power %>%
  ggplot(aes(x = Date, y = power_kW)) +
  geom_line() +
  labs(title = "Date not in date format (still chr)")
# Transform Date to date-time format
# mdy_hm() parses month-day-year hour:minute strings; the parsed column
# overwrites the original Date column in `power`.
power <- power %>%
  mutate(Date = mdy_hm(Date))
# Redraw the same time series using the current Date column
power %>%
  ggplot(aes(x = Date, y = power_kW)) +
  geom_line() +
  labs(title = "Date in Date Format")
# Should we summarize the data? How? ----
# No summary: plot every reading at its original timestamp
power %>%
  ggplot(aes(x = Date, y = power_kW)) +
  geom_line() +
  ggtitle("By minute: no summarizing")
# Round timestamps down to the day but keep every row: all readings from
# the same day share one x value, so the points stack vertically.
power %>%
  mutate(Date_round = floor_date(Date, unit = "day")) %>%
  ggplot(aes(x = Date_round, y = power_kW)) +
  geom_point() +
  ggtitle("By date: all minute data kept (points) :(")
# Same day-rounded data drawn as a line: with no aggregation there are
# still many y values for each x value.
power %>%
  mutate(Date_round = floor_date(Date, unit = "day")) %>%
  ggplot(aes(x = Date_round, y = power_kW)) +
  geom_line() +
  ggtitle("By date: all minute data kept (lines) :(")
# Over what time period should we summarize? ----
# Keep in minutes: plot the readings without aggregating
power %>%
  ggplot(aes(x = Date, y = power_kW)) +
  geom_line() +
  ggtitle("By minute")
# Sum by hour
# Split each timestamp into year/month/day/hour, add up power_kW within
# each hour, then rebuild a date-time from the parts for the x axis.
power %>%
  mutate(
    Year = year(Date),
    Month = month(Date),
    Day = day(Date),
    Hour = hour(Date)
  ) %>%
  group_by(Year, Month, Day, Hour) %>%
  # .groups = "drop" returns an ungrouped result instead of one that is
  # still grouped by Year, Month, Day.
  summarize(Total_power_kw = sum(power_kW), .groups = "drop") %>%
  mutate(
    Date_round = make_datetime(
      year = Year, month = Month, day = Day, hour = Hour
    )
  ) %>%
  ggplot(aes(x = Date_round, y = Total_power_kw)) +
  geom_line() +
  ggtitle("By hour: summarize by adding")
# Sum by day
# Split each timestamp into year/month/day, add up power_kW within each
# day, then rebuild a date from the parts for the x axis.
power %>%
  mutate(Year = year(Date), Month = month(Date), Day = day(Date)) %>%
  group_by(Year, Month, Day) %>%
  # .groups = "drop" returns an ungrouped result instead of one that is
  # still grouped by Year, Month.
  summarize(Total_power_kw = sum(power_kW), .groups = "drop") %>%
  mutate(Date_round = make_date(year = Year, month = Month, day = Day)) %>%
  ggplot(aes(x = Date_round, y = Total_power_kw)) +
  geom_line() +
  ggtitle("By date: summarize by adding")