--- /dev/null
+# Analyse births in Australia
+library(tidyverse)
+library(ggthemes)
+library(forecast)
+AU_births <- read_csv("data/BIRTHS_MONTH_OCCURRENCE_17102017230358770.csv")
+# Exclude 2015 as data seems incomplete
+AU_births <- filter(AU_births, !(TIME == 2015))
+# Remove subtotals in rows
+AU_births <- filter(AU_births, MONTH_OCCURENCE != "TOT")
+# Make it a number as it was meant to be
+AU_births$MONTH_OCCURENCE <- as.numeric(AU_births$MONTH_OCCURENCE)
+# Add median by month to df
+AU_births_medians <- AU_births %>%
+ group_by(Region, MONTH_OCCURENCE) %>%
+ summarise(median = median(Value))
+AU_births <- merge(AU_births, AU_births_medians)
+
+# Create a seasonal subseries plot
+ggplot(filter(AU_births, Region == "Australia"), aes(x= Time, y = Value)) +
+ geom_line() +
+ geom_line(aes(y = median), color = "red") +
+ facet_grid( ~ MONTH_OCCURENCE) +
+ labs(x = paste("\nFrom", min(AU_births$Time), "to",max(AU_births$Time)),
+ caption = "Data: Australian Bureau of Statistics",
+ y = "Births per month",
+ title = "Births in Australia",
+ subtitle = "Bottoms out in summer months; boomed from 2005/2007") +
+ theme_economist() +
+ theme(axis.text.x = element_blank())
+
+# Let's check the seasonality
+# Uses some base R graphics subtleties to right align the caption...
+AU_births_ts <- as.ts(arrange(filter(AU_births, Region == "Australia"), Time, MONTH_OCCURENCE)$Value)
+Acf(AU_births_ts, lag.max = 48, main = "")
+ title(main = "ACF plot of births in Australia by month")
+ title(sub = "Data: Australian Bureau of Statistics", adj = 1)
+
+# The acf peaks at multiples of 12, showing a yearly seasonality
+
+# Seasonality doesn't appear to increase over time
+#plot(AU_births_ts)
+#trend_AU_births <- ma(AU_births_ts, order = 12, centre = T)
+#plot(trend_AU_births)
+#detrend_AU_births <- AU_births_ts - trend_AU_births
+
+AU_births_decomposed <- decompose(ts(AU_births_ts, frequency = 12))
+years <- rep(c(min(AU_births$TIME):max(AU_births$TIME)), each = 12)
+
+# For ggplot, we prefer a long dataframe over a list of timeseries
+AU_births_decomposed_df <- as.tibble(cbind(observation = c(1:length(years)),
+ years,
+ observed = as.vector(AU_births_ts),
+ seasonal = as.vector(AU_births_decomposed$seasonal),
+ random = as.vector(AU_births_decomposed$random),
+ trend = as.vector(AU_births_decomposed_trend)))
+# Add a character vector only after the above cbind, to avoid everything becoming a chr or a factor
+AU_births_decomposed_df <- mutate(AU_births_decomposed_df,
+ seastrend = seasonal + trend,
+ month = rep(c("01","02","03","04","05","06","07","08","09","10","11","12"), times = 19))
+AU_births_decomposed_df <- gather(AU_births_decomposed_df, key = component, value = value, -observation, -years, - month)
+
+# Plot the various components of the observed births underneath each other
+ggplot(filter(AU_births_decomposed_df, component != "seastrend"), aes(x = as.factor(observation), y = as.numeric(value), group = 1)) +
+ geom_line() +
+ facet_grid(component ~ ., scales = "free_y") +
+ # Hackish way of showing only the year
+ scale_x_discrete(breaks = filter(AU_births_decomposed_df, month == "01")$observation,
+ labels = filter(AU_births_decomposed_df, month == "01")$years) +
+ theme_fivethirtyeight() +
+ theme(axis.text.x = element_text(angle = 45)) +
+ labs(title = "Births in Australia",
+ subtitle = "Seasonally decomposed",
+ caption = "Data: Australian Bureau of Statistics",
+ x = "",
+ y = "")
+
+# Let's look at everything in one plot instead
+ggplot(AU_births_decomposed_df, aes(x = yearmonth, y = as.numeric(value), group = 1)) +
+ geom_line(data = dplyr::filter(AU_births_decomposed_df, component =="trend"), color = "orange", size = 2) +
+ geom_line(data = dplyr::filter(AU_births_decomposed_df, component =="observed"), color = "grey50") +
+ geom_line(data = dplyr::filter(AU_births_decomposed_df, component =="seastrend"), color = "black") +
+ scale_x_discrete(breaks = filter(AU_births_decomposed_df, month == "01")$yearmonth,
+ labels = filter(AU_births_decomposed_df, month == "01")$years) +
+ theme_economist_white() +
+ theme(axis.text.x = element_text(angle = 45)) +
+ labs(title = "Births in Australia",
+ subtitle = "Seasonally decomposed",
+ caption = "Data: Australian Bureau of Statistics",
+ x = "",
+ y = "")
+
+# TODO Horizon plot