1 # Analyse births in Australia
5 AU_births <- read_csv("data/BIRTHS_MONTH_OCCURRENCE_17102017230358770.csv")
6 # Exclude 2015 as data seems incomplete
7 AU_births <- filter(AU_births, !(TIME == 2015))
8 # Remove subtotals in rows
9 AU_births <- filter(AU_births, MONTH_OCCURENCE != "TOT")
10 # Make it a number as it was meant to be
11 AU_births$MONTH_OCCURENCE <- as.numeric(AU_births$MONTH_OCCURENCE)
12 # Add median by month to df
13 AU_births_medians <- AU_births %>%
14 group_by(Region, MONTH_OCCURENCE) %>%
15 summarise(median = median(Value))
16 AU_births <- merge(AU_births, AU_births_medians)
18 # Create a seasonal subseries plot
19 ggplot(filter(AU_births, Region == "Australia"), aes(x= Time, y = Value)) +
21 geom_line(aes(y = median), color = "red") +
22 facet_grid( ~ MONTH_OCCURENCE) +
23 labs(x = paste("\nFrom", min(AU_births$Time), "to",max(AU_births$Time)),
24 caption = "Data: Australian Bureau of Statistics",
25 y = "Births per month",
26 title = "Births in Australia",
27 subtitle = "Bottoms out in summer months; boomed from 2005/2007") +
29 theme(axis.text.x = element_blank())
31 # Let's check the seasonality
32 # Uses some base R graphics subtleties to right align the caption...
33 AU_births_ts <- as.ts(arrange(filter(AU_births, Region == "Australia"), Time, MONTH_OCCURENCE)$Value)
34 Acf(AU_births_ts, lag.max = 48, main = "")
35 title(main = "ACF plot of births in Australia by month")
36 title(sub = "Data: Australian Bureau of Statistics", adj = 1)
38 # The acf peaks at multiples of 12, showing a yearly seasonality
40 # Seasonality doesn't appear to increase over time
42 #trend_AU_births <- ma(AU_births_ts, order = 12, centre = T)
43 #plot(trend_AU_births)
44 #detrend_AU_births <- AU_births_ts - trend_AU_births
46 AU_births_decomposed <- decompose(ts(AU_births_ts, frequency = 12))
47 years <- rep(c(min(AU_births$TIME):max(AU_births$TIME)), each = 12)
49 # For ggplot, we prefer a long dataframe over a list of timeseries
50 AU_births_decomposed_df <- as.tibble(cbind(observation = c(1:length(years)),
52 observed = as.vector(AU_births_ts),
53 seasonal = as.vector(AU_births_decomposed$seasonal),
54 random = as.vector(AU_births_decomposed$random),
55 trend = as.vector(AU_births_decomposed_trend)))
56 # Add a character vector only after the above cbind, to avoid everything becoming a chr or a factor
57 AU_births_decomposed_df <- mutate(AU_births_decomposed_df,
58 seastrend = seasonal + trend,
59 month = rep(c("01","02","03","04","05","06","07","08","09","10","11","12"), times = 19))
60 AU_births_decomposed_df <- gather(AU_births_decomposed_df, key = component, value = value, -observation, -years, - month)
62 # Plot the various components of the observed births underneath each other
63 ggplot(filter(AU_births_decomposed_df, component != "seastrend"), aes(x = as.factor(observation), y = as.numeric(value), group = 1)) +
65 facet_grid(component ~ ., scales = "free_y") +
66 # Hackish way of showing only the year
67 scale_x_discrete(breaks = filter(AU_births_decomposed_df, month == "01")$observation,
68 labels = filter(AU_births_decomposed_df, month == "01")$years) +
69 theme_fivethirtyeight() +
70 theme(axis.text.x = element_text(angle = 45)) +
71 labs(title = "Births in Australia",
72 subtitle = "Seasonally decomposed",
73 caption = "Data: Australian Bureau of Statistics",
77 # Let's look at everything in one plot instead
78 ggplot(AU_births_decomposed_df, aes(x = yearmonth, y = as.numeric(value), group = 1)) +
79 geom_line(data = dplyr::filter(AU_births_decomposed_df, component =="trend"), color = "orange", size = 2) +
80 geom_line(data = dplyr::filter(AU_births_decomposed_df, component =="observed"), color = "grey50") +
81 geom_line(data = dplyr::filter(AU_births_decomposed_df, component =="seastrend"), color = "black") +
82 scale_x_discrete(breaks = filter(AU_births_decomposed_df, month == "01")$yearmonth,
83 labels = filter(AU_births_decomposed_df, month == "01")$years) +
84 theme_economist_white() +
85 theme(axis.text.x = element_text(angle = 45)) +
86 labs(title = "Births in Australia",
87 subtitle = "Seasonally decomposed",
88 caption = "Data: Australian Bureau of Statistics",