devtools::install_github("tidyverse/ggplot2") # needed for geom_sf
library(ggplot2)
library(viridis)
+library(ggthemes)
# Obtain the tax dataset if not available yet
if(!file.exists("data/taxstats2015individual06ataxablestatusstateterritorypostcode.csv"))
# Create a matrix of intersecting postal codes and SA3's
# sparse=FALSE requests a dense logical matrix instead of an index list, so the
# result can be used directly as the left operand of the %*% products below.
# NOTE(review): per the sf documentation the matrix should have one row per
# sa3 feature and one column per POA feature -- confirm.
POA_SAs <- st_intersects(x=sa3, y=POA, sparse=FALSE)
# Replace missing values with 0 so that NAs do not propagate through the
# matrix products that follow.
# The removed line zero-filled the derived per-capita column; the added lines
# zero-fill the two raw columns that are aggregated instead (presumably the
# total income amount and the number of income earners -- verify against the
# tax dataset's column definitions).
-taxstats.POA$incomeperearningcapita[is.na(taxstats.POA$incomeperearningcapita)] <- 0
-
+taxstats.POA$`Total.Income.or.Loss..`[is.na(taxstats.POA$`Total.Income.or.Loss..`)] <- 0
+taxstats.POA$`Total.Income.or.Loss.no.`[is.na(taxstats.POA$`Total.Income.or.Loss.no.`)] <- 0
# Perform matrix multiplication to obtain the income metric per SA3
# Total income will be incorrect, as the POAs intersect with multiple SA3s
# Multiplying the logical intersection matrix by a numeric column gives, for
# each SA3, the sum of that column over every POA the SA3 intersects.
# The removed line summed the per-capita values of the POAs directly; the
# added lines sum income and earner counts separately and divide afterwards,
# so the per-capita figure is a ratio of sums.
# Assumes the rows of taxstats.POA are in the same order as the features of
# POA used to build POA_SAs -- TODO confirm.
# The final division by 1000 matches the "in 1000s AUD" subtitle of the plot.
-sa3$incomeperearningcapita <- as.vector(POA_SAs %*% as.matrix(taxstats.POA$incomeperearningcapita))
+sa3$TotalIncome <- as.vector(POA_SAs %*% as.matrix(taxstats.POA$`Total.Income.or.Loss..`))
+sa3$TotalIncomeEarners <- as.vector(POA_SAs %*% as.matrix(taxstats.POA$`Total.Income.or.Loss.no.`))
+sa3$incomeperearningcapita <- (sa3$TotalIncome / sa3$TotalIncomeEarners)/1000
+
+# As SA3s are still too narrow around cities compared to those in the country,
+# let's simply look at Melbourne
# Choropleth of income per earning capita by SA3.
# The removed line plotted every SA3; the added line keeps only the rows whose
# GCC_NAME16 matches the pattern "Melbourne" (data.table's %like% is a
# pattern match, not an exact comparison).
-ggplot(sa3) +
+ggplot(dplyr::filter(sa3, data.table::`%like%`(GCC_NAME16, "Melbourne") )) +
# Fill and outline colour are mapped to the same variable, so polygon borders
# take the same colour as their fill.
geom_sf(aes(fill = incomeperearningcapita, color = incomeperearningcapita)) +
- scale_fill_viridis("incomeperearningcapita") +
- scale_color_viridis("incomeperearningcapita")
# Legend titles are blanked in the added lines; the unit is given in the
# subtitle instead.
+ scale_fill_viridis(name = "") +
+ scale_color_viridis(name = "") +
+ coord_sf(datum = NA) + # Work around https://github.com/tidyverse/ggplot2/issues/2071 to remove gridlines
+ labs(title = "Melbourne \nincome distribution",
+ subtitle = "2014/15, in 1000s AUD",
+ caption = "\nSource: Australian Taxation Office") +
# theme_economist() presumably comes from ggthemes, loaded at the top of the
# script; the theme() call then moves the legend below the map, rotates its
# labels, and hides axis text and ticks.
+ theme_economist() +
+ theme(legend.position = "bottom",
+ legend.text = element_text(angle = 45, hjust = 1, size = 8),
+ axis.text = element_blank(),
+ axis.ticks = element_blank())
+