X-Git-Url: http://git.vanrenterghem.biz/R/project-au-taxstats.git/blobdiff_plain/0f950959cbac331187ccafaece1b834daf0a432a..83ec806f511257df27151b397a132b3d9d061c84:/AU-taxstats.R?ds=inline

Note: line breaks were restored from the hunk line counts; leading indentation
is reconstructed. A few comments and local names in the added lines were
tidied, so the "index" blob ids below refer to the original commit.

diff --git a/AU-taxstats.R b/AU-taxstats.R
index d5e3c82..92577c6 100644
--- a/AU-taxstats.R
+++ b/AU-taxstats.R
@@ -4,6 +4,8 @@ install.packages("devtools")
 devtools::install_github("tidyverse/ggplot2") # needed for geom_sf
 library(ggplot2)
 library(viridis)
+library(ggthemes)
+library(animation) # for saveGIF
 
 # Obtain the tax dataset if not available yet
 if(!file.exists("data/taxstats2015individual06ataxablestatusstateterritorypostcode.csv"))
@@ -30,3 +32,53 @@ ggplot(taxstats.POA) +
   geom_sf(aes(fill = incomeperearningcapita, color = incomeperearningcapita)) +
   scale_fill_viridis("incomeperearningcapita") +
   scale_color_viridis("incomeperearningcapita")
+
+# Let's try by SA3
+if(!file.exists("data/1270055001_sa3_2016_aust_shape.zip"))
+  download.file(url = "http://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&1270055001_sa3_2016_aust_shape.zip&1270.0.55.001&Data%20Cubes&43942523105745CBCA257FED0013DB07&0&July%202016&12.07.2016&Latest", destfile = "data/1270055001_sa3_2016_aust_shape.zip")
+
+if(!file.exists("data/SA3_2016_AUST.shp"))
+  unzip(zipfile = "data/1270055001_sa3_2016_aust_shape.zip", exdir = "data/")
+
+sa3 <- st_read(dsn = "data/", layer = "SA3_2016_AUST", stringsAsFactors = FALSE)
+
+# Create a matrix of intersecting postal codes and SA3's
+
+POA_SAs <- st_intersects(x=sa3, y=POA, sparse=FALSE)
+taxstats.POA$`Total.Income.or.Loss..`[is.na(taxstats.POA$`Total.Income.or.Loss..`)] <- 0
+taxstats.POA$`Total.Income.or.Loss.no.`[is.na(taxstats.POA$`Total.Income.or.Loss.no.`)] <- 0
+# Perform matrix multiplication to obtain the income metric per SA3
+# Total income will be incorrect, as the POAs intersect with multiple SA3s
+sa3$TotalIncome <- as.vector(POA_SAs %*% as.matrix(taxstats.POA$`Total.Income.or.Loss..`))
+sa3$TotalIncomeEarners <- as.vector(POA_SAs %*% as.matrix(taxstats.POA$`Total.Income.or.Loss.no.`))
+sa3$incomeperearningcapita <- (sa3$TotalIncome / sa3$TotalIncomeEarners)/1000
+
+# As SA3s are still too narrow around cities compared to in the country,
+# let's simply look at a handful of major cities
+
+cities <- c("Perth","Melbourne","Sydney","Adelaide","Brisbane")
+
+# Create a plot for each of these cities. This is wrapped in a function
+# so saveGIF can evaluate it and record each printed plot as a frame
+
+plots <- function() {lapply(cities, function(x){
+  city_plot <- ggplot(dplyr::filter(sa3, data.table::`%like%`(GCC_NAME16, x) )) +
+    geom_sf(aes(fill = incomeperearningcapita, color = incomeperearningcapita)) +
+    scale_fill_viridis(name = "",
+      limits = c(min(sa3$incomeperearningcapita, na.rm = TRUE),max(sa3$incomeperearningcapita, na.rm = TRUE))) +
+    scale_color_viridis(name = "",
+      limits = c(min(sa3$incomeperearningcapita, na.rm = TRUE),max(sa3$incomeperearningcapita, na.rm = TRUE))) +
+    coord_sf(datum = NA) + # Work around https://github.com/tidyverse/ggplot2/issues/2071 to remove gridlines
+    labs(title = paste0(x," \nincome distribution"),
+      subtitle = "2014/15, in 1000s AUD",
+      caption = "\nSource: Australian Taxation Office") +
+    theme_economist() +
+    theme(legend.position = "bottom",
+      legend.text = element_text(angle = 45, hjust = 1, size = 8),
+      axis.text = element_blank(),
+      axis.ticks = element_blank())
+  print(city_plot)
+})
+}
+
+saveGIF(plots(),movie.name = "AUCitiesIncomeDistribution.gif", interval = 2, loop = 2)