From 0f950959cbac331187ccafaece1b834daf0a432a Mon Sep 17 00:00:00 2001
From: Frederik Vanrenterghem
Date: Thu, 2 Nov 2017 20:34:27 +0800
Subject: [PATCH 1/1] Plot by postal codes.

---
 AU-taxstats.R | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 AU-taxstats.R

diff --git a/AU-taxstats.R b/AU-taxstats.R
new file mode 100644
index 0000000..d5e3c82
--- /dev/null
+++ b/AU-taxstats.R
@@ -0,0 +1,82 @@
+# Plot income per earning capita by Australian postal code, using the tax
+# statistics published on data.gov.au and the postal area shapefile from
+# abs.gov.au.
+
+# Install the required packages only when they are missing, instead of
+# reinstalling them on every run.
+for (pkg in c("sf", "viridis", "dplyr")) {
+  if (!requireNamespace(pkg, quietly = TRUE)) {
+    install.packages(pkg)
+  }
+}
+# geom_sf ships with ggplot2 from release 3.0.0 onwards, so the CRAN version
+# is sufficient; only install it when ggplot2 is missing or older than that.
+if (!nzchar(system.file(package = "ggplot2")) ||
+    utils::packageVersion("ggplot2") < "3.0.0") {
+  install.packages("ggplot2")
+}
+library(sf)
+library(ggplot2)
+library(viridis)
+
+# Download url to destfile unless the file is already present. The transfer
+# is done in binary mode: the shapefile URL does not end in ".zip", so on
+# Windows download.file would otherwise use text mode and corrupt the archive.
+download_if_missing <- function(url, destfile) {
+  if (!file.exists(destfile)) {
+    download.file(url = url, destfile = destfile, mode = "wb")
+  }
+  invisible(destfile)
+}
+
+# download.file does not create missing directories.
+if (!dir.exists("data")) {
+  dir.create("data")
+}
+
+# Obtain the tax dataset if not available yet
+download_if_missing(
+  url = "http://data.gov.au/dataset/5c99cfed-254d-40a6-af1c-47412b7de6fe/resource/90f7f4eb-2c44-4884-96c0-01060c820cfd/download/taxstats2015individual06ataxablestatusstateterritorypostcode.csv",
+  destfile = "data/taxstats2015individual06ataxablestatusstateterritorypostcode.csv"
+)
+# http://data.gov.au/dataset/5c99cfed-254d-40a6-af1c-47412b7de6fe/resource/d3189e9d-533a-4893-b6a1-758781083418/download/taxstats2015individual06btaxablestatusstateterritorypostcode.csv
+
+# Obtain shapefile with Australian postal codes if not available yet
+download_if_missing(
+  url = "http://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&1270055003_poa_2016_aust_shape.zip&1270.0.55.003&Data%20Cubes&4FB811FA48EECA7ACA25802C001432D0&0&July%202016&13.09.2016&Latest",
+  destfile = "data/1270055003_poa_2016_aust_shape.zip"
+)
+# Unzip it if not done already
+if (!file.exists("data/POA_2016_AUST.shp")) {
+  unzip(zipfile = "data/1270055003_poa_2016_aust_shape.zip", exdir = "data/")
+}
+
+# Read Postcode as text so that leading zeros are kept and the merge key has
+# the same form as the character postal codes in the shapefile
+# (NOTE(review): assumes POA_CODE16 is stored as text - confirm).
+taxstats <- read.csv(
+  "data/taxstats2015individual06ataxablestatusstateterritorypostcode.csv",
+  stringsAsFactors = FALSE,
+  colClasses = c(Postcode = "character")
+)
+taxstats <- dplyr::filter(taxstats, Taxable.status == "Taxable")
+POA <- st_read(dsn = "data/", layer = "POA_2016_AUST", stringsAsFactors = FALSE)
+
+# Keep every postal area (all.y = TRUE), also those without tax data. The
+# result is converted explicitly, as merge() with a plain data frame as x is
+# not guaranteed to return an sf object.
+taxstats.POA <- merge(
+  x = taxstats, y = POA,
+  by.x = "Postcode", by.y = "POA_CODE16", all.y = TRUE
+)
+taxstats.POA <- st_as_sf(taxstats.POA)
+
+taxstats.POA$incomeperearningcapita <-
+  taxstats.POA$`Total.Income.or.Loss..` / taxstats.POA$Total.Income.or.Loss.no.
+# Postal codes turn out not to be too interesting, as they're way more granular
+# around big cities - making the high income postal codes invisible on the
+# chart below
+ggplot(taxstats.POA) +
+  geom_sf(aes(fill = incomeperearningcapita, color = incomeperearningcapita)) +
+  scale_fill_viridis("incomeperearningcapita") +
+  scale_color_viridis("incomeperearningcapita")
-- 
2.39.5