# Package setup ----
# geom_sf() ships with ggplot2 from version 3.0.0 onwards. Only fall back to
# the development version on GitHub (which needs devtools) when ggplot2 is
# missing or too old, instead of reinstalling both packages on every run.
if (!requireNamespace("ggplot2", quietly = TRUE) ||
    utils::packageVersion("ggplot2") < "3.0.0") {
  if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
  }
  devtools::install_github("tidyverse/ggplot2") # needed for geom_sf
}

# Attach the packages whose functions are called unqualified further down.
library(sf)      # st_read(), st_intersects()
library(ggplot2) # ggplot(), aes(), geom_sf()
library(viridis) # scale_fill_viridis(), scale_color_viridis()
# Obtain the tax dataset if not available yet
taxstats_csv <- "data/taxstats2015individual06ataxablestatusstateterritorypostcode.csv"
if (!file.exists(taxstats_csv)) {
  # download.file() does not create missing directories, so make sure the
  # target folder exists first (no-op when it is already there).
  dir.create(dirname(taxstats_csv), showWarnings = FALSE, recursive = TRUE)
  download.file(
    url = "http://data.gov.au/dataset/5c99cfed-254d-40a6-af1c-47412b7de6fe/resource/90f7f4eb-2c44-4884-96c0-01060c820cfd/download/taxstats2015individual06ataxablestatusstateterritorypostcode.csv",
    destfile = taxstats_csv,
    mode = "wb" # store the bytes exactly as served on every platform
  )
}
# Related resource from the same dataset (not used by this script):
# http://data.gov.au/dataset/5c99cfed-254d-40a6-af1c-47412b7de6fe/resource/d3189e9d-533a-4893-b6a1-758781083418/download/taxstats2015individual06btaxablestatusstateterritorypostcode.csv
# Obtain shapefile with Australian postal codes if not available yet
poa_zip <- "data/1270055003_poa_2016_aust_shape.zip"
if (!file.exists(poa_zip)) {
  # download.file() does not create missing directories.
  dir.create(dirname(poa_zip), showWarnings = FALSE, recursive = TRUE)
  # The URL does not end in ".zip", so download.file() will not switch to
  # binary mode on its own on Windows; ask for it explicitly, otherwise the
  # archive is written in text mode and ends up corrupted.
  download.file(
    url = "http://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&1270055003_poa_2016_aust_shape.zip&1270.0.55.003&Data%20Cubes&4FB811FA48EECA7ACA25802C001432D0&0&July%202016&13.09.2016&Latest",
    destfile = poa_zip,
    mode = "wb"
  )
}
# Unzip it if not done already
if (!file.exists("data/POA_2016_AUST.shp")) {
  unzip(zipfile = poa_zip, exdir = "data/")
}
# Load the tax statistics and keep only the rows whose status is "Taxable"
taxstats_path <- "data/taxstats2015individual06ataxablestatusstateterritorypostcode.csv"
taxstats <- dplyr::filter(
  read.csv(taxstats_path, stringsAsFactors = FALSE),
  Taxable.status == "Taxable"
)

# Read the postal-area layer from the unzipped shapefile
POA <- st_read(dsn = "data/", layer = "POA_2016_AUST", stringsAsFactors = FALSE)

# Attach the tax figures to the postal areas; all.y = TRUE keeps every postal
# area, including those without a matching tax record (their values are NA)
taxstats.POA <- merge(
  x = taxstats,
  y = POA,
  by.x = "Postcode",
  by.y = "POA_CODE16",
  all.y = TRUE
)

# Ratio of the `Total.Income.or.Loss..` column to the
# `Total.Income.or.Loss.no.` column (presumably amount over head count)
income_amount <- taxstats.POA$`Total.Income.or.Loss..`
income_count <- taxstats.POA$Total.Income.or.Loss.no.
taxstats.POA$incomeperearningcapita <- income_amount / income_count
# Choropleth of income per earner by postal area.
# Postal codes turn out not to be too interesting: they are far more granular
# around big cities, which makes the high-income postal codes invisible on
# this chart.
ggplot(
  data = taxstats.POA,
  mapping = aes(fill = incomeperearningcapita, color = incomeperearningcapita)
) +
  geom_sf() +
  scale_fill_viridis("incomeperearningcapita") +
  scale_color_viridis("incomeperearningcapita")
# Obtain the SA3 shapefile if not available yet
sa3_zip <- "data/1270055001_sa3_2016_aust_shape.zip"
if (!file.exists(sa3_zip)) {
  # download.file() does not create missing directories.
  dir.create(dirname(sa3_zip), showWarnings = FALSE, recursive = TRUE)
  # The URL does not end in ".zip", so binary mode is not selected
  # automatically on Windows; request it explicitly to keep the archive intact.
  download.file(
    url = "http://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&1270055001_sa3_2016_aust_shape.zip&1270.0.55.001&Data%20Cubes&43942523105745CBCA257FED0013DB07&0&July%202016&12.07.2016&Latest",
    destfile = sa3_zip,
    mode = "wb"
  )
}
# Unzip it if not done already
if (!file.exists("data/SA3_2016_AUST.shp")) {
  unzip(zipfile = sa3_zip, exdir = "data/")
}
# Read the SA3 layer from the unzipped shapefile
sa3 <- st_read(dsn = "data/", layer = "SA3_2016_AUST", stringsAsFactors = FALSE)
# NOTE: a merge of `taxstats` with `sa3` on by.y = "POA_CODE16" used to follow
# here. It was removed: the SA3 layer is keyed by SA3 codes and is not expected
# to carry a `POA_CODE16` column (check with names(sa3)), in which case merge()
# stops with "'by' must specify a uniquely valid column". Its result was not
# used by the rest of this script; the tax figures reach the SA3s through the
# spatial intersection with the postal areas instead.
# Create a matrix of intersecting postal codes and SA3's: one row per SA3 and
# one column per postal area, with columns in the row order of `POA`
POA_SAs <- st_intersects(x = sa3, y = POA, sparse = FALSE)

# Postal areas without a usable income figure contribute nothing
taxstats.POA$incomeperearningcapita[is.na(taxstats.POA$incomeperearningcapita)] <- 0

# merge() returns its rows ordered by the join key, and possibly more than one
# row per postcode, so the merged table is not guaranteed to line up with the
# columns of POA_SAs. Pick the income figure for each postal area explicitly,
# in `POA` row order (first match wins when a postcode occurs several times).
poa_rows <- match(POA$POA_CODE16, taxstats.POA$Postcode)
poa_income <- taxstats.POA$incomeperearningcapita[poa_rows]
poa_income[is.na(poa_income)] <- 0

# Perform matrix multiplication to obtain the income metric per SA3.
# Each SA3 receives the SUM of the per-earner incomes of all postal areas it
# intersects. Total income will be incorrect, as the POAs intersect with
# multiple SA3s.
sa3$incomeperearningcapita <- as.vector(POA_SAs %*% as.matrix(poa_income))
# Choropleth of the aggregated income metric per SA3.
# The layers need a ggplot() object to be added to; without it the `+` chain
# fails because ggproto objects cannot be added to each other.
ggplot(sa3) +
  geom_sf(aes(fill = incomeperearningcapita, color = incomeperearningcapita)) +
  scale_fill_viridis("incomeperearningcapita") +
  scale_color_viridis("incomeperearningcapita")