X-Git-Url: http://git.vanrenterghem.biz/R/project-au-taxstats.git/blobdiff_plain/49286c850b490d5617b8eaf4485671f52d264a7e..a4d86b2c1b034830351c7d5d9e72a1acb2d36a29:/AU-taxstats.R diff --git a/AU-taxstats.R b/AU-taxstats.R index bda1448..76f9dcf 100644 --- a/AU-taxstats.R +++ b/AU-taxstats.R @@ -4,6 +4,7 @@ install.packages("devtools") devtools::install_github("tidyverse/ggplot2") # needed for geom_sf library(ggplot2) library(viridis) +library(ggthemes) # Obtain the tax dataset if not available yet if(!file.exists("data/taxstats2015individual06ataxablestatusstateterritorypostcode.csv")) @@ -44,13 +45,28 @@ taxstats.sa3 <- merge(x = taxstats, y = sa3, by.x = "Postcode", by.y = "POA_CODE # Create a matrix of intersecting postal codes and SA3's POA_SAs <- st_intersects(x=sa3, y=POA, sparse=FALSE) -taxstats.POA$incomeperearningcapita[is.na(taxstats.POA$incomeperearningcapita)] <- 0 - +taxstats.POA$`Total.Income.or.Loss..`[is.na(taxstats.POA$`Total.Income.or.Loss..`)] <- 0 +taxstats.POA$`Total.Income.or.Loss.no.`[is.na(taxstats.POA$`Total.Income.or.Loss.no.`)] <- 0 # Perform matrix multiplication to obtain the income metrix per SA3 # Total income will be incorrect, as the POAs intersect with multiple SA3s -sa3$incomeperearningcapita <- as.vector(POA_SAs %*% as.matrix(taxstats.POA$incomeperearningcapita)) +sa3$TotalIncome <- as.vector(POA_SAs %*% as.matrix(taxstats.POA$`Total.Income.or.Loss..`)) +sa3$TotalIncomeEarners <- as.vector(POA_SAs %*% as.matrix(taxstats.POA$`Total.Income.or.Loss.no.`)) +sa3$incomeperearningcapita <- (sa3$TotalIncome / sa3$TotalIncomeEarners)/1000 + +# As SA3s are still to narrow around cities compared to in the country, +# let's simply look at Melbourne -ggplot(sa3) + +ggplot(dplyr::filter(sa3, data.table::`%like%`(GCC_NAME16, "Melbourne") )) + geom_sf(aes(fill = incomeperearningcapita, color = incomeperearningcapita)) + - scale_fill_viridis("incomeperearningcapita") + - scale_color_viridis("incomeperearningcapita") + scale_fill_viridis(name = "") + + scale_color_viridis(name = "") + + coord_sf(datum = NA) + # Work around https://github.com/tidyverse/ggplot2/issues/2071 to remove gridlines + labs(title = "Melbourne \nincome distribution", + subtitle = "2014/15, in 1000s AUD", + caption = "\nSource: Australian Taxation Office") + + theme_economist() + + theme(legend.position = "bottom", + legend.text = element_text(angle = 45, hjust = 1, size = 8), + axis.text = element_blank(), + axis.ticks = element_blank()) +