X-Git-Url: http://git.vanrenterghem.biz/R/project-using-kafka-in-R.git/blobdiff_plain/027602ba76ffbda23f8b02859a72687e4b68b16f..7df155dc850d206391becead77267dd0e1243715:/predictWebsiteHits.R diff --git a/predictWebsiteHits.R b/predictWebsiteHits.R index ec74310..2a0d66e 100644 --- a/predictWebsiteHits.R +++ b/predictWebsiteHits.R @@ -1,19 +1,29 @@ -setwd("~/work") library(jsonlite) library(fable) library(tsibble) library(lubridate) +library(ggplot2) -logfile <- file("photos.vanrenterghem.biz.access.kvp.log") +logfile <- file("data/photos.vanrenterghem.biz.access.kvp.log.1") apachelog <- stream_in(logfile) #apachelog$time <- gsub("\\[|\\]", "", apachelog$time) apachelog %>% mutate(time = gsub("\\[|\\]", "", time), - time = dmy_hms(time)) %>% - group_by(time) %>% + time = dmy_hms(time), + datehour = floor_date(time, unit = "hour") + ) %>% + filter(time > ymd_h("2018-08-22 12")) %>% + group_by(datehour) %>% summarise(hits = n()) -> - apachelog + apachelog_tidy + +apachelog_tsbl <- as.tsibble(apachelog_tidy, index = datehour) +apachelog_tsbl %>% + ARIMA(log(hits)) %>% + forecast(h=5) %>% + autoplot + + ggtitle("Forecasting website hits using apache log only") + + xlab("Date time") -as.tsibble(apachelog, index = time)