X-Git-Url: http://git.vanrenterghem.biz/R/project-using-kafka-in-R.git/blobdiff_plain/027602ba76ffbda23f8b02859a72687e4b68b16f..HEAD:/predictWebsiteHits.R

Summary of this patch (predictWebsiteHits.R):
  * Removes the script-level setup: setwd(), library(jsonlite), and reading
    the access log from a hard-coded file via stream_in().
  * Wraps the pipeline in createPlot(x), which takes the log records as its
    argument instead of reading them itself.
  * Strips the square brackets from `time`, parses it with dmy_hms(), floors
    it to the hour (`datehour`), keeps rows later than 2018-08-22 12:00, and
    counts hits per hour.
  * Converts the hourly counts to a tsibble indexed by `datehour`, fits
    ARIMA on log(hits), forecasts 5 steps ahead and returns the autoplot
    with a title and x-axis label.

Review notes (to be confirmed, not changed by this patch):
  * NOTE(review): mutate/filter/group_by/summarise/n are called, but no
    library(dplyr) line exists in the resulting file; presumably the caller
    attaches dplyr before sourcing this script -- verify.
  * NOTE(review): the cutoff ymd_h("2018-08-22 12") is hard-coded inside
    createPlot(); consider making it a parameter with this value as default.
  * NOTE(review): the tsibble is piped straight into ARIMA(log(hits));
    newer fable releases appear to expect model(ARIMA(log(hits))) -- confirm
    the fable version this targets.
  * The original leading whitespace of each line was lost; indentation below
    is reconstructed, so apply with whitespace-insensitive matching if the
    hunk does not apply cleanly.

diff --git a/predictWebsiteHits.R b/predictWebsiteHits.R
index ec74310..415ab1e 100644
--- a/predictWebsiteHits.R
+++ b/predictWebsiteHits.R
@@ -1,19 +1,24 @@
-setwd("~/work")
-library(jsonlite)
 library(fable)
 library(tsibble)
 library(lubridate)
+library(ggplot2)
 
-logfile <- file("photos.vanrenterghem.biz.access.kvp.log")
-
-apachelog <- stream_in(logfile)
-
-#apachelog$time <- gsub("\\[|\\]", "", apachelog$time)
-apachelog %>%
+createPlot <- function(x){
+x %>%
   mutate(time = gsub("\\[|\\]", "", time),
-         time = dmy_hms(time)) %>%
-  group_by(time) %>%
+         time = dmy_hms(time),
+         datehour = floor_date(time, unit = "hour")
+         ) %>%
+  filter(time > ymd_h("2018-08-22 12")) %>%
+  group_by(datehour) %>%
   summarise(hits = n()) ->
-  apachelog
+  apachelog_tidy
 
-as.tsibble(apachelog, index = time)
+apachelog_tsbl <- as.tsibble(apachelog_tidy, index = datehour)
+apachelog_tsbl %>%
+  ARIMA(log(hits)) %>%
+  forecast(h=5) %>%
+  autoplot +
+  ggtitle("Forecasting website hits using apache log only") +
+  xlab("Date time")
+}
\ No newline at end of file