From: Frederik Vanrenterghem
Date: Thu, 23 Aug 2018 13:30:14 +0000 (+0800)
Subject: Create forecast.
X-Git-Url: http://git.vanrenterghem.biz/R/project-using-kafka-in-R.git/commitdiff_plain/de9b20ec0492220020ebe25b91b9c38d5ac3ad67?hp=904d32efb0841d569e4557bb0535f8d9532963b4

Create forecast.

- Use combined logfile.
- Start by using ARIMA function for forecast.
---

diff --git a/predictWebsiteHits.R b/predictWebsiteHits.R
index 4c2d65c..2a0d66e 100644
--- a/predictWebsiteHits.R
+++ b/predictWebsiteHits.R
@@ -2,17 +2,28 @@ library(jsonlite)
 library(fable)
 library(tsibble)
 library(lubridate)
+library(ggplot2)
 
-logfile <- file("data/photos.vanrenterghem.biz.access.kvp.log")
+logfile <- file("data/photos.vanrenterghem.biz.access.kvp.log.1")
 apachelog <- stream_in(logfile)
 
 #apachelog$time <- gsub("\\[|\\]", "", apachelog$time)
 
 apachelog %>%
   mutate(time = gsub("\\[|\\]", "", time),
-         time = dmy_hms(time)) %>%
-  group_by(time) %>%
+         time = dmy_hms(time),
+         datehour = floor_date(time, unit = "hour")
+         ) %>%
+  filter(time > ymd_h("2018-08-22 12")) %>%
+  group_by(datehour) %>%
   summarise(hits = n()) ->
-  apachelog
+  apachelog_tidy
+
+apachelog_tsbl <- as.tsibble(apachelog_tidy, index = datehour)
+apachelog_tsbl %>%
+  ARIMA(log(hits)) %>%
+  forecast(h=5) %>%
+  autoplot +
+  ggtitle("Forecasting website hits using apache log only") +
+  xlab("Date time")
 
-as.tsibble(apachelog, index = time)