From de9b20ec0492220020ebe25b91b9c38d5ac3ad67 Mon Sep 17 00:00:00 2001
From: Frederik Vanrenterghem
Date: Thu, 23 Aug 2018 21:30:14 +0800
Subject: [PATCH] Create forecast.

- Use combined logfile.
- Start by using ARIMA function for forecast.
---
 predictWebsiteHits.R | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/predictWebsiteHits.R b/predictWebsiteHits.R
index 4c2d65c..2a0d66e 100644
--- a/predictWebsiteHits.R
+++ b/predictWebsiteHits.R
@@ -2,17 +2,28 @@ library(jsonlite)
 library(fable)
 library(tsibble)
 library(lubridate)
+library(ggplot2)
 
-logfile <- file("data/photos.vanrenterghem.biz.access.kvp.log")
+logfile <- file("data/photos.vanrenterghem.biz.access.kvp.log.1")
 apachelog <- stream_in(logfile)
 
 #apachelog$time <- gsub("\\[|\\]", "", apachelog$time)
 
 apachelog %>%
     mutate(time = gsub("\\[|\\]", "", time),
-           time = dmy_hms(time)) %>%
-    group_by(time) %>%
+           time = dmy_hms(time),
+           datehour = floor_date(time, unit = "hour")
+           ) %>%
+    filter(time > ymd_h("2018-08-22 12")) %>%
+    group_by(datehour) %>%
     summarise(hits = n()) ->
-    apachelog
+    apachelog_tidy
+
+apachelog_tsbl <- as.tsibble(apachelog_tidy, index = datehour)
+apachelog_tsbl %>%
+    ARIMA(log(hits)) %>%
+    forecast(h=5) %>%
+    autoplot +
+    ggtitle("Forecasting website hits using apache log only") +
+    xlab("Date time")
 
-as.tsibble(apachelog, index = time)
-- 
2.30.2