# Source: git.vanrenterghem.biz Git - R/project-using-kafka-in-R.git / blob - predictWebsiteHits.R
# Blob: 2a0d66e2e6f96b429a07495d91a92ce5bcb27aba
# Path: [R/project-using-kafka-in-R.git] / predictWebsiteHits.R
1 library(jsonlite)
2 library(fable)
3 library(tsibble)
4 library(lubridate)
5 library(ggplot2)
# Forecast hourly website hits from an Apache access log.
#
# The log is streamed in with jsonlite::stream_in(), each row is counted as
# one hit, hits are aggregated per hour, and an ARIMA model fitted on
# log(hits) is used to forecast the next five hours.
logfile <- file("data/photos.vanrenterghem.biz.access.kvp.log.1")

apachelog <- stream_in(logfile)

# NOTE(review): mutate(), filter(), group_by() and summarise() are dplyr
# verbs, but no library(dplyr) call is visible in this file -- confirm that
# dplyr is attached before this script runs.
apachelog_tidy <- apachelog %>%
  mutate(
    # Strip the square brackets around the timestamp so it can be parsed.
    time = gsub("\\[|\\]", "", time),
    time = dmy_hms(time),
    # Truncate each timestamp to the start of its hour for aggregation.
    datehour = floor_date(time, unit = "hour")
  ) %>%
  # Only keep records after 2018-08-22 12:00.
  filter(time > ymd_h("2018-08-22 12")) %>%
  group_by(datehour) %>%
  summarise(hits = n())

# NOTE(review): hours without any hit produce no row above, so the series
# may contain implicit gaps -- confirm whether tsibble::fill_gaps() is
# needed before modelling.
apachelog_tsbl <- as_tsibble(apachelog_tidy, index = datehour)

# ARIMA() only builds a model definition; it has to be fitted to the tsibble
# through model() before forecast() can be applied.
apachelog_tsbl %>%
  model(ARIMA(log(hits))) %>%
  forecast(h = 5) %>%
  autoplot() +
  ggtitle("Forecasting website hits using apache log only") +
  xlab("Date time")