将预测的时间序列与R中的原始序列重叠 [英] overlapping the predicted time series on the original series in R

查看:68
本文介绍了将预测的时间序列与R中的原始序列重叠的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我执行预测

  w = read.csv( C:/Users/admin/Documents/aggrmonth.csv ,sep =;,dec =,)
w
#创建时间序列对象
w = ts(w $ new,frequency = 12,start = c(2015,1))
w
#timeplot
plot.ts(w)

#接下来几个月的预测
library( forecast)
m< -stats :: HoltWinters(w)
test = forecast ::: forecast.HoltWinters(m,h = 4)#h是您希望预测
测试
多少月

现在我想对未来4个月进行预测。
从01.2017-04.2017。我知道原始值。

  17年1月1日1020 
-17年2月1日800
17年3月1日1130
17年4月1日600

但我需要获取情节其中显示的带有CI的预测值与原始值重叠。
当然,如果我没有明确说明,我会附上情节。

绿色曲线是该系列的初始值(我的4个月)
,绿色虚线是预测值与原始值重叠。
虚线上的虚线是置信区间。



如何创建这种图

  w = 

structure(list(yearMon = structure(c(9L,7L,15L,1L,17L,13L,
11L,3L,23L, 21L,19L,5L,10L,8L,16L,2L,18L,14L,12L,
4L,24L,22L,20L,6L),.Label = c( 1-Apr-15, 1 -Apr-16, 1-Aug-15,
1-Aug-16, 1-Dec-15, 1-Dec-16, 1-Feb-15, 16年2月1日, 15年1月1日,
16年1月1日, 15年7月1日, 16年7月1日, 15年6月1日 , 16年6月1日, 15年3月1日,
16年3月1日, 15年5月1日, 16年5月1日, 11月1日 -15, 1-Nov-16, 1-Oct-15,
1-Oct-16, 1-Sep-15, 1-Sep-16),类= factor),新= c(8575L,
8215L,16399L,16415L,15704L,19805L,17484L,18116L,19977L,
14439L,9258L,12259L,4909L,9539L,8802L,11253L, 11971L,7838L,
2095L,4157L,3910L,1306L,3429L,1390L)),. names = c( yearMon,
new),类= data.frame,行。名称= c(NA,-24L))


解决方案

我们可以使用 ggfortify 创建一个数据框,然后使用 ggplot2

绘制两个时间序列



 #加载所需的库
库(润滑)
库(magrittr)
库(tidyverse)
库(比例)
库(预测)
库(ggfortify)

w<-结构( list(yearMon = structure(c(9L,7L,15L,1L,17L,13L,
11L,3L,23L,21L,19L,5L,10L,8L,16L,2L,18L,14L,12L,
4L,24L,22L,20L,6L),.Label = c( 1-Apr-15, 1-Apr-16, 1-Aug-15,
1 -bugb, 1-Dec-15, 1-Dec-16, 1-Feb-15, 1-Feb-16, 1 Jan-15,
$ 1-Jan-16, 1-Jul-15, 1-Jul-16, 1-Jun-15, 1-Jun-16, 1-Mar-15,
1-Mar-16, 1-May-15, 1-May-16, 1-Nov-15, 1-Nov-16, 1-Oct-15,
1-Oct-16, 1-Sep-15, 1-Sep-16),类= factor),新= c(8575L,
8215L,16399L, 16415L,15704L,19805L,17484L,18116L,19977L,
14439L,9258L,12 259L,4909L,9539L,8802L,11253L,11971L,7838L,
2095L,4157L,3910L,1306L,3429L,1390L)),.names = c( yearMon,
new), class = data.frame,row.names = c(NA,-24L))

#创建时间序列对象
w = ts(w $ new,frequency = 12,start = c(2015,1))
w

#>一月二月三月四月五月六月七月八月九月十月十一月十一月
#> 2015 8575 8215 16399 16415 15704 19805 17484 18116 19977 14439 9258
#> 2016 4909 9539 8802 11253 11971 7838 2095 4157 3910 1306 3429
#> 12月
#> 2015 12259
#> 2016 1390

#未来几个月的预测
m<-stats :: HoltWinters(w)

#h是您希望预测多少个月
pred =预测:::: cast.HoltWinters(m,h = 4)
pred

#>点预测Lo 80嗨80 Lo 95嗨95
#> 2017年1月-5049.00381 -9644.003 -454.0045 -12076.449 1978.441
#> 2017年2月37.44605 -5599.592 5674.4843 -8583.660 8658.552
#> 2017年3月-256.41474 -6770.890 6258.0601 -10219.444 9706.615
#> 2017年4月2593.09445 -4693.919 9880.1079 -8551.431 13737.620

#情节
情节(pred,include = 24,showgap = FALSE)

 #将pred从列表转换为数据框对象
df1<-fortify(pred)%>%as_tibble()

#创建日期列,删除索引列并重命名其他列
df1%<>%
mutate(Date = as.Date(Index,%Y-%m-% d))%>%
select(-Index)%&%;%
重命名( Low95 = Lo 95,
Low80 = Lo 80,
High95 =嗨95,
High80 =嗨80,
预测 =积分预测)
df1

#> #小动作:28 x 8
#>数据拟合的预测Low80 High80 Low95 High95 Date
#> < int> < dbl> < dbl> < dbl> < dbl> < dbl> < dbl> < date>
#> 1 8575 NA NA NA NA NA NA NA 2015-01-01
#> 2 8215不适用不适用不适用不适用2015-02-01
#> 3 16399不适用不适用不适用不适用不适用2015-03-01
#> 4 16415不适用不适用不适用不适用不适用2015-04-01
#> 5 15704不适用不适用不适用不适用不适用2015-05-01
#> 6 19805不适用不适用不适用不适用不适用2015-06-01
#> 7 17484不适用不适用不适用不适用不适用2015-07-01
#> 8 18116不适用不适用不适用不适用不适用2015-08-01
#> 9 19977不适用不适用不适用不适用不适用2015-09-01
#> 10 14439不适用不适用不适用不适用2015-10-01
#> #...还有18行

###避免数据和预测之间的差距
#在obs中找到最后一个不丢失的NA值,然后使用该
#一个初始化所有预测列
lastNonNAinData<-max(which(complete.cases(df1 $ Data)))
df1 [lastNonNAinData,
!(colnames(df1)%in%c(数据,拟合,日期))]--df1 $ Data [lastNonNAinData]

ggplot(df1,aes(x = Date))+
geom_ribbon(aes( ymin = Low95,ymax = High95,填充= 95%))+
geom_ribbon(aes(ymin = Low80,ymax = High80,填充= 80%))+
geom_point(aes( y =数据,颜色=数据),大小= 4)+
geom_line(aes(y =数据,组= 1,颜色=数据),
线型=虚线,大小= 0.75)+
geom_line(aes(y = Fitted,group = 2,color = Fitted),size = 0.75)+
geom_line(aes(y = Forecast,group = 3,color = 预测),大小= 0.75)+
scale_x_date(breaks = scales :: pretty_breaks(),date_labels =%b%y)+
scale_colour_brewer(name = 图例,类型= qual,调色板= Dark2)+
scale_fill_brewer(name = Intervals)+
指南(颜色= guide_legend(顺序= 1),fill = guide_legend(顺序= 2))+
theme_bw(base_size = 14)



编辑:要包含 2017- 01-01到 2017-04-01

 #创建一个已知的新列值
df1 $ Obs<-NA
df1 $ Obs [[nrow(df1)-3):( nrow(df1))]<-c(1020,800,1130,600)

ggplot(df1,aes(x = Date))+
geom_ribbon(aes(ymin = Low95,ymax = High95,fill = 95%))+
geom_ribbon( aes(ymin = Low80,ymax = High80,填充= 80%))+
geom_point(aes(y =数据,颜色= Data),大小= 4)+
geom_line(aes (y =数据,组= 1,颜色=数据),
线型=点状,大小= 0.75)+
geom_line(aes(y =拟合,组= 2,颜色=拟合),大小= 0.75)+
geom_line(aes(y =预测,组= 3,颜色=预测),大小= 0.75)+
scale_x_date(breaks = scales :: pretty_breaks( ),date_labels =%b%y)+
scale_colour_brewer(name = Legend,type = qual,调色板= Dark2)+
scale_fill_brewer(name = Intervals)+
指南(颜色= guide_legend(顺序= 1),填充= guide_legend(顺序= 2))+
theme_bw(base_size = 14)+
geom_line(aes(y = Obs,group = 4,color = Obs),linetype = dotted,size = 0.75)



或将这些值直接放入 Data

  df1 $ Data [(nrow(df1)-3 ):( nrow(df1))]<-c(1020,800,1130,600)

ggplot(df1,aes(x = Date))+
geom_ribbon(aes( ymin = Low95,ymax = High95,填充= 95%))+
geom_ribbon(aes(ymin = Low80,ymax = High80,填充= 80%))+
ge om_point(aes(y =数据,颜色=数据),大小= 3)+
geom_line(aes(y =数据,组= 1,颜色=数据),
线型=点分,大小= 0.75)+
geom_line(aes(y =已拟合,组= 2,颜色=拟合),大小= 0.75)+
geom_line(aes(y =预测,组= 3,颜色=预测),大小= 0.75)+
scale_x_date(breaks = scales :: pretty_breaks(),date_labels =%b%y)+
scale_colour_brewer(name = Legend ,类型= qual,调色板= Dark2)+
scale_fill_brewer(name = Intervals)+
guides(color = guide_legend(order = 1),fill = guide_legend(order = 2) )+
theme_bw(base_size = 14)



reprex软件包(v0.2.0)。


I perform forecast

w=read.csv("C:/Users/admin/Documents/aggrmonth.csv", sep=";",dec=",")
w
#create time series object
w=ts(w$new,frequency = 12,start=c(2015,1)) 
w
#timeplot
plot.ts(w)

#forecast for the next months
library("forecast")
m <- stats::HoltWinters(w)
test=forecast:::forecast.HoltWinters(m,h=4) #h is how much month do you want to predict
test

now i want get forecast for 4 months ahead. From 01.2017-04.2017. I this know original values.

1-Jan-17    1020
1-Feb-17    800
1-Mar-17    1130
1-Apr-17    600

But i need get plot where displayed predicted values with CI are overlapped with original value. Of course if i don't clearly exlplain, i attached the plot. The green curve is the initial value of the series(my 4 months) and green dotted line is predictied values are overlapped on original values. Dashes on the predicted dotted curve are confidence intervals.

How to create such plot

w=

structure(list(yearMon = structure(c(9L, 7L, 15L, 1L, 17L, 13L, 
11L, 3L, 23L, 21L, 19L, 5L, 10L, 8L, 16L, 2L, 18L, 14L, 12L, 
4L, 24L, 22L, 20L, 6L), .Label = c("1-Apr-15", "1-Apr-16", "1-Aug-15", 
"1-Aug-16", "1-Dec-15", "1-Dec-16", "1-Feb-15", "1-Feb-16", "1-Jan-15", 
"1-Jan-16", "1-Jul-15", "1-Jul-16", "1-Jun-15", "1-Jun-16", "1-Mar-15", 
"1-Mar-16", "1-May-15", "1-May-16", "1-Nov-15", "1-Nov-16", "1-Oct-15", 
"1-Oct-16", "1-Sep-15", "1-Sep-16"), class = "factor"), new = c(8575L, 
8215L, 16399L, 16415L, 15704L, 19805L, 17484L, 18116L, 19977L, 
14439L, 9258L, 12259L, 4909L, 9539L, 8802L, 11253L, 11971L, 7838L, 
2095L, 4157L, 3910L, 1306L, 3429L, 1390L)), .Names = c("yearMon", 
"new"), class = "data.frame", row.names = c(NA, -24L))

解决方案

We can use ggfortify to create a data frame then plot both timeseries with ggplot2

# Load required libraries
library(lubridate)
library(magrittr)
library(tidyverse)
library(scales)
library(forecast)
library(ggfortify)

w <- structure(list(yearMon = structure(c(9L, 7L, 15L, 1L, 17L, 13L, 
  11L, 3L, 23L, 21L, 19L, 5L, 10L, 8L, 16L, 2L, 18L, 14L, 12L, 
  4L, 24L, 22L, 20L, 6L), .Label = c("1-Apr-15", "1-Apr-16", "1-Aug-15", 
  "1-Aug-16", "1-Dec-15", "1-Dec-16", "1-Feb-15", "1-Feb-16", "1-Jan-15", 
  "1-Jan-16", "1-Jul-15", "1-Jul-16", "1-Jun-15", "1-Jun-16", "1-Mar-15", 
  "1-Mar-16", "1-May-15", "1-May-16", "1-Nov-15", "1-Nov-16", "1-Oct-15", 
  "1-Oct-16", "1-Sep-15", "1-Sep-16"), class = "factor"), new = c(8575L, 
  8215L, 16399L, 16415L, 15704L, 19805L, 17484L, 18116L, 19977L, 
  14439L, 9258L, 12259L, 4909L, 9539L, 8802L, 11253L, 11971L, 7838L, 
  2095L, 4157L, 3910L, 1306L, 3429L, 1390L)), .Names = c("yearMon", 
  "new"), class = "data.frame", row.names = c(NA, -24L))

# create time series object
w = ts(w$new, frequency = 12, start=c(2015, 1)) 
w

#>        Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov
#> 2015  8575  8215 16399 16415 15704 19805 17484 18116 19977 14439  9258
#> 2016  4909  9539  8802 11253 11971  7838  2095  4157  3910  1306  3429
#>        Dec
#> 2015 12259
#> 2016  1390

# forecast for the next months
m <- stats::HoltWinters(w)

# h is how much month do you want to predict
pred = forecast:::forecast.HoltWinters(m, h=4) 
pred

#>          Point Forecast     Lo 80     Hi 80      Lo 95     Hi 95
#> Jan 2017    -5049.00381 -9644.003 -454.0045 -12076.449  1978.441
#> Feb 2017       37.44605 -5599.592 5674.4843  -8583.660  8658.552
#> Mar 2017     -256.41474 -6770.890 6258.0601 -10219.444  9706.615
#> Apr 2017     2593.09445 -4693.919 9880.1079  -8551.431 13737.620

# plot
plot(pred, include = 24, showgap = FALSE)

# Convert pred from list to data frame object
df1 <- fortify(pred) %>% as_tibble()

# Create Date column, remove Index column and rename other columns 
df1 %<>% 
  mutate(Date = as.Date(Index, "%Y-%m-%d")) %>% 
  select(-Index) %>% 
  rename("Low95" = "Lo 95",
         "Low80" = "Lo 80",
         "High95" = "Hi 95",
         "High80" = "Hi 80",
         "Forecast" = "Point Forecast")
df1

#> # A tibble: 28 x 8
#>     Data Fitted Forecast Low80 High80 Low95 High95 Date      
#>    <int>  <dbl>    <dbl> <dbl>  <dbl> <dbl>  <dbl> <date>    
#>  1  8575     NA       NA    NA     NA    NA     NA 2015-01-01
#>  2  8215     NA       NA    NA     NA    NA     NA 2015-02-01
#>  3 16399     NA       NA    NA     NA    NA     NA 2015-03-01
#>  4 16415     NA       NA    NA     NA    NA     NA 2015-04-01
#>  5 15704     NA       NA    NA     NA    NA     NA 2015-05-01
#>  6 19805     NA       NA    NA     NA    NA     NA 2015-06-01
#>  7 17484     NA       NA    NA     NA    NA     NA 2015-07-01
#>  8 18116     NA       NA    NA     NA    NA     NA 2015-08-01
#>  9 19977     NA       NA    NA     NA    NA     NA 2015-09-01
#> 10 14439     NA       NA    NA     NA    NA     NA 2015-10-01
#> # ... with 18 more rows

### Avoid the gap between data and forcast
# Find the last non missing NA values in obs then use that
# one to initialize all forecast columns
lastNonNAinData <- max(which(complete.cases(df1$Data)))
df1[lastNonNAinData, 
    !(colnames(df1) %in% c("Data", "Fitted", "Date"))] <- df1$Data[lastNonNAinData]

ggplot(df1, aes(x = Date)) + 
  geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
  geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
  geom_point(aes(y = Data, colour = "Data"), size = 4) +
  geom_line(aes(y = Data, group = 1, colour = "Data"), 
            linetype = "dotted", size = 0.75) +
  geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
  geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
  scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
  scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
  scale_fill_brewer(name = "Intervals") +
  guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
  theme_bw(base_size = 14)

Edit: To included known values from "2017-01-01" to "2017-04-01"

# Create new column which has known values
df1$Obs <- NA
df1$Obs[(nrow(df1)-3):(nrow(df1))] <- c(1020, 800, 1130, 600)

ggplot(df1, aes(x = Date)) + 
  geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
  geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
  geom_point(aes(y = Data, colour = "Data"), size = 4) +
  geom_line(aes(y = Data, group = 1, colour = "Data"), 
            linetype = "dotted", size = 0.75) +
  geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
  geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
  scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
  scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
  scale_fill_brewer(name = "Intervals") +
  guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
  theme_bw(base_size = 14) +
  geom_line(aes(y = Obs, group = 4, colour = "Obs"), linetype = "dotted", size = 0.75)

Or put those values directly into Data column

df1$Data[(nrow(df1)-3):(nrow(df1))] <- c(1020, 800, 1130, 600)

ggplot(df1, aes(x = Date)) + 
  geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
  geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
  geom_point(aes(y = Data, colour = "Data"), size = 3) +
  geom_line(aes(y = Data, group = 1, colour = "Data"), 
            linetype = "dotted", size = 0.75) +
  geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
  geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
  scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
  scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
  scale_fill_brewer(name = "Intervals") +
  guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
  theme_bw(base_size = 14)

Created on 2018-04-21 by the reprex package (v0.2.0).

这篇关于将预测的时间序列与R中的原始序列重叠的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆