解释ARIMA加权代码背后的方法 [英] Explanation of the methodology behind this ARIMA weighted code
问题描述
我拿到了一段代码，它运行一个对近期误差赋予更大权重的ARIMA模型。它的结果非常好，比普通的ARIMA好得多，但我不理解它背后的方法。如果你能看懂其中发生了什么、为什么这样做以及它是如何工作的，我将非常感激 :)
我希望得到解释的代码从 #----- Weighting ----- 这一部分开始
(你可以将以下全部内容粘贴到R中)
suppressMessages(library(lmtest))
suppressMessages(库(tseries))
suppressMessages(库(预测))
suppressMessages(库(TTR))
#------------------------------------------------ -------------------------------
Input.data< - matrix(c( 8Q1\" , 8Q2, 8Q3, 8Q4, 9Q1, 9Q2, 9Q3, 9Q4, 10Q1, 10Q2, 10Q3, 10Q4, 11Q1 11Q2, 11Q3, 11Q4, 12Q1, 12Q2, 12Q3, 12Q4, 13Q1, 13Q2, 13Q3, 13Q4, 14Q1, 14Q2\" , 14Q3,5403.675741,6773.504993,7231.117289,7835.55156,5236.709983,5526.619467,6555.781711,11464.72728,7210.068674,7501.610403,8670.903486,10872.93518,8209.022658,8153.393088,10196.44775,13244.50201,8356.732878,10188.44157,10601.32205,12617.82102,11786.526 41,10044.98676,11006.0051,15101.9456,10992.27282,11421.18922,10731.31198),ncol = 2,byrow = FALSE)
#---------- -------------------------------------------------- -------------------
#允许的最大季节差异。对于典型系列,建议使用0。
max.sdiff< - 2
# - -------------------------------------------------- ---------------------------
#强制季节性
arima.force.seasonality< - y
#------------------ -------------------------------------------------- -----------
#数据的频率。 1/4为季度,1/12为每月
频率< - 1/4
#------------------------------------------------- ------------------------------
#预测多少个季度/月。
Forecast.horizon< - 4
#-------------- -------------------------------------------------- ---------------
#系列中的第一个日期。用c(8,1)表示2008 q1
Start.date< - c(8,1)
#-------------------------------------------- -----------------------------------
#预测的日期< br $> b $ b
Forecast.dates< - c(14Q4,15Q1,15Q2,15Q3)
#-------------------------------------------- -----------------------------------
#设置是否应该是数据登录。取值s(让脚本选择记录)
#级别(强制级别)或日志(强制日志)
force.log< - s
#----------------------- -------------------------------------------------- ------
#从Input.data中选择数据列
Data.col< - as.numeric(输入.data [,length(Input.data [1,])])
#----------------- -------------------------------------------------- ------------
#将Data.col转换为时间序列
Data.col .ts< - ts(Data.col,deltat = Frequency,start = Start.date)
#------------ -------------------------------------------------- -----------------
#来自Input.data的日期的字符向量
Dates.col< - as.character(Input.data [,1])
#------------- -------------------------------------------------- ----------------
#开始了测试是否应该记录数据
transform.method< - round(BoxCox.lambda(Data.col.ts,method =loglik), 5)
log.values< - seq(0,0.24999,by = 0.00001)
sqrt.values< - seq( 0.25,0.74999,by = 0.00001)
which.transform.log< - transform.method%in%log.values
.transform.sqrt< - transform.method%in%sqrt.values
if(which.transform.log ==TRUE){
as.log< - log
Data.new< - log(Data.col.ts)
}其他{
if(which.transform.sqrt ==TRUE){
as.log< - sqrt
Data.new< - sqrt (Data.col.ts)
} else {
as.log< - no
Data.new< - 数据.col.ts
}
}
#-----加权----- -------------------------------------------------- --------
fweight< - function(x){
PatX< - 0.5 + x
返回(PatX)
}
integ1< - integrate( fweight,lower = 0.00,upper = 1)
valinteg< - 2 * integ1 $ value
#Split几个区间的积分,并相应选择权重
integvals< - rep(0,length.out = length(Data.new))
for(i in 1:length(Data.new)){
integi< - integrate(fweight,lower =(i-1)/ length(Data.new) ),upper = i / length(Data.new))
integvals [i]< - 2 * integi $ value
}
suppressWarnings(kpssW< - kpss.test(Data.new,null =Level))
suppressWarnings(ppW< - tryCatch({
ppW< - pp.test(Data.new,alternative =stationary)},
error = function(ppW){
ppW< - list(error =TRUE,p.value = 0.99)
}))
抑制警告(adfW< - adf.test(Data.new,alternative =statio nary,
k = trunc((length(Data.new) - 1)^(1/3))))
suppressWarnings (if(kpssW $ p.value< 0.05 |
ppW $ p.value> 0.05 |
adfW $ p.value> 0.05){
ndiffsW = 1
}否则{
ndiffsW = 0
})
aaw< - auto.arima(Data.new,
max.D = max.sdiff,
d = ndiffsW ,
seasonal = TRUE,
allowdrift = FALSE,
stepwise = FALSE,
trace = FALSE,
seasonal.test =ch)
order.arima< - c(aaw $ arma [1],aaw $ arma [ 6],aaw $ arma [2])
order.seasonal.arima< - c(aaw $ arma [3],aaw $ arma [7], aaw $ arma [4])
if(sum(aaw $ arma [1:2])== 0){
order .arima [1]< - 1
}否则{
NULL
}
if(arima.force.seasonality ==y){
if(sum(aaw $ arma [3:4])== 0){
order.seasonal.arima [1]< - 1
} else {
NULL
}
}否则{
NULL
}
#----- ARIMA --------------------------- ----------------------------------------
#适合设定订单的ARIMA模型
stAW< - Arima(Data.new,
order = order.arima,
seasonal = list (order = order.seasonal.arima),
method =ML)
parSW< - stAW $ coef
WMAEOPT< - 功能(parSW){
ArimaW< - Arima(Data.new,
order = order.arima,
seasonal = list(order = order.seasonal.arima),
include.drift = FALSE,
method = ML,
fixed = c(parSW))
errAR< - c(abs(resid(ArimaW)))
WMAE< ; - t(errAR)%*%整数
返回(WMAE)
}
OPTWMAE< - optim(parSW,
WMAEOPT,
method =SANN,
set.seed(2),
control = list(fnscale = 1, maxit = 5000))
parS3< - OPTWMAE $ par
Arima.Data.new< ; - Arima(Data.new,order = order.arima,seasonal = list(order = order.seasonal.arima),
include.drift = FALSE,method =ML,fixed = c (parS3))
I have some code that was given to me that runs an ARIMA model putting more weight on recent errors. It gives excellent results, much better than a simple ARIMA, but I do not understand the methodology behind it. If you can understand what's going on, and why and how it works, I would really appreciate it :)
The code that I would like explained starts at the #----- Weighting ----- section
(you can paste the whole of the following into R)
suppressMessages(library(lmtest))
suppressMessages(library(tseries))
suppressMessages(library(forecast))
suppressMessages(library(TTR))
#-------------------------------------------------------------------------------
# Two-column character matrix holding the quarterly series: column 1 is the
# quarter label ("8Q1" ... "14Q3"), column 2 the observed value. Combining
# labels and numbers in one vector coerces the numbers to character.
Input.data <- local({
  # 8Q1 .. 14Q4 would be 28 labels; the series stops at 14Q3, hence 27.
  quarter_labels <- head(paste0(rep(8:14, each = 4), "Q", 1:4), 27)
  quarter_values <- c(
    5403.675741, 6773.504993, 7231.117289, 7835.55156,
    5236.709983, 5526.619467, 6555.781711, 11464.72728,
    7210.068674, 7501.610403, 8670.903486, 10872.93518,
    8209.022658, 8153.393088, 10196.44775, 13244.50201,
    8356.732878, 10188.44157, 10601.32205, 12617.82102,
    11786.52641, 10044.98676, 11006.0051, 15101.9456,
    10992.27282, 11421.18922, 10731.31198
  )
  matrix(c(quarter_labels, quarter_values), ncol = 2, byrow = FALSE)
})
#-------------------------------------------------------------------------------
# ---- User settings ------------------------------------------------------------

# Upper bound on the number of seasonal differences (handed to auto.arima as
# max.D). For typical series, 0 is recommended.
max.sdiff <- 2

# "y" forces a seasonal AR term when auto.arima selects no seasonal terms.
arima.force.seasonality <- "y"

# Spacing between observations, used as `deltat` when building the time
# series: 1/4 for QUARTERLY data, 1/12 for MONTHLY data.
Frequency <- 1 / 4

# Number of quarters/months to forecast.
Forecast.horizon <- 4

# First period of the series; c(8, 1) denotes 2008 Q1.
Start.date <- c(8, 1)

# Labels of the periods to be forecast.
Forecast.dates <- c("14Q4", "15Q1", "15Q2", "15Q3")

# Logging switch: "s" (let the script choose), "level" (force levels) or
# "log" (force logs).
# NOTE(review): not read anywhere in the visible part of the script - confirm.
force.log <- "s"

# ---- Series construction ------------------------------------------------------

# The values live in the last column of Input.data (stored as character).
Data.col <- as.numeric(Input.data[, ncol(Input.data)])

# Same values as a regularly spaced time series.
Data.col.ts <- ts(Data.col, start = Start.date, deltat = Frequency)

# Period labels, taken from the first column of Input.data.
Dates.col <- as.character(Input.data[, 1])
#-------------------------------------------------------------------------------
# Choose a variance-stabilising transform from the Box-Cox lambda (estimated
# with method = "loglik", rounded to 5 decimals):
#   lambda in [0, 0.25)    -> log
#   lambda in [0.25, 0.75) -> square root
#   anything else          -> keep the series in levels
transform.method <- round(BoxCox.lambda(Data.col.ts, method = "loglik"), 5)

# Grids kept only so that any later code referring to them still finds them;
# the decision below no longer uses them.
log.values <- seq(0, 0.24999, by = 0.00001)
sqrt.values <- seq(0.25, 0.74999, by = 0.00001)

# Range checks replace `transform.method %in% <grid>`: that was an exact
# floating-point equality test, and grid points produced by seq() are not
# guaranteed to be bit-identical to round(lambda, 5), so a lambda inside the
# intended range could be missed. isTRUE() keeps the old "no transform"
# outcome if lambda is NA.
which.transform.log <- isTRUE(transform.method >= 0 && transform.method < 0.25)
which.transform.sqrt <- isTRUE(transform.method >= 0.25 && transform.method < 0.75)

# as.log records which transform was applied; Data.new is the series that is
# modelled from here on.
if (which.transform.log) {
  as.log <- "log"
  Data.new <- log(Data.col.ts)
} else if (which.transform.sqrt) {
  as.log <- "sqrt"
  Data.new <- sqrt(Data.col.ts)
} else {
  as.log <- "no"
  Data.new <- Data.col.ts
}
#----- Weighting ---------------------------------------------------------------
# Linear weight density: returns 0.5 + x, i.e. 0.5 at x = 0 rising to 1.5 at
# x = 1. Vectorised over x.
fweight <- function(x) {
  0.5 + x
}
# Twice the integral of the weight density over [0, 1]. For 0.5 + x the
# integral is 1, so valinteg is 2 - the total that the per-observation
# weights below add up to.
integ1 <- integrate(fweight, lower = 0.00, upper = 1)
valinteg <- 2 * integ1$value

# Split [0, 1] into one equal-width slice per observation (oldest first) and
# use twice the integral of the weight density over slice i as the weight of
# observation i. With fweight(x) = 0.5 + x and n observations this is
#   w_i = 1/n + (2i - 1)/n^2
# (up to integrate()'s numerical tolerance): weights increase linearly, from
# about 1/n for the oldest observation to about 3/n for the newest.
n.obs <- length(Data.new)
integvals <- numeric(n.obs)
# seq_len() instead of 1:n so that an empty series yields an empty weight
# vector rather than iterating over c(1, 0).
for (i in seq_len(n.obs)) {
  integi <- integrate(fweight, lower = (i - 1) / n.obs, upper = i / n.obs)
  integvals[i] <- 2 * integi$value
}
# Three stationarity tests on the transformed series decide whether one
# regular difference is applied. Warnings raised by the tests are silenced.
kpssW <- suppressWarnings(kpss.test(Data.new, null = "Level"))

# If pp.test() throws, substitute a stand-in result with p = 0.99, which
# makes the rule below choose differencing.
ppW <- suppressWarnings(tryCatch(
  pp.test(Data.new, alternative = "stationary"),
  error = function(e) list(error = "TRUE", p.value = 0.99)
))

# Lag order for the ADF test: cube root of (n - 1), truncated.
adf.lag <- trunc((length(Data.new) - 1)^(1/3))
adfW <- suppressWarnings(
  adf.test(Data.new, alternative = "stationary", k = adf.lag)
)

# No differencing only when all three tests agree: KPSS p-value not below
# 0.05, and PP and ADF p-values both not above 0.05. Otherwise difference once.
looks.stationary <- kpssW$p.value >= 0.05 &&
  ppW$p.value <= 0.05 &&
  adfW$p.value <= 0.05
ndiffsW <- if (looks.stationary) 0 else 1
# Let auto.arima() choose the model orders, with the regular differencing
# order fixed to ndiffsW, no drift term, and a full (non-stepwise) search.
aaw <- auto.arima(Data.new,
                  max.D = max.sdiff,
                  d = ndiffsW,
                  seasonal = TRUE,
                  allowdrift = FALSE,
                  stepwise = FALSE,
                  trace = FALSE,
                  seasonal.test = "ch")

# aaw$arma is the compact model specification c(p, q, P, Q, period, d, D);
# rearrange it into the (p, d, q) and (P, D, Q) triples that Arima() expects.
order.arima <- c(aaw$arma[1], aaw$arma[6], aaw$arma[2])
order.seasonal.arima <- c(aaw$arma[3], aaw$arma[7], aaw$arma[4])

# If no non-seasonal AR or MA term was selected, force one AR term.
if (sum(aaw$arma[1:2]) == 0) {
  order.arima[1] <- 1
}

# Optionally do the same for the seasonal part. The former `else NULL`
# branches did nothing except print a stray NULL when pasted at the console,
# so they are dropped.
if (arima.force.seasonality == "y" && sum(aaw$arma[3:4]) == 0) {
  order.seasonal.arima[1] <- 1
}
#----- ARIMA -------------------------------------------------------------------
# Ordinary maximum-likelihood fit with the chosen orders. Its coefficients
# are the starting point for the weighted optimisation.
stAW <- Arima(Data.new,
              order = order.arima,
              seasonal = list(order = order.seasonal.arima),
              method = "ML")
parSW <- stAW$coef

# Objective function: weighted sum of absolute in-sample residuals.
#
# parSW: candidate coefficient vector, in the same order as stAW$coef.
# Returns a 1 x 1 matrix holding sum(|residual_t| * integvals[t]).
#
# Every coefficient is pinned through `fixed`, so Arima() estimates nothing
# here; it only produces the residuals implied by the candidate coefficients.
# Reads Data.new, order.arima, order.seasonal.arima and integvals from the
# enclosing environment.
WMAEOPT <- function(parSW) {
  pinned.fit <- Arima(Data.new,
                      order = order.arima,
                      seasonal = list(order = order.seasonal.arima),
                      include.drift = FALSE,
                      method = "ML",
                      fixed = c(parSW))
  # as.numeric() drops the time-series attributes before the matrix product.
  abs.resid <- abs(as.numeric(residuals(pinned.fit)))
  t(abs.resid) %*% integvals
}
# Simulated annealing is stochastic, so seed the RNG for reproducible results.
# The seed used to be written as a positional argument inside optim(), where
# it was bound to optim's `gr` parameter and took effect only because optim
# evaluates `gr` before the search starts. Calling it explicitly gives the
# same seeding without relying on that accident.
set.seed(2)

# Search for the coefficients that minimise the weighted absolute error
# (fnscale = 1 means minimisation), starting from the ML estimates.
OPTWMAE <- optim(parSW,
                 WMAEOPT,
                 method = "SANN",
                 control = list(fnscale = 1, maxit = 5000))
parS3 <- OPTWMAE$par

# Final model: same orders, with every coefficient pinned to the optimised
# values, so this call estimates no coefficients.
Arima.Data.new <- Arima(Data.new,
                        order = order.arima,
                        seasonal = list(order = order.seasonal.arima),
                        include.drift = FALSE,
                        method = "ML",
                        fixed = c(parS3))
推荐答案
value
#Split几个区间的积分,并选择权重相应的
整数< - rep(0,length.out = length(Data.new))
for(i in 1: length(Data.new)){
integi< - integrate(fweight,lower =(i-1)/ length(Data.new),upper = i / length(Data.new))
integvals [i]< - 2 * integi
value
#Split the integral to several intervals, and pick the weights accordingly
integvals <- rep(0, length.out = length(Data.new))
for (i in 1:length(Data.new)){
integi <- integrate(fweight, lower = (i-1)/length(Data.new), upper= i/length(Data.new))
integvals[i] <- 2*integi
value < br $>
}
suppressWarnings(kpssW< - kpss.test(Data.new,null =Level))
suppressWarnings(ppW< - tryCatch({
ppW< - pp.test(Data.new,alternative =stationary)}},
error = function(ppW){
ppW< - list(error =TRUE,p.value = 0.99)
}))
suppressWarnings(adfW< - adf.test(Data.new,alternative =fixed,
k = trunc((length( Data.new) - 1)^(1/3))))
suppressWarnings(if(kpssW
value
}
suppressWarnings(kpssW <- kpss.test(Data.new, null="Level"))
suppressWarnings(ppW <- tryCatch({
ppW <- pp.test(Data.new, alternative = "stationary")},
error = function(ppW){
ppW <- list(error = "TRUE", p.value = 0.99)
}))
suppressWarnings(adfW <- adf.test(Data.new, alternative = "stationary",
k = trunc((length(Data.new) - 1)^(1/3))))
suppressWarnings(if (kpssW
p。价值< 0.05 |
ppW
p.value < 0.05 |
ppW
这篇关于解释ARIMA加权代码背后的方法的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!