自动查找和乘以 R 系数 [英] Automatic vlookup and multiply coefficients with R

查看:62
本文介绍了自动查找和乘以 R 系数的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在尝试用 R(统计编程语言)编写一个函数,它可以让我自动计算线性回归 (lm)

I'm trying to code a function in R (the statistical programming language) that would allow me to automate the calculation of a linear regression (lm).

问题:回归是通过 "step" 函数计算出来的,所以被选中的系数无法提前知道.需要解决的问题:

The problem: The regression is calculated through the "step" function, so the coefficients selected cannot be known in advance. Problems to solve:

  1. 自动识别 step 函数选择的系数.

  1. Automate identifying the coefficients selected by the step function.

  2. 查找(vlookup)结果的第二列(估计值,例如 "View(OpenCoefs)"),并将其与原始数据框 "sp" 中对应列的最后一行(最后一天)交叉相乘.

  2. Vlookup and cross-multiply the second column of the results (the estimates, e.g. "View(OpenCoefs)") with the last row (last day) of the respective columns of the original data frame "sp".

理想的解决方案是一个函数,我只需输入 "run()",该函数就会返回每个回归的 "y",即标准普尔 500 指数第二天的预测值(开盘价、最低价、最高价、收盘价).

The desirable solution would be a function where I would just type "run()" and it would return the "y"s for each regression, namely, the forecast of the S&P 500 index for the following day (Open, Low, High, Close).

该代码从雅虎财经网站检索数据,因此如果您运行它,它就可以运行.

The code retrieves data from the Yahoo Finance website, so it's operational if you run it.

这是代码.

# Download the daily ^GSPC price history as CSV and read it into a data frame.
# NOTE(review): this ichart.finance.yahoo.com endpoint is believed to have been
# retired by Yahoo -- confirm it still responds before relying on the script.
sp <- read.csv(
  file = "http://ichart.finance.yahoo.com/table.csv?s=%5EGSPC&a=03&b=1&c=1940&d=03&e=1&f=2014&g=d&ignore=.csv"
)

# Discard the adjusted-close column.
sp[["Adj.Close"]] <- NULL

# Reverse the row order. The rest of the script treats row i - 1 as the
# observation preceding row i, so presumably the download is newest-first and
# this puts it in chronological order -- confirm against the feed.
sp <- sp[nrow(sp):1, ]


# Period-over-period growth rate of a numeric series: x[i] / x[i - 1] - 1.
#
# x: numeric vector ordered from oldest to newest observation.
# Returns a numeric vector of the same length as `x`; the first element is NA
# because it has no predecessor. Inputs of length 0 or 1 are handled without
# error (the row-by-row `2:nrow(sp)` form would iterate over c(2, 1) there).
pct_change <- function(x) {
  n <- length(x)
  if (n < 2L) {
    return(rep(NA_real_, n))
  }
  c(NA_real_, x[-1L] / x[-n] - 1)
}

# Add Gr_Open, Gr_High, Gr_Low, Gr_Close and Gr_Volume (in that order) to `sp`.
# Whole-column arithmetic replaces five per-row loops over the data frame.
for (price_col in c("Open", "High", "Low", "Close", "Volume")) {
  sp[[paste0("Gr_", price_col)]] <- pct_change(sp[[price_col]])
}

# Running row number (1, 2, ..., number of rows), kept both as a free-standing
# vector and as the last column of `sp`.
nRows_in_sp <- seq.int(from = 1, to = nrow(sp))
sp[["nRows_in_sp"]] <- nRows_in_sp


# Trailing rolling mean used for every *_Rollin column.
#
# x:      numeric vector ordered from oldest to newest observation.
# window: number of observations averaged, i.e. the current one plus the
#         `window - 1` before it (101 means positions (i - 100):i).
# warmup: positions 2..warmup are set to 0 instead of being averaged.
#
# Returns a numeric vector of the same length as `x`: NA at position 1, 0 for
# positions 2..warmup, and the trailing mean for every later position.
rolling_mean <- function(x, window = 101L, warmup = 1000L) {
  # Guarantees that every averaged window starts at a valid (>= 1) index.
  stopifnot(window >= 1L, warmup >= window)
  out <- rep(NA_real_, length(x))
  pos <- seq_along(x)
  out[pos >= 2L & pos <= warmup] <- 0
  live <- pos[pos > warmup]
  out[live] <- vapply(
    live,
    function(i) mean(x[(i - window + 1L):i]),
    numeric(1)
  )
  out
}

# Target column -> source column, listed in the order the columns are appended
# to `sp`. Driving all eight columns from one table removes the copy-paste
# slip that wrote the warm-up zeros of Close_Rollin into a stray column whose
# name had a leading space (" Close_Rollin").
rolling_mean_cols <- c(
  Open_Rollin     = "Open",
  Close_Rollin    = "Close",
  Low_Rollin      = "Low",
  High_Rollin     = "High",
  Open_GR_Rollin  = "Gr_Open",
  Close_GR_Rollin = "Gr_Close",
  Low_GR_Rollin   = "Gr_Low",
  High_GR_Rollin  = "Gr_High"
)

for (target in names(rolling_mean_cols)) {
  sp[[target]] <- rolling_mean(sp[[rolling_mean_cols[[target]]]])
}


# Trailing rolling standard deviation used for every *_SD_Rollin column.
#
# x:      numeric vector ordered from oldest to newest observation.
# window: number of observations per window, i.e. the current one plus the
#         `window - 1` before it (101 means positions (i - 100):i).
#
# Returns a numeric vector of the same length as `x`; positions that do not
# yet have a full window behind them are NA.
rolling_sd <- function(x, window = 101L) {
  stopifnot(window >= 1L)
  out <- rep(NA_real_, length(x))
  pos <- seq_along(x)
  live <- pos[pos >= window]
  out[live] <- vapply(
    live,
    function(i) sd(x[(i - window + 1L):i]),
    numeric(1)
  )
  out
}

# Append Open_SD_Rollin, Close_SD_Rollin, Low_SD_Rollin and High_SD_Rollin
# (in that order) to `sp`.
for (price_col in c("Open", "Close", "Low", "High")) {
  sp[[paste0(price_col, "_SD_Rollin")]] <- rolling_sd(sp[[price_col]])
}


# Number of observations in `sp`.
N <- nrow(sp)

# Shift a vector one position towards the end: element i of the result is
# element i - 1 of `x`, and the first element is NA. The result has the same
# length as `x`; an empty input is returned unchanged (the `1:(N - 1)` indexing
# form would yield c(1, 0) for a single-row input and break the column bind).
lag_one <- function(x) {
  if (length(x) == 0L) {
    return(x)
  }
  c(NA, x[-length(x)])
}

# Source columns, in the order their "<name>lag" counterparts are appended.
lag_sources <- c(
  "Open", "High", "Low", "Close",
  "Gr_Open", "Gr_High", "Gr_Low", "Gr_Close", "Gr_Volume",
  "Open_GR_Rollin", "Low_GR_Rollin", "High_GR_Rollin", "Close_GR_Rollin",
  "Open_SD_Rollin", "Low_SD_Rollin", "High_SD_Rollin", "Close_SD_Rollin"
)

for (src in lag_sources) {
  lagged <- lag_one(sp[[src]])
  sp[[paste0(src, "lag")]] <- lagged
  # Also keep each lagged series as a free-standing vector named "<src>lag":
  # model formulas elsewhere in the script may refer to these names without a
  # `data =` argument.
  assign(paste0(src, "lag"), lagged)
}
rm(src, lagged)




# Lagged predictors offered to every model.
predictor_cols <- c(
  "Openlag", "Lowlag", "Highlag", "Closelag",
  "Gr_Openlag", "Gr_Lowlag", "Gr_Highlag", "Gr_Closelag", "Gr_Volumelag",
  "Open_GR_Rollinlag", "Low_GR_Rollinlag", "High_GR_Rollinlag",
  "Close_GR_Rollinlag",
  "Open_SD_Rollinlag", "Low_SD_Rollinlag", "High_SD_Rollinlag",
  "Close_SD_Rollinlag"
)
response_cols <- c("Open", "Low", "High", "Close")

# step() must refit every candidate model on the same rows; if lm() is left to
# drop NA rows itself, removing a predictor can change the row count and
# step() stops with "number of rows in use has changed". Fitting on one
# complete-case data frame avoids that.
# NOTE(review): this assumes the four response columns contain no NA of their
# own, so the rows match what lm() would have used per model -- confirm.
model_data <- na.omit(sp[, c(response_cols, predictor_cols)])

# Full models. Variables are resolved through `data =` rather than through
# free-standing vectors in the workspace.
OpenLM  <- lm(reformulate(predictor_cols, response = "Open"), data = model_data)
LowLM   <- update(OpenLM, Low ~ .)
HighLM  <- update(OpenLM, High ~ .)
CloseLM <- update(OpenLM, Close ~ .)

# Stepwise selection in both directions. The selected models are kept (not
# only their coefficient tables) so fitted() and predict() can be used on
# them afterwards.
OpenStep  <- step(OpenLM, direction = "both", test = "F")
LowStep   <- step(LowLM, direction = "both", test = "F")
HighStep  <- step(HighLM, direction = "both", test = "F")
CloseStep <- step(CloseLM, direction = "both", test = "F")

# Coefficient tables of the selected models, one row per retained term.
OpenCoefs  <- coefficients(summary(OpenStep))
LowCoefs   <- coefficients(summary(LowStep))
HighCoefs  <- coefficients(summary(HighStep))
CloseCoefs <- coefficients(summary(CloseStep))


# Open each coefficient table of the stepwise models in the data viewer
# (View() shows its argument in a spreadsheet-style window).
View(OpenCoefs)

View(LowCoefs)

View(HighCoefs)

View(CloseCoefs)

# Open the full data frame, including all derived columns.
View(sp)

推荐答案

你的代码太糟糕了,我不得不同情你.:) 这是您的代码的重构版本:

Your code is so bad, I had to take pity on you. :) Here's a refactored version of your code:

library(quantmod)
# Fetch the ^GSPC series and return it as a value instead of having
# getSymbols() assign it into the workspace.
sp <- getSymbols(Symbols = "^GSPC", auto.assign = FALSE)

# Remove the adjusted column, then strip the leading "GSPC." from the
# remaining column names.
sp$GSPC.Adjusted <- NULL
colnames(sp) <- sub(pattern = "^GSPC\\.", replacement = "", x = colnames(sp))

# One-period discrete rate of change of each price column and of volume.
sp$Gr_Open <- ROC(x = sp$Open, n = 1, type = "discrete")
sp$Gr_High <- ROC(x = sp$High, n = 1, type = "discrete")
sp$Gr_Low <- ROC(x = sp$Low, n = 1, type = "discrete")
sp$Gr_Close <- ROC(x = sp$Close, n = 1, type = "discrete")
sp$Gr_Volume <- ROC(x = sp$Volume, n = 1, type = "discrete")

# Window length shared by all rolling statistics below.
N <- 100

# Rolling means of the price levels.
sp$Open_Rollin <- runMean(x = sp$Open, n = N)
sp$High_Rollin <- runMean(x = sp$High, n = N)
sp$Low_Rollin <- runMean(x = sp$Low, n = N)
sp$Close_Rollin <- runMean(x = sp$Close, n = N)

# Rolling means of the growth rates.
sp$Open_GR_Rollin <- runMean(x = sp$Gr_Open, n = N)
sp$High_GR_Rollin <- runMean(x = sp$Gr_High, n = N)
sp$Low_GR_Rollin <- runMean(x = sp$Gr_Low, n = N)
sp$Close_GR_Rollin <- runMean(x = sp$Gr_Close, n = N)

# Rolling standard deviations of the price levels.
sp$Open_SD_Rollin <- runSD(x = sp$Open, n = N)
sp$High_SD_Rollin <- runSD(x = sp$High, n = N)
sp$Low_SD_Rollin <- runSD(x = sp$Low, n = N)
sp$Close_SD_Rollin <- runSD(x = sp$Close, n = N)

# Shift every column by one period and give the shifted copies a "lag" suffix.
spLag <- lag(sp, k = 1)
colnames(spLag) <- paste0(colnames(sp), "lag")

# Put the original and lagged columns side by side, then keep only the rows
# that have no missing value in any column.
sp <- merge(sp, spLag)
sp <- na.omit(sp)

无需回答您的第一个问题即可回答您的第二个问题.您不必手动将系数与数据相乘.您可以简单地从模型访问拟合值.这需要您保留模型...

There's no need to answer your first question in order to answer your second question. You don't have to cross-multiply coefficients with data by hand. You can simply access the fitted values from the model. That requires that you preserve the model though...

# Formula for the Open model: Open regressed on every lagged predictor, with
# the terms in the order listed here.
f <- reformulate(
  termlabels = c(
    "Openlag", "Lowlag", "Highlag", "Closelag",
    "Gr_Openlag", "Gr_Lowlag", "Gr_Highlag", "Gr_Closelag", "Gr_Volumelag",
    "Open_GR_Rollinlag", "Low_GR_Rollinlag", "High_GR_Rollinlag",
    "Close_GR_Rollinlag",
    "Open_SD_Rollinlag", "Low_SD_Rollinlag", "High_SD_Rollinlag",
    "Close_SD_Rollinlag"
  ),
  response = "Open"
)

# Fit the Open model, then refit it with each of the other responses while
# keeping the right-hand side unchanged.
OpenLM <- lm(f, data = sp)
HighLM <- update(OpenLM, High ~ .)
LowLM <- update(OpenLM, Low ~ .)
CloseLM <- update(OpenLM, Close ~ .)

# Stepwise term selection in both directions; the selected model objects are
# kept so their fitted values can be read afterwards.
OpenStep <- step(OpenLM, direction = "both", test = "F")
HighStep <- step(HighLM, direction = "both", test = "F")
LowStep <- step(LowLM, direction = "both", test = "F")
CloseStep <- step(CloseLM, direction = "both", test = "F")

# Print the final fitted value of each selected model. fitted() returns the
# model's values for the rows it was trained on, so each number below belongs
# to the last row of the data, not to a date beyond it. The commented output
# is from the original run.
tail(fitted(OpenStep), n = 1)
# 2013-02-01 
#    1497.91 
tail(fitted(HighStep), n = 1)
# 2013-02-01 
#    1504.02 
tail(fitted(LowStep), n = 1)
# 2013-02-01 
#   1491.934 
tail(fitted(CloseStep), n = 1)
# 2013-02-01 
#   1499.851

这篇关于自动查找和乘以 R 系数的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆