R中一致的匹配对 [英] consistent matched pairs in R

查看:164
本文介绍了R中一致的匹配对的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

下面使用 Matching 包(原文此处附有该包的链接)。



我们可以通过一个修改过的 GenMatch 示例来演示。






library(Matching)
data(lalonde)

# Introduce an ID variable so each row can be tracked after matching.
lalonde$ID <- seq_len(nrow(lalonde))

# Covariate matrix passed to GenMatch() and Match() as X.
X <- cbind(lalonde$age, lalonde$educ, lalonde$black, lalonde$hisp,
           lalonde$married, lalonde$nodegr, lalonde$u74, lalonde$u75,
           lalonde$re75, lalonde$re74)

# Balance matrix: the same covariates plus the re74 * re75 interaction.
BalanceMat <- cbind(lalonde$age, lalonde$educ, lalonde$black,
                    lalonde$hisp, lalonde$married, lalonde$nodegr,
                    lalonde$u74, lalonde$u75, lalonde$re75, lalonde$re74,
                    I(lalonde$re74 * lalonde$re75))

genout <- GenMatch(Tr = lalonde$treat, X = X, BalanceMatrix = BalanceMat,
                   estimand = "ATE", pop.size = 16, max.generations = 10,
                   wait.generations = 1)

mout <- Match(Y = NULL, Tr = lalonde$treat, X = X,
              Weight.matrix = genout,
              replace = TRUE, ties = FALSE)
# ties = FALSE here, so we only have 1-1 matching.
summary(mout)

# Now create the "matched dataset".
treated <- lalonde[mout$index.treated, ]
# Introduce an identity variable for each pair.
treated$Pair_ID <- treated$ID

non.treated <- lalonde[mout$index.control, ]
non.treated$Pair_ID <- treated$ID

matched.data <- rbind(treated, non.treated)
matched.data <- matched.data[order(matched.data$Pair_ID), ]

# Output which non-treated ID was paired with the first person.
matched.data$ID[matched.data$Pair_ID == 1 & matched.data$treat == 0]

我们看到,对于数据, ID = 1与ID = 193匹配

现在让我们按照数据的顺序引入一些随机化,看看我们是否得到相同的对

  n <- 500
  # One preallocated result vector per tracked Pair_ID (1 to 7);
  # element i is filled in by iteration i of the loop below.
  P1 <- rep(NA, n)
  P2 <- rep(NA, n)
  P3 <- rep(NA, n)
  P4 <- rep(NA, n)
  P5 <- rep(NA, n)
  P6 <- rep(NA, n)
  P7 <- rep(NA, n)

  for (i in seq_len(n)) {

    lalonde <- lalonde[sample(seq_len(nrow(lalonde))), ] # randomise order

    # NOTE: X and BalanceMat are not rebuilt inside this loop, so they keep
    # the row order they had when they were created, while Tr and the row
    # subsetting below use the reshuffled lalonde. This is the code as asked
    # in the question; the answer further down addresses exactly this point.
    genout <- GenMatch(Tr = lalonde$treat, X = X, BalanceMatrix = BalanceMat,
                       estimand = "ATE", pop.size = 16, max.generations = 10,
                       wait.generations = 1)

    mout <- Match(Y = NULL, Tr = lalonde$treat, X = X,
                  Weight.matrix = genout,
                  replace = TRUE, ties = FALSE)

    summary(mout)

    treated <- lalonde[mout$index.treated, ]
    treated$Pair_ID <- treated$ID

    non.treated <- lalonde[mout$index.control, ]
    non.treated$Pair_ID <- treated$ID

    matched.data <- rbind(treated, non.treated)
    matched.data <- matched.data[order(matched.data$Pair_ID), ]

    # Record the ID of the treat == 0 row paired with each of Pair_ID 1 to 7.
    P1[i] <- matched.data$ID[matched.data$Pair_ID == 1 & matched.data$treat == 0]
    P2[i] <- matched.data$ID[matched.data$Pair_ID == 2 & matched.data$treat == 0]
    P3[i] <- matched.data$ID[matched.data$Pair_ID == 3 & matched.data$treat == 0]
    P4[i] <- matched.data$ID[matched.data$Pair_ID == 4 & matched.data$treat == 0]
    P5[i] <- matched.data$ID[matched.data$Pair_ID == 5 & matched.data$treat == 0]
    P6[i] <- matched.data$ID[matched.data$Pair_ID == 6 & matched.data$treat == 0]
    P7[i] <- matched.data$ID[matched.data$Pair_ID == 7 & matched.data$treat == 0]

  }

因此,该循环会对数据进行 500 次匹配,而 P1 每次都会保存与之配对的 treat == 0 个案。

然后我们看看哪个 P1 出现最多,通过:

  # Plot the control ID stored in P1 against the iteration number.
  plot(1:n, P1, main = "P1")

或者:

 # Count how often each ID occurs in P1.
 summary(as.factor(P1))

我们看到,没有哪一个 treat == 0 的个案被经常配对。
我原本预期会有某个个案(可能是 ID = 193?)被经常配对,并且不依赖于数据的顺序。因此我认为我的循环写错了。有人能指出错在哪里吗?或者,其他人运行这样的循环时,是否会发现无论数据顺序如何,配对到的都是相似的个案?

问题在于,你打乱了 lalonde 的行顺序,但传给 GenMatch 和 Match 的输入是 X 和 BalanceMat,它们仍然保持原来的顺序。这样一来,当你在最后构建 matched.data 时,用来取子集的索引已经与 lalonde 对应不上了。请再试一次,但要把 X 和 BalanceMat 的赋值放进循环里,即:





  # Rebuild both matrices from the current lalonde so that their rows are in
  # the same order as the data frame that is subset afterwards.
  X <- cbind(lalonde$age, lalonde$educ, lalonde$black, lalonde$hisp,
             lalonde$married, lalonde$nodegr, lalonde$u74, lalonde$u75,
             lalonde$re75, lalonde$re74)

  BalanceMat <- cbind(lalonde$age, lalonde$educ, lalonde$black,
                      lalonde$hisp, lalonde$married, lalonde$nodegr,
                      lalonde$u74, lalonde$u75, lalonde$re75, lalonde$re74,
                      I(lalonde$re74 * lalonde$re75))


So using the Matching Package (Link to package here)

We can work through a modified GenMatch example.

library(Matching)
data(lalonde)

# Introduce an ID variable so each row can be tracked after matching.
# seq_len(nrow()) yields the same 1..N integers as 1:length(column) but is
# also safe for an empty data frame.
lalonde$ID <- seq_len(nrow(lalonde))

# Covariate matrix passed to GenMatch() and Match() as X.
X <- cbind(lalonde$age, lalonde$educ, lalonde$black, lalonde$hisp,
           lalonde$married, lalonde$nodegr, lalonde$u74, lalonde$u75,
           lalonde$re75, lalonde$re74)

# Balance matrix: the same covariates plus the re74 * re75 interaction.
BalanceMat <- cbind(lalonde$age, lalonde$educ, lalonde$black,
                    lalonde$hisp, lalonde$married, lalonde$nodegr,
                    lalonde$u74, lalonde$u75, lalonde$re75, lalonde$re74,
                    I(lalonde$re74 * lalonde$re75))

genout <- GenMatch(Tr = lalonde$treat, X = X, BalanceMatrix = BalanceMat,
                   estimand = "ATE", pop.size = 16, max.generations = 10,
                   wait.generations = 1)

mout <- Match(Y = NULL, Tr = lalonde$treat, X = X,
              Weight.matrix = genout,
              replace = TRUE, ties = FALSE)
# ties = FALSE here, so we only have 1-1 matching.
summary(mout)

# Now create the "matched dataset".
treated <- lalonde[mout$index.treated, ]
# Introduce an identity variable for each pair.
treated$Pair_ID <- treated$ID

non.treated <- lalonde[mout$index.control, ]
non.treated$Pair_ID <- treated$ID

matched.data <- rbind(treated, non.treated)
matched.data <- matched.data[order(matched.data$Pair_ID), ]

# Output which non-treated ID was paired with the first person.
matched.data$ID[matched.data$Pair_ID == 1 & matched.data$treat == 0]

We see that for the data, the ID=1 is matched with ID=193

Now let's introduce some randomisation into the order of the data and see if we get the same pairs.

# Repeat the matching n times on a reshuffled lalonde and record, for each of
# Pair_ID 1 to 7, the ID of the treat == 0 row that ends up in that pair.
n <- 500
# One preallocated result vector per tracked Pair_ID; element i is filled in
# by iteration i of the loop below.
P1 <- rep(NA, n)
P2 <- rep(NA, n)
P3 <- rep(NA, n)
P4 <- rep(NA, n)
P5 <- rep(NA, n)
P6 <- rep(NA, n)
P7 <- rep(NA, n)

for (i in 1:n) {

  lalonde <- lalonde[sample(1:nrow(lalonde)), ] # randomise order

  # NOTE(review): X and BalanceMat are not reassigned anywhere in this loop,
  # so they keep the row order they had when they were built, while Tr and
  # the row subsetting below use the reshuffled lalonde.
  genout <- GenMatch(Tr=lalonde$treat, X=X, BalanceMatrix=BalanceMat, estimand="ATE", 
                     pop.size=16, max.generations=10, wait.generations=1)

  mout <- Match(Y=NULL, Tr=lalonde$treat, X=X,
                Weight.matrix=genout,
                replace=TRUE, ties=FALSE)

  summary(mout)

  # Rows of the current lalonde selected by the indices stored in mout;
  # each row's own ID is used as the pair identifier.
  treated <- lalonde[mout$index.treated,]
  treated$Pair_ID <- treated$ID

  # The matched rows get the Pair_ID of the treated row at the same position.
  non.treated <- lalonde[mout$index.control,]
  non.treated$Pair_ID <- treated$ID

  matched.data <- rbind(treated, non.treated)
  matched.data <- matched.data[order(matched.data$Pair_ID),]

  # Each assignment assumes the filter selects exactly one ID - TODO confirm.
  P1[i] <- matched.data$ID[matched.data$Pair_ID==1 & matched.data$treat==0]
  P2[i] <- matched.data$ID[matched.data$Pair_ID==2 & matched.data$treat==0]
  P3[i] <- matched.data$ID[matched.data$Pair_ID==3 & matched.data$treat==0]
  P4[i] <- matched.data$ID[matched.data$Pair_ID==4 & matched.data$treat==0]
  P5[i] <- matched.data$ID[matched.data$Pair_ID==5 & matched.data$treat==0]
  P6[i] <- matched.data$ID[matched.data$Pair_ID==6 & matched.data$treat==0]
  P7[i] <- matched.data$ID[matched.data$Pair_ID==7 & matched.data$treat==0]

}

So the loop will match the pairs 500 times and P1 will save the treat==0 case each time.

We then look at which P1 value appears the most, by:

# Plot the control ID stored in P1 against the iteration number.
plot(x = 1:n, y = P1, main = "P1")

OR

# Count how often each ID occurs in P1.
summary(factor(P1))

We see that no single treat==0 case is commonly paired. I would expect there to be a case (possibly ID=193?) that is commonly paired and that does not depend on the order of the data. Therefore I think my loop is wrong. Can anybody point out where? Or, when others run such a loop, do they find that similar cases are paired regardless of the order of the data?

解决方案

The problem is that you randomise the order of lalonde, but your input to GenMatch and Match are X and BalanceMat which still have the original order. When you then build your matched.data at the end, you are subsetting using indices which don't tie into lalonde any more. Try again but including the assignment of X and BalanceMat in your loop.

i.e.

# Rebuild both matrices from the current lalonde so that their rows are in
# the same order as the data frame that is subset afterwards.
X <- cbind(
  lalonde[["age"]], lalonde[["educ"]], lalonde[["black"]], lalonde[["hisp"]],
  lalonde[["married"]], lalonde[["nodegr"]], lalonde[["u74"]],
  lalonde[["u75"]], lalonde[["re75"]], lalonde[["re74"]]
)

# Same covariates plus the re74 * re75 interaction term.
BalanceMat <- cbind(
  lalonde[["age"]], lalonde[["educ"]], lalonde[["black"]], lalonde[["hisp"]],
  lalonde[["married"]], lalonde[["nodegr"]], lalonde[["u74"]],
  lalonde[["u75"]], lalonde[["re75"]], lalonde[["re74"]],
  I(lalonde[["re74"]] * lalonde[["re75"]])
)

这篇关于R中一致的匹配对的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆