r data.frame创建新变量 [英] r data.frame create new variable

查看:286
本文介绍了r data.frame创建新变量的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我有一个数据框,大约有150万行和5列。其中一个变量(VARIABLE)的形式为 NATIONALITY_YEAR(例如 SPAIN_1998),我想把它拆分成两列:一列包含国籍,即下划线左侧的部分;另一列包含年份,即下划线右侧的部分。我已经尝试过 concat.split,这应该是最简单的方法:

  aa <- concat.split(mydata, "VARIABLE", sep = "_", drop = F)

但运行了2小时后,仍然没有任何输出。我不确定是应该让它继续运行更长时间,还是有不那么耗时的方法可以做到这一点。



有关这个问题的任何帮助非常感谢!



这是一个可重现的(子集!)示例:

  mydata<  -  structure(list(PROVINCE = c(1L,4L,7L,8L,11L,14L,17L,20L,
24L,28L,30L,33L,36L,41L,44L,46L ,48L,3L,6L,8L,10L,
13L,15L,18L,23L,26L,29L,31L,35L,38L,41L,46L,47L,2L,
4L,8L,8L ,11L,15L,17L,21L,24L,28L,30L,33L,37L,41L,
45L,46L,49L,3L,6L,8L,10L,13L,15L,19L,23L,27L,29L ,
32L,36L,39L,43L,46L,48L,2L,5L,8L,8L,12L,15L,18L,
21L,24L,28L,30L,33L,37L,41L,45L ,46L,50L,3L,7L,8L,
10L,14L,16L,20L,23L,27L,29L,32L,36L,39L,43L,46L,48L,
3L,6L,8L ,8L,12L,15L,18L,21L,25L,28L,31L,34L,38L,
41L,45L,46L,50L,3L,7L,8L,11L,14L,17L,20L,23L,27L ,
29L,33L,36L,40L,4 3L,46L,48L,3L,6L,8L,9L,12L,15L,
18L,22L,25L,28L,31L,35L,38L,41L,45L,46L,50L,4L,7L,
8L,11L,14L,17L,20L,24L,28L,30L,33L,36L,41L,43L,46L,
48L,3L,6L,8L,10L,13L,15L,18L,22L, 26L,28L,31L,35L,
38L,41L,46L,47L,1L,4L,8L,8L,11L,14L,17L,20L,24L,
28L,30L,33L,36L, 41L,44L,46L,49L,3L,6L),AGE5 =结构(c(1L,
5L,9L,7L,6L,7L,5L,8L,3L,3L,3L,5L,8L, ,3L,6L,9L,
5L,7L,4L,3L,5L,8L,8L,2L,8L,2L,9L,7L,9L,9L,2L,7L,
2L,9L ,1L,8L,8L,6L,6L,6L,6L,9L,6L,6L,8L,9L,3L,4L, ,3L,4L,4L,1L,3L,1L,2L,
2L,6L,6L,2L,9L,2L,2L,1L,5L,9L,5L,8L,9L,7L,4L,3L ,
7L,2L,8L,2L,6L,9L,1L,5L,1L,6L,6L,6L,7L,3L,6L,3L,
3L,4L,1L,1L,2L ,9L,6L,3L,3L,3L,8L,3L,3L,1L,5L,2L,6L, ,8L,1L,5L,1L,
6L,4L,6L,2L,3L,3L,5L,9L,5L,5L,4L ,7L,8L,3L,6L,3L,6L,3L,6L,1L,2L,2L,3L,7L,
1L,9L,5L,8L,4L,5L,4L,1L,3L,7L,7L,9L,3L,9L,7L,5L,
7L,8L,1L,4L,4L,6L,1L ,8L,7L,8L,6L,8L,4L,3L,4L,5L,
9L,2L,6L,6L,1L,5L,7L),标号= c(10-14 15-19,20-24,
25-29,30-34,35-39,40-44,45-49 ),
ZONA91OK = c(101L,4079L,712L,8205L,11022L,14021L,1714L,
20067L,2414L,2810L,300799L,3305L,36026L,41024L,4405L ,
4607L,48015L,308L,610L,8121L,1006L,1307L,1511L,1813L,
2308L,2605L,2910L,310799L,35026L,3811L,411199L,4601L,
4708L,202L ,405L,8015L,837L,11033L,1502L,1702L,2112L,
2408L,28047L,30015L,3305L,3709L,410199L,4511L,1202L,
490699L,3063L,610L,827L,1006L,1301L ,15036L,1901L,
2310L,2709L,29025L,3201L,36008L,390899L,4301L,46184L,
4805L,206L,504L,817L,813L,12135L,1519L,1810L,2104L,
2402L,281 30L,30030L,3305L,3707L,411399L,45165L,46181L,
5008L,305L,7026L,803L,1006L,1413L,16078L,200999L,
2312L,2712L,29069L,3210L,3616L,391199L, 4313L,46105L,
4805L,310L,6153L,8252L,8205L,1205L,1505L,1808L,2110L,
2508L,2810L,311399L,3405L,3807L,41024L,4507L,46102L,
500599L,3014L,706L,8121L,11028L,14042L,1712L,20045L,
2314L,27031L,29901L,33024L,3614L,400199L,4307L,46021L,
4805L,3066L,6153L,8015L,901L, 12040L,1522L,1806L,2203L,
2508L,28047L,311099L,35004L,3801L,410199L,4515L,46017L,
501199L,407L,7027L,827L,1102L,1404L,17155L,200599L,
24089L,2812L,30019L,33024L,3612L,41038L,4301L,4628L,
4805L,307L,6153L,817L,1004L,1309L,1508L,1804L,2206L,
2606L,28130L,310799L, 35011L,38022L,411399L,4622L,4701L,
1036L,4079L,807L,803L,1108L,1410L,1708L,201399L,2410L,
28058L,30043L,33024L,3610L,410399L,4401L,4621L, 490499L,
30 (1L,1L,1L,1L,1L,
1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L, ,2L,2L,2L,2L,2L,2L,2L,2L,2L,2L,3L,3L,
3L,3L,3L,3L ,3L,3L,3L,3L,3L,3L,3L,3L,3L,3L,3L,
4L,4L,4L,4L,4L,4L,4L,4L,4L,4L,4L,4L ,4L,4L,4L,4L,4L,4L,5L,5L,5L,5L,5L,5L,5L,5L,5L,5L,5L,5L,5L,5L,
5L,5L,5L ,6L,6L,6L,6L,6L,6L,6L,6L,6L,6L,6L,6L,6L,6L,6L,6L,6L,6L,7L,7L,7L,7L,7L,7L,7L ,7L,7L,7L,7L,7L,7L,7L,
7L,7L,7L,7L,7L,7L,8L,8L,8L,8L,8L,8L,8L,8L,8L,
8L,8L 8L,9L,9L,9L,9L,9L,9L,9L,9L,9L,9L,9L,9L,9L,9L,9L,9L,9L,9L,9L,9L,9L,9L,9L, ,10L,10L,10L,10L,10L,10L,10L,10L,10L,10L,10L,10L,10L,10L,10L,10L,10L,11L,11L,11L,11L, ,11L,11L,11L,11L,11L,11L,11L,11L,11L,11L,11L,
11L,11L,11L,12L,12L,12L,12L,12L,12L,12L,12L,12L,
12L,12L ,12L,12L,12L,12L,12L,12L,13L,13L),.Label = c(SPAIN_1998,
EU15DC_1998,ROE_1998,MAGREB_1998,SSA_1998,LA_1998,
ASIA_1998,ROW_1998,Total_1998,SPAIN_1999,EU15DC_1999,
ROE_1999,MAGREB_1999,SSA_1999,LA_1999,ASIA_1999,
ROW_1999,Total_1999,SPAIN_2000,EU15DC_2000,ROE_2000,
MAGREB_2000 ,SSA_2000,LA_2000,ASIA_2000,ROW_2000,
Total_2000,SPAIN_2001,EU15DC_2001,ROE_2001,MAGREB_2001,
SSA_2001 LA_2001,ASIA_2001,ROW_2001,Total_2001,
SPAIN_2002,EU15DC_2002,ROE_2002,MAGREB_2002,SSA_2002,
LA_2002,ASIA_2002 ,ROW_2002,Total_2002,SPAIN_2003,
EU15DC_2003,ROE_2003,MAGREB_2003,SSA_2003,LA_2003,
ASIA_2003,ROW_2003 Total_2003,SPAIN_2004,EU15DC_2004,
ROE_2004,MAGREB_2004,SSA_2004,LA_2004,ASIA_2004,
ROW_2004,Total_2004,SPAIN_2005 ,EU15DC_2005,ROE_2005,
MAGREB_2005,SSA_2005,LA_2005,ASIA_2005,ROW_2005,
Total_2005,SPAIN_2006,EU15DC_2006,ROE_2006,MAGREB_2006,
SSA_2006,LA_2006,ASIA_2006,ROW_2006,Total_2006,
SPAIN_2007,EU15DC_2007,ROE_2007,MAGREB_2007,SSA_2007,
LA_2007,ASIA_2007,ROW_2007,Total_2007,SPAIN_2008,
EU15DC_2008 ROE_2008,MAGREB_2008,SSA_2008,LA_2008,
ASIA_2008,ROW_2008,Total_2008,SPAIN_2009,EU15DC_2009,
ROE_2009 MAGREB_2009,SSA_2009,LA_2009,ASIA_2009,
ROW_2009,Total_2009,SPAIN_2010,EU15DC_2010,ROE_2010,
MAGREB_2010 LA_2010,LA_2010,ASIA_2010,ROW_2010,
Total_2010,SPAIN_2011,EU15DC_2011,ROE_2011,MAGREB_2011,
SSA_2011 ASIA_2011,ROW_2011,Total_2011,
SPAIN_2012,EU15DC_2012,ROE_2012,MAGREB_2012,SSA_2012,
LA_2012,ASIA_2012,ROW_2012 ,Total_2012,NOTSPAIN_1998,
NOTSPAIN_1999,NOTSPAIN_2000,NOTSPAIN_2001,NOTSPAIN_2 002,
NOTSPAIN_2003,NOTSPAIN_2004,NOTSPAIN_2005,NOTSPAIN_2006,
NOTSPAIN_2007,NOTSPAIN_2008,NOTSPAIN_2009,NOTSPAIN_2010,
NOTSPAIN_2011 ,AFRICA_2002,AFRICA_2009,AFRICA_2009,AFRICA_2009,AFRICA_2009, AFRICA_2007,
AFRICA_2008,AFRICA_2009,AFRICA_2010,AFRICA_2011,
AFRICA_2012,DWC_1998,DWC_1999,DWC_2000,DWC_2001 bDWC_2002,DWC_2003,DWC_2004,DWC_2005,DWC_2006,
DWC_2007,DWC_2008,DWC_2009,DWC_2010,DWC_2011 DWC_2012),class =factor),FREQUENCY = c(614,1943,
59,201,188,10859,93,
1494,60,1001,1000,689,675,934 ,51,
1240,165,13,0,14,2,2,
2,0,3,0,40,1,18,41,1,0,3,0,0 ,0,1,0,
0,0,0,0,7,1,0,0,0,0,0,0,0,0,80,0,
0,0 ,4,0,0,15,0,0,1,1,3,4,0,0,0,0,0,0,0,
0,0,0,0,0,2 ,0, 1,0,0,2,11,0,0,0,3,2,1,5,
64,1,4,1,3,4,8,1,1,1,1, 0,0,0,
0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,2173,907,9059,839,
4303,100,1727,663,694,1210,623,
1261,772, 697,490,1031,490,956,704,
1293,1011,739,927,755,3340,1190,1254,12880,528,
3244,277,892,837,1, 2,10,1,1,2,2,0,0,1,8,3,
12,0,2,1,0,4,0,0,0,0,0, 1,12,0,7,0,0,0,
0,0,5,2)),.Names = c(PROVINCE,AGE5,ZONA91OK,
VARIABLE,FREQUENCY),row.names = c(1L,501L,1001L,1501L,
2001L,2501L,3001L,3501L,4001L,4501L,5001L,5501L,6001L,
6501L, 7001L,7501L,8001L,8501L,9001L,9501L,10001L,10501L,
11001L,11501L,12001L,12501L,13001L,13501L,14001L,14501L,
15001L,15501L,16001L,16501L,17001L, 17501L,18001L,18501L,
19001L,19501L,20001L,20501L,21001L,21501L,22001L,22501L,
23001L,23501L,24001L,2450 1L,25001L,25501L,26001L,26501L,
27001L,27501L,28001L,28501L,29001L,29501L,30001L,30501L,
31001L,31501L,32001L,32501L,33001L,33501L,34001L,34501L,
35001L,35501L,36001L,36501L,37001L,37501L,38001L,38501L,
39001L,39501L,40001L,40501L,41001L,41501L,42001L,42501L,
43001L,43501L,44001L, 44501L,45001L,45501L,46001L,46501L,
47001L,47501L,48001L,48501L,49001L,49501L,50001L,50501L,
51001L,51501L,52001L,52501L,53001L,53501L,54001L,54501L,
55001L,55501L,56001L,56501L,57001L,57501L,58001L,58501L,
59001L,59501L,60001L,60501L,61001L,61501L,62001L,62501L,
63001L,63501L,64001L, 64501L,65001L,65501L,66001L,66501L,
67001L,67501L,68001L,68501L,69001L,69501L,70001L,70501L,
71001L,71501L,72001L,72501L,73001L,73501L,74001L,74501L,
75001L,75501L,76001L,76501L,77001L,77501L,78001L,78501L,
79001L,79501L,80001L,80501L,81001L,81501L, 82001L,82501L,
83001L,83501L,84001L,84501L,85001L,85501L,86001L,86501L,
87001L,87501L,88001L,88501L,89001L,89501L,90001L,90501L,
91001L, 91501L,92001L,92501L,93001L,93501L,94001L,94501L,
95001L,95501L,96001L,96501L,97001L,97501L,98001L,98501L,
99001L,99501L),class =data.frame )


解决方案


请改用下面的方法:

  library(data.table)
dt = data.table(mydata)

dt[, `:=`(NATIONALITY = sub('(.*)_(.*)', '\\1', VARIABLE),
          YEAR        = sub('(.*)_(.*)', '\\2', VARIABLE))]


I have a dataframe with around 1.5 million rows and 5 cols. One variable (VARIABLE) is of this type NATIONALITY_YEAR (e.g. SPAIN_1998) and I want to split it in two columns, one containing the Nationality, which is the left side of the name before the underscore, and one containing the Year, right side of the underscore. I have tried with concat.split which should be the easiest way:

# Attempt to split the VARIABLE column of mydata on "_" with concat.split().
# FALSE is spelled out instead of `F`: `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word and always means logical false.
# NOTE(review): drop = FALSE presumably keeps the original VARIABLE column in
# the result -- confirm against the concat.split documentation.
aa <- concat.split(mydata, "VARIABLE", sep = "_", drop = FALSE)

but after 2 hours running it did not produce any output. I am not sure if I should leave it running for a longer period of time or if there is a non time consuming way to do this.

Any help on the issue would be very much appreciated!

Here is a reproducible (subset!) sample:

mydata<-  structure(list(PROVINCE = c(1L, 4L, 7L, 8L, 11L, 14L, 17L, 20L, 
24L, 28L, 30L, 33L, 36L, 41L, 44L, 46L, 48L, 3L, 6L, 8L, 10L, 
13L, 15L, 18L, 23L, 26L, 29L, 31L, 35L, 38L, 41L, 46L, 47L, 2L, 
4L, 8L, 8L, 11L, 15L, 17L, 21L, 24L, 28L, 30L, 33L, 37L, 41L, 
45L, 46L, 49L, 3L, 6L, 8L, 10L, 13L, 15L, 19L, 23L, 27L, 29L, 
32L, 36L, 39L, 43L, 46L, 48L, 2L, 5L, 8L, 8L, 12L, 15L, 18L, 
21L, 24L, 28L, 30L, 33L, 37L, 41L, 45L, 46L, 50L, 3L, 7L, 8L, 
10L, 14L, 16L, 20L, 23L, 27L, 29L, 32L, 36L, 39L, 43L, 46L, 48L, 
3L, 6L, 8L, 8L, 12L, 15L, 18L, 21L, 25L, 28L, 31L, 34L, 38L, 
41L, 45L, 46L, 50L, 3L, 7L, 8L, 11L, 14L, 17L, 20L, 23L, 27L, 
29L, 33L, 36L, 40L, 43L, 46L, 48L, 3L, 6L, 8L, 9L, 12L, 15L, 
18L, 22L, 25L, 28L, 31L, 35L, 38L, 41L, 45L, 46L, 50L, 4L, 7L, 
8L, 11L, 14L, 17L, 20L, 24L, 28L, 30L, 33L, 36L, 41L, 43L, 46L, 
48L, 3L, 6L, 8L, 10L, 13L, 15L, 18L, 22L, 26L, 28L, 31L, 35L, 
38L, 41L, 46L, 47L, 1L, 4L, 8L, 8L, 11L, 14L, 17L, 20L, 24L, 
28L, 30L, 33L, 36L, 41L, 44L, 46L, 49L, 3L, 6L), AGE5 = structure(c(1L, 
5L, 9L, 7L, 6L, 7L, 5L, 8L, 3L, 3L, 3L, 5L, 8L, 2L, 3L, 6L, 9L, 
5L, 7L, 4L, 3L, 5L, 8L, 8L, 2L, 8L, 2L, 9L, 7L, 9L, 9L, 2L, 7L, 
2L, 9L, 1L, 8L, 8L, 1L, 8L, 1L, 6L, 4L, 6L, 7L, 2L, 3L, 1L, 7L, 
5L, 6L, 9L, 5L, 6L, 8L, 9L, 3L, 4L, 3L, 4L, 4L, 1L, 3L, 1L, 2L, 
2L, 6L, 6L, 2L, 9L, 2L, 2L, 1L, 5L, 9L, 5L, 8L, 9L, 7L, 4L, 3L, 
7L, 2L, 8L, 2L, 6L, 9L, 1L, 5L, 1L, 6L, 6L, 6L, 7L, 3L, 6L, 3L, 
3L, 4L, 1L, 1L, 2L, 9L, 6L, 4L, 3L, 8L, 3L, 7L, 1L, 5L, 2L, 6L, 
6L, 8L, 5L, 9L, 5L, 6L, 2L, 3L, 1L, 4L, 8L, 9L, 8L, 1L, 5L, 1L, 
6L, 4L, 6L, 2L, 3L, 3L, 5L, 9L, 5L, 5L, 4L, 7L, 8L, 4L, 2L, 5L, 
7L, 8L, 9L, 8L, 3L, 7L, 7L, 5L, 6L, 3L, 6L, 1L, 2L, 2L, 3L, 7L, 
1L, 9L, 5L, 8L, 4L, 5L, 4L, 1L, 3L, 7L, 7L, 9L, 3L, 9L, 7L, 5L, 
7L, 8L, 1L, 4L, 4L, 6L, 1L, 8L, 7L, 8L, 6L, 8L, 4L, 3L, 4L, 5L, 
9L, 2L, 6L, 6L, 1L, 5L, 7L), .Label = c("10-14", "15-19", "20-24", 
"25-29", "30-34", "35-39", "40-44", "45-49", "50-54"), class = "factor"), 
ZONA91OK = c(101L, 4079L, 712L, 8205L, 11022L, 14021L, 1714L, 
20067L, 2414L, 2810L, 300799L, 3305L, 36026L, 41024L, 4405L, 
4607L, 48015L, 308L, 610L, 8121L, 1006L, 1307L, 1511L, 1813L, 
2308L, 2605L, 2910L, 310799L, 35026L, 3811L, 411199L, 4601L, 
4708L, 202L, 405L, 8015L, 837L, 11033L, 1502L, 1702L, 2112L, 
2408L, 28047L, 30015L, 3305L, 3709L, 410199L, 4511L, 1202L, 
490699L, 3063L, 610L, 827L, 1006L, 1301L, 15036L, 1901L, 
2310L, 2709L, 29025L, 3201L, 36008L, 390899L, 4301L, 46184L, 
4805L, 206L, 504L, 817L, 813L, 12135L, 1519L, 1810L, 2104L, 
2402L, 28130L, 30030L, 3305L, 3707L, 411399L, 45165L, 46181L, 
5008L, 305L, 7026L, 803L, 1006L, 1413L, 16078L, 200999L, 
2312L, 2712L, 29069L, 3210L, 3616L, 391199L, 4313L, 46105L, 
4805L, 310L, 6153L, 8252L, 8205L, 1205L, 1505L, 1808L, 2110L, 
2508L, 2810L, 311399L, 3405L, 3807L, 41024L, 4507L, 46102L, 
500599L, 3014L, 706L, 8121L, 11028L, 14042L, 1712L, 20045L, 
2314L, 27031L, 29901L, 33024L, 3614L, 400199L, 4307L, 46021L, 
4805L, 3066L, 6153L, 8015L, 901L, 12040L, 1522L, 1806L, 2203L, 
2508L, 28047L, 311099L, 35004L, 3801L, 410199L, 4515L, 46017L, 
501199L, 407L, 7027L, 827L, 1102L, 1404L, 17155L, 200599L, 
24089L, 2812L, 30019L, 33024L, 3612L, 41038L, 4301L, 4628L, 
4805L, 307L, 6153L, 817L, 1004L, 1309L, 1508L, 1804L, 2206L, 
2606L, 28130L, 310799L, 35011L, 38022L, 411399L, 4622L, 4701L, 
1036L, 4079L, 807L, 803L, 1108L, 1410L, 1708L, 201399L, 2410L, 
28058L, 30043L, 33024L, 3610L, 410399L, 4401L, 4621L, 490499L, 
3059L, 6153L), VARIABLE = structure(c(1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L), .Label = c("SPAIN_1998", 
"EU15DC_1998", "ROE_1998", "MAGREB_1998", "SSA_1998", "LA_1998", 
"ASIA_1998", "ROW_1998", "Total_1998", "SPAIN_1999", "EU15DC_1999", 
"ROE_1999", "MAGREB_1999", "SSA_1999", "LA_1999", "ASIA_1999", 
"ROW_1999", "Total_1999", "SPAIN_2000", "EU15DC_2000", "ROE_2000", 
"MAGREB_2000", "SSA_2000", "LA_2000", "ASIA_2000", "ROW_2000", 
"Total_2000", "SPAIN_2001", "EU15DC_2001", "ROE_2001", "MAGREB_2001", 
"SSA_2001", "LA_2001", "ASIA_2001", "ROW_2001", "Total_2001", 
"SPAIN_2002", "EU15DC_2002", "ROE_2002", "MAGREB_2002", "SSA_2002", 
"LA_2002", "ASIA_2002", "ROW_2002", "Total_2002", "SPAIN_2003", 
"EU15DC_2003", "ROE_2003", "MAGREB_2003", "SSA_2003", "LA_2003", 
"ASIA_2003", "ROW_2003", "Total_2003", "SPAIN_2004", "EU15DC_2004", 
"ROE_2004", "MAGREB_2004", "SSA_2004", "LA_2004", "ASIA_2004", 
"ROW_2004", "Total_2004", "SPAIN_2005", "EU15DC_2005", "ROE_2005", 
"MAGREB_2005", "SSA_2005", "LA_2005", "ASIA_2005", "ROW_2005", 
"Total_2005", "SPAIN_2006", "EU15DC_2006", "ROE_2006", "MAGREB_2006", 
"SSA_2006", "LA_2006", "ASIA_2006", "ROW_2006", "Total_2006", 
"SPAIN_2007", "EU15DC_2007", "ROE_2007", "MAGREB_2007", "SSA_2007", 
"LA_2007", "ASIA_2007", "ROW_2007", "Total_2007", "SPAIN_2008", 
"EU15DC_2008", "ROE_2008", "MAGREB_2008", "SSA_2008", "LA_2008", 
"ASIA_2008", "ROW_2008", "Total_2008", "SPAIN_2009", "EU15DC_2009", 
"ROE_2009", "MAGREB_2009", "SSA_2009", "LA_2009", "ASIA_2009", 
"ROW_2009", "Total_2009", "SPAIN_2010", "EU15DC_2010", "ROE_2010", 
"MAGREB_2010", "SSA_2010", "LA_2010", "ASIA_2010", "ROW_2010", 
"Total_2010", "SPAIN_2011", "EU15DC_2011", "ROE_2011", "MAGREB_2011", 
"SSA_2011", "LA_2011", "ASIA_2011", "ROW_2011", "Total_2011", 
"SPAIN_2012", "EU15DC_2012", "ROE_2012", "MAGREB_2012", "SSA_2012", 
"LA_2012", "ASIA_2012", "ROW_2012", "Total_2012", "NOTSPAIN_1998", 
"NOTSPAIN_1999", "NOTSPAIN_2000", "NOTSPAIN_2001", "NOTSPAIN_2002", 
"NOTSPAIN_2003", "NOTSPAIN_2004", "NOTSPAIN_2005", "NOTSPAIN_2006", 
"NOTSPAIN_2007", "NOTSPAIN_2008", "NOTSPAIN_2009", "NOTSPAIN_2010", 
"NOTSPAIN_2011", "NOTSPAIN_2012", "AFRICA_1998", "AFRICA_1999", 
"AFRICA_2000", "AFRICA_2001", "AFRICA_2002", "AFRICA_2003", 
"AFRICA_2004", "AFRICA_2005", "AFRICA_2006", "AFRICA_2007", 
"AFRICA_2008", "AFRICA_2009", "AFRICA_2010", "AFRICA_2011", 
"AFRICA_2012", "DWC_1998", "DWC_1999", "DWC_2000", "DWC_2001", 
"DWC_2002", "DWC_2003", "DWC_2004", "DWC_2005", "DWC_2006", 
"DWC_2007", "DWC_2008", "DWC_2009", "DWC_2010", "DWC_2011", 
"DWC_2012"), class = "factor"), FREQUENCY = c(614, 1943, 
59, 201, 188, 10859, 93, 
1494, 60, 1001, 1000, 689, 675, 934, 51, 
1240, 165, 13, 0, 14, 2, 2, 
2, 0, 3, 0, 40, 1, 18, 41, 1, 0, 3, 0, 0, 0, 1, 0, 
0, 0, 0, 0, 7, 1, 0, 0, 0, 0, 0, 0, 0, 0, 80, 0, 
0, 0, 4, 0, 0, 15, 0, 0, 1, 1, 3, 4, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 2, 11, 0, 0, 0, 3, 2, 1, 5, 
64, 1, 4, 1, 3, 4, 8, 1, 1, 1, 1, 0, 0, 0, 
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 2173, 907, 9059, 839, 
4303, 100, 1727, 663, 694, 1210, 623, 
1261, 772, 697, 490, 1031, 490, 956, 704, 
1293, 1011, 739, 927, 755, 3340, 1190, 1254, 12880, 528, 
3244, 277, 892, 837, 1, 2, 10, 1, 1, 2, 2, 0, 0, 1, 8, 3, 
12, 0, 2, 1, 0, 4, 0, 0, 0, 0, 0, 0, 1, 12, 0, 7, 0, 0, 0, 
0, 0, 5, 2)), .Names = c("PROVINCE", "AGE5", "ZONA91OK", 
"VARIABLE", "FREQUENCY"), row.names = c(1L, 501L, 1001L, 1501L, 
2001L, 2501L, 3001L, 3501L, 4001L, 4501L, 5001L, 5501L, 6001L, 
6501L, 7001L, 7501L, 8001L, 8501L, 9001L, 9501L, 10001L, 10501L, 
11001L, 11501L, 12001L, 12501L, 13001L, 13501L, 14001L, 14501L, 
15001L, 15501L, 16001L, 16501L, 17001L, 17501L, 18001L, 18501L, 
19001L, 19501L, 20001L, 20501L, 21001L, 21501L, 22001L, 22501L, 
23001L, 23501L, 24001L, 24501L, 25001L, 25501L, 26001L, 26501L, 
27001L, 27501L, 28001L, 28501L, 29001L, 29501L, 30001L, 30501L, 
31001L, 31501L, 32001L, 32501L, 33001L, 33501L, 34001L, 34501L, 
35001L, 35501L, 36001L, 36501L, 37001L, 37501L, 38001L, 38501L, 
39001L, 39501L, 40001L, 40501L, 41001L, 41501L, 42001L, 42501L, 
43001L, 43501L, 44001L, 44501L, 45001L, 45501L, 46001L, 46501L, 
47001L, 47501L, 48001L, 48501L, 49001L, 49501L, 50001L, 50501L, 
51001L, 51501L, 52001L, 52501L, 53001L, 53501L, 54001L, 54501L, 
55001L, 55501L, 56001L, 56501L, 57001L, 57501L, 58001L, 58501L, 
59001L, 59501L, 60001L, 60501L, 61001L, 61501L, 62001L, 62501L, 
63001L, 63501L, 64001L, 64501L, 65001L, 65501L, 66001L, 66501L, 
67001L, 67501L, 68001L, 68501L, 69001L, 69501L, 70001L, 70501L, 
71001L, 71501L, 72001L, 72501L, 73001L, 73501L, 74001L, 74501L, 
75001L, 75501L, 76001L, 76501L, 77001L, 77501L, 78001L, 78501L, 
79001L, 79501L, 80001L, 80501L, 81001L, 81501L, 82001L, 82501L, 
83001L, 83501L, 84001L, 84501L, 85001L, 85501L, 86001L, 86501L, 
87001L, 87501L, 88001L, 88501L, 89001L, 89501L, 90001L, 90501L, 
91001L, 91501L, 92001L, 92501L, 93001L, 93501L, 94001L, 94501L, 
95001L, 95501L, 96001L, 96501L, 97001L, 97501L, 98001L, 98501L, 
99001L, 99501L), class = "data.frame")

解决方案

Try this instead:

# Load data.table and build a data.table from the sample data frame so that
# new columns can be added in place with `:=`.
library(data.table)
dt <- data.table(mydata)

# VARIABLE looks like <NATIONALITY>_<YEAR>. The first capture group is greedy,
# so the split happens at the last underscore in the value.
# Part before the underscore -> NATIONALITY.
dt[, NATIONALITY := sub('(.*)_(.*)', '\\1', VARIABLE)]
# Part after the underscore -> YEAR (left as the character string sub() returns).
dt[, YEAR := sub('(.*)_(.*)', '\\2', VARIABLE)]

这篇关于r data.frame创建新变量的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆