将R data.frame导出到SPSS [英] Export R data.frame to SPSS

查看:454
本文介绍了将R data.frame导出到SPSS的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

有一个包含可以写SPS和CSV文件的函数 write.foreign()的包 foreign 。 SPS文件比可以将CSV字段读入SPSS包括标签。到目前为止,但这个函数有一些问题:


  1. 较新的SPSS版本可能会显示一个错误, DATA LIST

  2. 如果通过 attr $ c>,这些都会丢失。

  3. 即使SPSS vesion支持高达32767的字符串,函数 write.foreign()

  4. 如果使用任何字符变量,则为星号(*),但是较新的SPSS版本无法处理。

  5. CSV文件以逗号分隔,并且不能使用引号,因此字符串中不允许使用逗号。

  6. 非ASCII字符崩溃导入

  7. 如果您的字符包含任何NA值,您将看到...

...这样的错误消息:

  if(any(lengths> 255L)) stop(不能处理大于255的字符变量):
缺少值,其中TRUE / FALSE需要

我花了很多时间,然后发现了一个很好的帖子( http://r.789695.n4.nabble.com/SPSS-export-in-R-package-foreign-td921491.html )开始并使其更好。

解决方案

此函数可替代

注意:为了避免SPSS发现问题CSV文件,请至少为 datafile 指定完整路径(!)(如果使用原始 foreign:write.foreign()注意:此脚本将替换制表符(TAB)和其他间距(包括CR + LF)字符串通过空白而没有警告。可以考虑使用 GET DATA 而不是麻烦的 DATA LIST 来解决这个限制。



注意:可能有一个警告在FUN(X [[i]],...):可能完全丧失模数精度

POSIXlt

POSIXct 变量尚未由脚本正确处理。

  writeForeignMySPSS = function(df,datafile,codefile,varnames = NULL,len = 32767){
adQuote< - function(x)paste(\,x,\ )

#DATA LIST的最后一个变量不能为空
if(any(is.na(df [[length(df)]]))) {
df $ END_CASE = 0
}

#http://stackoverflow.com/questions/5173692/how-to-return-number-of-decimal-places- in-r
decimalplaces< - function(x){
y = x [!is.na(x)]
if(length(y)== 0){
return(0)
}
if(any((y %% 1)!= 0)){
info = strsplit(sub('0 + $','字符(y)),。,fixed = TRUE)
info = info [sapply(info,FUN = length)== 2]
if(length(info)> = 2){
dec = nchar(unlist(info))[seq(2,length(info),2)]
} else {
return(0)
}
return(max(dec,na.rm = T))
} else {
return(0)
}
}

dfn < lapply(df,function(x)if(is.factor(x))
as.numeric(x)
else x)

#布尔变量b $ b bv = sapply(dfn,is.logical)
for(v其中(bv)){
dfn [[v]] = ifelse(dfn [[v]],1,0 )
}

varlabels< - names(df)
#在适用的地方使用注释
for(i in 1:length(df)){
cm = comment(df [[i]])
if(ischaracter(cm)&& (length(cm)> 0)){
varlabels [i] = comment(df [[i]])
}
}

if .nu​​ll(varnames)){
varnames< - abbreviate(names(df),8L)
if(any(sapply(varnames,nchar)> 8L))
stop我不能将变量名称缩写为八个或更少的字母)
if(any(varnames!= varlabels))
warning(一些变量名称被缩写)
}
varnames< - gsub([^ [:alnum:] _ \\ $ @#],\\。,varnames)
dl.varnames< - varnames
chv = sapply(df,is.character)
if(any(chv)){
for(v in which(chv)){
dfn [[v]] = gsub \\ s,,dfn [[v]])
}
lengths< - sapply(df [chv],function(v)max(nchar(v),na。 rm = T))
if(any(lengths> len)){
warning(paste(Clipped strings in,names(df [chv]),to,len, ))
for(v in which(chv)){
df [[v]] = substr(df [[v]],start = 1,stop = len)
}
}
长度[is.infinite(长度)] = 0
长度[长度< 1] = 1
lengths< - paste((A,lengths,),sep =)
#star< - ifelse(c(FALSE,diff )> 1)),*,
dl.varnames [chv]< - paste(dl.varnames [chv],lengths)
}

#和bools
nmv = sapply(df,is.numeric)
dbv = sapply(df,is.numeric)
nv =(nmv | dbv)
decimals = sapply [nv],FUN = decimalplaces)
dl.varnames [nv] = paste(dl.varnames [nv],(F,decimals + 8,。,decimals,),sep = )
if(length(bv)> 0){
dl.varnames [bv] = paste(dl.varnames [bv],(F1.0))
}
rmv =!(chv | nv | bv)
if(length(rmv)> 0){
dl.varnames [rmv] = paste(dl.varnames [rmv], (F8.0))
}
#输出中断
brv = seq(1,length(dl.varnames),10)
dl.varnames [brv] =粘贴(dl.varnames [brv],\\\
,sep =)

cat(SET LOCALE = ENGLISH.\\\
,file = codefile)
cat (DATA LIST FILE =,adQuote(datafile),free(TAB)\\\
,file = codefile,append = TRUE)
cat(/,dl.varnames,。 \ n,file = codefile,append = TRUE)
cat(VARIABLE LABELS\ n,file = codefile,append = TRUE)
cat(粘贴(varnames,adQuote(varlabels) \ n),.\\\
,file = codefile,
append = TRUE)
因子< - sapply(df,is.factor)
if因此){
cat(\\\
VALUE LABELS\\\
,file = codefile,append = TRUE)
for(v其中\ n,file = codefile,append = TRUE)
cat(varnames [v],\\\
,file = codefile,append = TRUE)
levs < [v]])
cat(粘贴(1:length(levs),adQuote(levs),\\\
,sep =),
file = codefile,append = TRUE)
}
cat(。\\\
,file = codefile,append = TRUE)
}

#存储在attr < - !unlist(lapply(sapply(df,FUN = attr,which =1),FUN = is.null))
if(any(attribs)){
cat \\ nVALUE LABELS\\\
,file = codefile,append = TRUE)
for(v其中(attribs)){
cat(/ \\\
,file = codefile,append = TRUE )
cat(varnames [v],\\\
,file = codefile,append = TRUE)
#检查标记值
tc = list()
for in dimnames(table(df [[v]]))[[1]]){
if(!is.null(tcl < - attr(df [[v]],tcv))){
tc [tcv] = tcl
}
}
cat(paste(names(tc),tc,\\\
,sep =),
file = codefile,append = TRUE)
}
cat(。\\\
,file = codefile,append = TRUE)
}

ordinal< - sapply(df,is.ordered)
if(any(ordinal)){
tmp = varnames [ordinal]
brv = seq(1,length(tmp),10)
tmp [brv] =粘贴(tmp [brv],\\\

cat(粘贴(\\\
VARIABLE LEVEL,粘贴(tmp,collapse =),(ORDINAL)。 \ n),
file = codefile,append = TRUE)
}
num < - sapply(df,is.numeric)
if {
tmp = varnames [num]
brv = seq(1,length(tmp),10)
tmp [brv] = paste(tmp [brv],\\\

cat(粘贴(\\\
VARIABLE LEVEL,粘贴(tmp,collapse =),(SCALE).\\\
),
file = codefile,append = TRUE)
}
cat(\\\
EXECUTE.\\\
,file = codefile,append = TRUE)

write.table(dfn,file = datafile,row = FALSE,col = FALSE,
sep =\t,quote = F,na =,eol =\\\
,fileEncoding =UTF-8)
} / code>

从长远来看,这些更改可能被视为合并到 foreign package。不幸的是,r项目的错误报告系统目前仅限于以前注册的开发人员。


There is a package foreign with a function write.foreign() that can write a SPS and CSV file. The SPS file than can read the CSV fiel into SPSS including labels. Fine so far, but there are some issues with that function:

  1. Newer SPSS versions may show an error that you have too few format definitions in DATA LIST
  2. If there are "labels" for numeric variables stored via attr(), these are lost.
  3. Even if the SPSS vesion supports strings up to 32767, the function write.foreign() stops if there are more than 255 in any variable.
  4. Theres a star (*) if any character variables are used, but newer SPSS versions cannot handle that.
  5. The CSV file is comma-separated and does (can) not use quotes, therefore no commas are allowed in strings (character)
  6. Non-ASCII caracters (e.g. umlauts) will crash the import
  7. Should you have a character that contains any NA value, you'll see...

... an error message like this:

Error in if (any(lengths > 255L)) stop("Cannot handle character variables longer than 255") : 
    missing value where TRUE/FALSE needed

I spent a lot of time with that and then found a good posting (http://r.789695.n4.nabble.com/SPSS-export-in-R-package-foreign-td921491.html) to start on and make it better. Here's my result, I'd like to share with you.

解决方案

This function is a replacement for foreign:write.foreign to handle the issues stated above.

Note: To avoid issues with SPSS finding the CSV file, please specify the full path (!) at least for datafile (also if using the original foreign:write.foreign()).

Note: This script will replace a tabulator (TAB) and other spacing (incl. CR+LF) in strings by a blank without warning. One may consider using GET DATA instead of the troublesome DATA LIST to solve that limitation.

Note: There may be a warning In FUN(X[[i]], ...) : probable complete loss of accuracy in modulus - this refers to counting the decimals and can be ignored.

Note: POSIXlt and POSIXct variables are not yet handled by the script properly.

writeForeignMySPSS = function (df, datafile, codefile, varnames = NULL, len = 32767) {
    adQuote <-  function (x) paste("\"", x, "\"", sep = "")

    # Last variable must not be empty for DATA LIST
    if (any(is.na(df[[length(df)]]))) {
        df$END_CASE = 0
    }

    # http://stackoverflow.com/questions/5173692/how-to-return-number-of-decimal-places-in-r
    decimalplaces <- function(x) {
        y = x[!is.na(x)]
        if (length(y) == 0) {
            return(0)
        }
        if (any((y %% 1) != 0)) {
            info = strsplit(sub('0+$', '', as.character(y)), ".", fixed=TRUE)
            info = info[sapply(info, FUN=length) == 2]
            if (length(info) >= 2) {
              dec = nchar(unlist(info))[seq(2, length(info), 2)]
            } else {
              return(0)
            }
            return(max(dec, na.rm=T))
        } else {
            return(0)
        }
    }

    dfn <- lapply(df, function(x) if (is.factor(x))
        as.numeric(x)
        else x)

    # Boolean variables (dummy coding)
    bv = sapply(dfn, is.logical)
    for (v in which(bv)) {
        dfn[[v]] = ifelse(dfn[[v]], 1, 0)
    }

    varlabels <- names(df)
    # Use comments where applicable
    for (i in 1:length(df)) {
      cm = comment(df[[i]])
      if (is.character(cm) && (length(cm) > 0)) {
        varlabels[i] = comment(df[[i]])
      }
    }

    if (is.null(varnames)) {
        varnames <- abbreviate(names(df), 8L)
        if (any(sapply(varnames, nchar) > 8L))
            stop("I cannot abbreviate the variable names to eight or fewer letters")
        if (any(varnames != varlabels))
            warning("some variable names were abbreviated")
    }
    varnames <- gsub("[^[:alnum:]_\\$@#]", "\\.", varnames)
    dl.varnames <- varnames
    chv = sapply(df, is.character)
    if (any(chv)) {
        for (v in which(chv)) {
            dfn[[v]] = gsub("\\s", " ", dfn[[v]])
        }
        lengths <- sapply(df[chv], function(v) max(nchar(v), na.rm=T))
        if (any(lengths > len)) {
            warning(paste("Clipped strings in", names(df[chv]), "to", len, "characters"))
            for (v in which(chv)) {
                df[[v]] = substr(df[[v]], start=1, stop=len)
            }
        }
        lengths[is.infinite(lengths)] = 0
        lengths[lengths < 1] = 1
        lengths <- paste("(A", lengths, ")", sep = "")
        # star <- ifelse(c(FALSE, diff(which(chv) > 1)), " *",
        dl.varnames[chv] <- paste(dl.varnames[chv], lengths)
    }

    # decimals and bools
    nmv = sapply(df, is.numeric)
    dbv = sapply(df, is.numeric)
    nv = (nmv | dbv)
    decimals = sapply(df[nv], FUN=decimalplaces)
    dl.varnames[nv] = paste(dl.varnames[nv], " (F", decimals+8, ".", decimals, ")", sep="")
    if (length(bv) > 0) {
        dl.varnames[bv] = paste(dl.varnames[bv], "(F1.0)")
    }
    rmv = !(chv | nv | bv)
    if (length(rmv) > 0) {
        dl.varnames[rmv] = paste(dl.varnames[rmv], "(F8.0)")
    }
    # Breaks in output
    brv = seq(1, length(dl.varnames), 10)
    dl.varnames[brv] = paste(dl.varnames[brv], "\n", sep=" ")

    cat("SET LOCALE = ENGLISH.\n", file = codefile)
    cat("DATA LIST FILE=", adQuote(datafile), " free (TAB)\n", file = codefile, append = TRUE)
    cat("/", dl.varnames, " .\n\n", file = codefile, append = TRUE)
    cat("VARIABLE LABELS\n", file = codefile, append = TRUE)
    cat(paste(varnames, adQuote(varlabels), "\n"), ".\n", file = codefile,
        append = TRUE)
    factors <- sapply(df, is.factor)
    if (any(factors)) {
        cat("\nVALUE LABELS\n", file = codefile, append = TRUE)
        for (v in which(factors)) {
            cat("/\n", file = codefile, append = TRUE)
            cat(varnames[v], " \n", file = codefile, append = TRUE)
            levs <- levels(df[[v]])
            cat(paste(1:length(levs), adQuote(levs), "\n", sep = " "),
                file = codefile, append = TRUE)
        }
        cat(".\n", file = codefile, append = TRUE)
    }

    # Labels stored in attr()
    attribs <- !unlist(lapply(sapply(df, FUN=attr, which="1"), FUN=is.null))
    if (any(attribs)) {
        cat("\nVALUE LABELS\n", file = codefile, append = TRUE)
        for (v in which(attribs)) {
            cat("/\n", file = codefile, append = TRUE)
            cat(varnames[v], " \n", file = codefile, append = TRUE)
            # Check labeled values
            tc = list()
            for (tcv in dimnames(table(df[[v]]))[[1]]) {
                if (!is.null(tcl <- attr(df[[v]], tcv))) {
                    tc[tcv] = tcl
                }
            }
            cat(paste(names(tc), tc, "\n", sep = " "),
                file = codefile, append = TRUE)
        }
        cat(".\n", file = codefile, append = TRUE)
    }

    ordinal <- sapply(df, is.ordered)
    if (any(ordinal)) {
        tmp = varnames[ordinal]
        brv = seq(1, length(tmp), 10)
        tmp[brv] = paste(tmp[brv], "\n")
        cat(paste("\nVARIABLE LEVEL", paste(tmp, collapse=" "), "(ORDINAL).\n"),
            file = codefile, append = TRUE)
    }
    num <- sapply(df, is.numeric)
    if (any(num)) {
        tmp = varnames[num]
        brv = seq(1, length(tmp), 10)
        tmp[brv] = paste(tmp[brv], "\n")
        cat(paste("\nVARIABLE LEVEL", paste(tmp, collapse=" "), "(SCALE).\n"),
            file = codefile, append = TRUE)
    }
    cat("\nEXECUTE.\n", file = codefile, append = TRUE)

    write.table(dfn, file = datafile, row = FALSE, col = FALSE,
                sep = "\t", quote = F, na = "", eol = "\n", fileEncoding="UTF-8")
}

On the long term, the changes might be considered to be merged into the foreignpackage. Unfortunately, the bug reporting system for the r-project is currently limited to previously registered developers.

这篇关于将R data.frame导出到SPSS的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆