PCA使用ggbiplot进行缩放 [英] PCA Scaling with ggbiplot

查看：1567 发布时间：2018/4/24 20:07:25 r ggplot2 pca ggbiplot

本文介绍了PCA使用ggbiplot进行缩放的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

我试图用 prcomp 和 ggbiplot 来绘制一个主成分分析。我正在获取单位圆外的数据值，并且无法在调用 prcomp 之前重新调整数据的大小，以便可以将数据限制为（酒）
需要（ggbiplot）
wine.pca = prcomp（单位）。

  wine [，1：3]，scale。= TRUE）
 ggbiplot（wine.pca，obs.scale = 1，
 var.scale = 1，groups = wine.class，ellipse = TRUE，circle = TRUE）

在调用 prcomp ：

  wine2 = wine [，1：3] 
 mean = apply （wine2,2，mean）
 sd = apply（wine2,2，mean）
 for（i in 1：ncol（wine2））{
 wine2 [，i] =（wine2 [ （wine2，scale。= TRUE）
 ggbiplot（wine2.pca，obs.scale = 1）[b] 
 
 wine2.pca = 1，
 var.scale = 1，groups = wine.class，ellipse = TRUE，circle = TRUE）

ggbiplot 安装包如下：

  r equire（devtools）
 install_github（'ggbiplot'，'vqv'）

代码块：

根据@Brian Hanson的评论，我添加了一个反映我想要获得的输出的额外图像。

解决方案

$ b
ggbiplot2
我编辑了plot函数的代码，并能够获得我想要的功能。 = function（pcobj，choices = 1：2，scale = 1，pc.biplot = TRUE， obs.scale = 1 - scale，var.scale = scale， groups = NULL，ellipse = FALSE ，ellipse.prob = 0.68， labels = NULL，labels.size = 3，alpha = 1， var.axes = TRUE， circle = FALSE，circle.prob = 0.69， varname.size = 3，varna me.adjust = 1.5， varname.abbrev = FALSE，...） { library（ggplot2） library（plyr） library（scales）库（grid） stopifnot（长度（选择）== 2）＃恢复SVD if（inherits（pcobj，'prcomp' ））{ nobs.factor< - sqrt（nrow（pcobj $ x）-1） d< - pcobj $ sdev u< - 扫描（pcobj $ x，2，1 /（d * nobs.factor），FUN ='*'） v< - pcobj $ rotation } else if（inherits（pcobj，'princomp'））{ nobs.factor < - sqrt（pcobj $ n.obs） d< - pcobj $ sdev u< - 扫描（pcobj $分数，2,1 /（d * nobs.factor），FUN ='* '） v < - pcobj $ loadings } else if（继承（pcobj，'PCA'））{ nobs.factor< - sqrt（nrow（pcobj $ call $ X）） d < - unlist（sqrt（pcobj $ eig）[1]）$ b $ bu < - sweep（pcobj $ ind $ coord，2,1 /（d * nobs.factor），FUN =' *'） v < - 扫描（pcobj $ var $ coord，2，sqrt（pcobj $ eig [1：ncol（pcobj $ var $ coord），1] ），FUN =/） } else { stop（'期望类prcomp，princomp或PCA'的对象） } ＃得分 df.u< - as.data.frame（sweep（u [，choices]，2，d [choices] ^ obs.scale，FUN ='*'））＃ v< - sweep（v，2，d ^ var.scale，FUN ='*'） df.v < - as.data.frame（v [，choices]）名称（df.u）< - c（'xvar'，'yvar'）名称（df.v）< - 名称（df.u） if（pc.biplot）{ df.u< - df.u * nobs.factor } ＃缩放相关圆的半径，使它对应于标准化PC分数的＃a数据椭圆 r < - 1 ＃比例方向 v.scale < - rowSums（v ^ 2） df.v < - df.v / sqrt（max（v.scale）） ##比例分数 r.scale = sqrt（max（df.u [，1] ^ 2 + df.u [，2] ^ 2）） df.u = .99 * df.u / r.scale ＃更改轴 if（obs.scale == 0）{ u.axis.labs< - paste（'s （'PC'，choices，sep =''） } ＃将解释的方差比例附加到轴标签上 u.axis.labs < - paste（u.axis.labs， sprintf（'（％0.1f %%解释变量）'， 100 * pcobj $ sdev [options] ^ 2 / sum（pcobj $ sdev ^ 2）））＃得分标签 if（！is .null（标签））{ df.u $标签< - 标签 } ＃分组变量 if（！is.null（groups）） { df.u $ groups< - groups } ＃变量名称 if（varname.abbrev）{ df.v $ varname < - abbreviate（rownames（v）） } else { df.v $ varname< - rownames（v） } ＃文本的变量（df.v，（180 / pi）* atan（yvar / xvar）） df.v $ hjust = with（df.v，（ 1 - varname.adjust * sign（xvar））/ 2）＃底图 g < - ggplot（data = df.u，aes（x = xvar，y = yvar））+ xlab（u.axis.labs [1]）+ ylab（u.axis.labs [2] ）+ coord_equal（） if（var.axes）{ ＃绘制圆圈如果（圆圈） { theta< - c seq（-pi，pi，length = 50），seq（pi，-pi，length = 50）） circle < - data.frame（xvar = r * cos（theta），yvar = r * sin （theta）） g < - g + geom_path（data = circle，color = muted（'white'）， size = 1/2，alpha = 1/3） } ＃绘制方向 g < - g + geom_segment（data = df.v， aes（x = 0，y = 0，xend = xvar，yend = yvar）， arrow =箭头（长度=单位（1/2，'picas'））， color =静音（'red'）） } ＃如果（！is.null（df.u $ labels））{ if（！is.null（df.u $ groups））{ g，绘制标签或点 < - g + geom_text（aes（label = labels，color = groups）， size = labels.size） } else { g< -g + geom_text（aes（label = labels），size = labels.size） } } else { if（！is.null（df.u $ groups））{ g < - g + geom_point（aes（color = groups），alpha = alpha） } else { g <-g + geom_point（alpha = alpha） } } ＃如果存在组，覆盖浓度椭圆if（！is.null（ df.u $ groups）&&椭圆）{ theta <-c（seq（-pi，pi，length = 50），seq（pi，-pi，length = 50）） circle <-cbind（cos（theta ），sin（theta）） ell < - ddply（df.u，'groups'，function（x）{ if（nrow（x）<2）{ return（NULL） } else if（nrow（x）== 2）{ sigma< - var（cbind（x $ xvar，x $ yvar）） } （x（x $ xvar），var（x $ yvar））） } mu< -c（平均值（x $ xvar），平均（x $ yvar）） ed< - sqrt（qchisq（ellipse.prob，df = 2）） data.frame（sweep（circle％*％chol sigma）* ed， 2，mu，FUN ='+'）， groups = x $ groups [1]）$ b $ b}） names（ell）[1：2]< - c（'xvar '，'yvar'） g < - g + geom_path（data = ell，aes（color = groups，group = groups）） } ＃标记变量轴 if（var.axes）{ g< - g + geom_text（data = df.v， aes（label = varname，x = xvar ，y = yvar， angle = angle，hjust = hjust）， color ='darkred'，size = varname.size） } ＃更改图例的名称对于组＃if（！is.null（groups））{ ＃g < - g + scale_color_brewer（name = deparse（substitute（groups）），＃palette ='Dark2 '）＃} ＃TODO：添加第二组轴返回（g） }

I'm trying to plot a principal component analysis using prcomp and ggbiplot. I'm getting data values outside of the unit circle, and haven't been able to rescale the data prior to calling prcomp in such a way that I can constrain the data to the unit circle.
data(wine) require(ggbiplot) wine.pca=prcomp(wine[,1:3],scale.=TRUE) ggbiplot(wine.pca,obs.scale = 1, var.scale=1,groups=wine.class,ellipse=TRUE,circle=TRUE)
I tried scaling by subtracting mean and dividing by standard deviation before calling prcomp:
wine2=wine[,1:3] mean=apply(wine2,2,mean) sd=apply(wine2,2,mean) for(i in 1:ncol(wine2)){ wine2[,i]=(wine2[,i]-mean[i])/sd[i] } wine2.pca=prcomp(wine2,scale.=TRUE) ggbiplot(wine2.pca,obs.scale=1, var.scale=1,groups=wine.class,ellipse=TRUE,circle=TRUE)
ggbiplot package installed as follows:
require(devtools) install_github('ggbiplot','vqv')
Output of either code chunk:

Per @Brian Hanson's comment below, I'm adding an additional image reflecting the output I'm trying to get.

解决方案
I edited the code for the plot function and was able to get the functionality I wanted.
ggbiplot2=function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE, obs.scale = 1 - scale, var.scale = scale, groups = NULL, ellipse = FALSE, ellipse.prob = 0.68, labels = NULL, labels.size = 3, alpha = 1, var.axes = TRUE, circle = FALSE, circle.prob = 0.69, varname.size = 3, varname.adjust = 1.5, varname.abbrev = FALSE, ...) { library(ggplot2) library(plyr) library(scales) library(grid) stopifnot(length(choices) == 2) # Recover the SVD if(inherits(pcobj, 'prcomp')){ nobs.factor <- sqrt(nrow(pcobj$x) - 1) d <- pcobj$sdev u <- sweep(pcobj$x, 2, 1 / (d * nobs.factor), FUN = '*') v <- pcobj$rotation } else if(inherits(pcobj, 'princomp')) { nobs.factor <- sqrt(pcobj$n.obs) d <- pcobj$sdev u <- sweep(pcobj$scores, 2, 1 / (d * nobs.factor), FUN = '*') v <- pcobj$loadings } else if(inherits(pcobj, 'PCA')) { nobs.factor <- sqrt(nrow(pcobj$call$X)) d <- unlist(sqrt(pcobj$eig)[1]) u <- sweep(pcobj$ind$coord, 2, 1 / (d * nobs.factor), FUN = '*') v <- sweep(pcobj$var$coord,2,sqrt(pcobj$eig[1:ncol(pcobj$var$coord),1]),FUN="/") } else { stop('Expected a object of class prcomp, princomp or PCA') } # Scores df.u <- as.data.frame(sweep(u[,choices], 2, d[choices]^obs.scale, FUN='*')) # Directions v <- sweep(v, 2, d^var.scale, FUN='*') df.v <- as.data.frame(v[, choices]) names(df.u) <- c('xvar', 'yvar') names(df.v) <- names(df.u) if(pc.biplot) { df.u <- df.u * nobs.factor } # Scale the radius of the correlation circle so that it corresponds to # a data ellipse for the standardized PC scores r <- 1 # Scale directions v.scale <- rowSums(v^2) df.v <- df.v / sqrt(max(v.scale)) ## Scale Scores r.scale=sqrt(max(df.u[,1]^2+df.u[,2]^2)) df.u=.99*df.u/r.scale # Change the labels for the axes if(obs.scale == 0) { u.axis.labs <- paste('standardized PC', choices, sep='') } else { u.axis.labs <- paste('PC', choices, sep='') } # Append the proportion of explained variance to the axis labels u.axis.labs <- paste(u.axis.labs, sprintf('(%0.1f%% explained var.)', 100 * pcobj$sdev[choices]^2/sum(pcobj$sdev^2))) # Score Labels if(!is.null(labels)) { df.u$labels <- labels } # Grouping variable if(!is.null(groups)) { df.u$groups <- groups } # Variable Names if(varname.abbrev) { df.v$varname <- abbreviate(rownames(v)) } else { df.v$varname <- rownames(v) } # Variables for text label placement df.v$angle <- with(df.v, (180/pi) * atan(yvar / xvar)) df.v$hjust = with(df.v, (1 - varname.adjust * sign(xvar)) / 2) # Base plot g <- ggplot(data = df.u, aes(x = xvar, y = yvar)) + xlab(u.axis.labs[1]) + ylab(u.axis.labs[2]) + coord_equal() if(var.axes) { # Draw circle if(circle) { theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50)) circle <- data.frame(xvar = r * cos(theta), yvar = r * sin(theta)) g <- g + geom_path(data = circle, color = muted('white'), size = 1/2, alpha = 1/3) } # Draw directions g <- g + geom_segment(data = df.v, aes(x = 0, y = 0, xend = xvar, yend = yvar), arrow = arrow(length = unit(1/2, 'picas')), color = muted('red')) } # Draw either labels or points if(!is.null(df.u$labels)) { if(!is.null(df.u$groups)) { g <- g + geom_text(aes(label = labels, color = groups), size = labels.size) } else { g <- g + geom_text(aes(label = labels), size = labels.size) } } else { if(!is.null(df.u$groups)) { g <- g + geom_point(aes(color = groups), alpha = alpha) } else { g <- g + geom_point(alpha = alpha) } } # Overlay a concentration ellipse if there are groups if(!is.null(df.u$groups) && ellipse) { theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50)) circle <- cbind(cos(theta), sin(theta)) ell <- ddply(df.u, 'groups', function(x) { if(nrow(x) < 2) { return(NULL) } else if(nrow(x) == 2) { sigma <- var(cbind(x$xvar, x$yvar)) } else { sigma <- diag(c(var(x$xvar), var(x$yvar))) } mu <- c(mean(x$xvar), mean(x$yvar)) ed <- sqrt(qchisq(ellipse.prob, df = 2)) data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = '+'), groups = x$groups[1]) }) names(ell)[1:2] <- c('xvar', 'yvar') g <- g + geom_path(data = ell, aes(color = groups, group = groups)) } # Label the variable axes if(var.axes) { g <- g + geom_text(data = df.v, aes(label = varname, x = xvar, y = yvar, angle = angle, hjust = hjust), color = 'darkred', size = varname.size) } # Change the name of the legend for groups # if(!is.null(groups)) { # g <- g + scale_color_brewer(name = deparse(substitute(groups)), # palette = 'Dark2') # } # TODO: Add a second set of axes return(g) }

这篇关于PCA使用ggbiplot进行缩放的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持IT屋！

查看全文

PCA使用ggbiplot进行缩放 [英] PCA Scaling with ggbiplot

问题描述

相关文章

其他开发最新文章

热门教程

热门工具

登录关闭

PCA使用ggbiplot进行缩放 [英] PCA Scaling with ggbiplot

问题描述

相关文章

其他开发最新文章

热门教程

热门工具

登录 关闭

登录关闭