通过颜色识别点 [英] Identifying points by color

查看：53 发布时间：2021/4/22 19:42:20 r machine-learning data-visualization cluster-analysis data-manipulation

本文介绍了通过颜色识别点的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

我正在按照这篇教程操作: https://www.rpubs.com/loveb/som .该教程演示了如何在 iris 数据集上使用 Kohonen 网络(也称 SOM，一种机器学习算法).

可以做到这一点吗?目前，我正尝试使用 iris.som$classif 来追踪哪些点落在哪个圆圈中.有更好的方法吗?

更新:@Jonny Phelps向我展示了如何识别三角形形式的观测值(请参见下面的答案).但是我仍然不确定是否可以识别不规则形状的形式.例如.

在之前的一个帖子(Labelling Points on a Plot (R Language))中，有位用户向我展示了如何给网格上的每个圆圈分配任意编号:

根据上图，如何使用"som$classif"语句找出圆圈92、91、82、81、72和71中有哪些观测值?

谢谢

解决方案

现在使用Shiny App！

也可以使用 plotly 方案:将鼠标悬停在单个神经元上，即可显示与之关联的 iris 行名(此处称为 id).基于您的 iris.som 数据和 Jonny Phelps 的网格方法，只需把行号拼接成字符串分配给各个神经元，并在鼠标悬停时显示出来:

 # Interactive view of the SOM grid: hovering a neuron lists the rows mapped to it.
library(ggplot2)
library(plotly)

# One row per observation: the neuron it was mapped to (g) and its row index.
ga <- data.frame(
  g = iris.som$unit.classif,
  sample = seq_len(nrow(iris.som$data[[1]]))
)

# One row per neuron, starting from the grid coordinates stored in the SOM.
grid_pts <- as.data.frame(iris.som$grid$pts)

# rep() has no `by` argument (it would be silently ignored), so repeat the
# column index explicitly with `times`.
grid_pts$column <- rep(seq_len(iris.som$grid$xdim), times = iris.som$grid$ydim)
grid_pts$row <- rep(seq_len(iris.som$grid$ydim), each = iris.som$grid$xdim)
grid_pts$classif <- seq_len(nrow(grid_pts))

# Group the observation indices by neuron once; neurons without observations
# keep an empty entry because every neuron number is a factor level.
samples_by_unit <- split(ga$sample, factor(ga$g, levels = grid_pts$classif))
grid_pts$id <- vapply(
  samples_by_unit, paste, character(1),
  collapse = ", ", USE.NAMES = FALSE
)
grid_pts$count <- lengths(samples_by_unit, use.names = FALSE)
grid_pts$count <- factor(grid_pts$count, levels = 0:max(grid_pts$count))

# row, column and id are mapped as extra aesthetics so that they are carried
# into the plotly hover text.
p1 <- ggplot(
  grid_pts,
  aes(x = x, y = y, colour = count, row = row, column = column, id = id)
) +
  geom_point(size = 8) +
  scale_colour_manual(
    values = c("grey50", heat.colors(length(unique(grid_pts$count))))
  ) +
  theme_void() +
  theme(plot.margin = unit(c(1, rep(.3, 3)), "cm"))

ggplotly(p1)

下面是一个完整的 Shiny 应用程序，支持套索选择，并以表格形式显示所选数据:

  # Complete Shiny app: SOM grid with lasso selection and a table of the selection.
# library() fails immediately when a package is missing; require() would only
# return FALSE and let the script fail later.
invisible(suppressPackageStartupMessages(
  lapply(
    c("shiny", "dplyr", "ggplot2", "plotly", "kohonen", "GGally", "DT"),
    library,
    character.only = TRUE
  )
))

iris_complete <- iris[complete.cases(iris), ]
iris_unique <- unique(iris_complete) # remove duplicates

# Scale the four numeric columns (factor columns cannot be scaled).
iris.sc <- scale(iris_unique[, 1:4])

# Build the grid.
iris.grid <- somgrid(xdim = 10, ydim = 10, topo = "hexagonal", toroidal = TRUE)

set.seed(33) # for reproducibility
iris.som <- som(
  iris.sc,
  grid = iris.grid,
  rlen = 700,
  alpha = c(0.05, 0.01),
  keep.data = TRUE
)

# One row per observation: the neuron it was mapped to (g) and its row index.
ga <- data.frame(
  g = iris.som$unit.classif,
  sample = seq_len(nrow(iris.som$data[[1]]))
)

# One row per neuron, starting from the grid coordinates stored in the SOM.
grid_pts <- as.data.frame(iris.som$grid$pts)

# rep() has no `by` argument (it would be silently ignored), so repeat the
# column index explicitly with `times`.
grid_pts$column <- rep(seq_len(iris.som$grid$xdim), times = iris.som$grid$ydim)
grid_pts$row <- rep(seq_len(iris.som$grid$ydim), each = iris.som$grid$xdim)
grid_pts$classif <- seq_len(nrow(grid_pts))

# Group the observation indices by neuron once; neurons without observations
# keep an empty entry because every neuron number is a factor level.
samples_by_unit <- split(ga$sample, factor(ga$g, levels = grid_pts$classif))
grid_pts$id <- vapply(
  samples_by_unit, paste, character(1),
  collapse = ", ", USE.NAMES = FALSE
)
grid_pts$count <- lengths(samples_by_unit, use.names = FALSE)
grid_pts$count <- factor(grid_pts$count, levels = 0:max(grid_pts$count))

# Shiny app, adapted from
# https://gist.github.com/dgrapov/128e3be71965bf00495768e47f0428b9

ui <- fluidPage(
  fluidRow(
    column(12, plotlyOutput("plot", height = "600px")),
    column(12, DT::dataTableOutput("data_table"))
  )
)

server <- function(input, output) {
  # Points most recently selected in the plotly widget registered as "p1".
  selected <- reactive({
    event_data("plotly_selected", source = "p1")
  })

  # Full grid plus the neurons whose coordinates match the current selection.
  grid_state <- reactive({
    picked <- selected()
    sel <- tryCatch(
      dplyr::filter(
        grid_pts,
        paste(x, y, sep = "_") %in% paste(picked$x, picked$y, sep = "_")
      ),
      error = function(e) {
        # Report the problem and fall back to an empty selection so the plot
        # and the table keep rendering.
        warning(
          "Could not match the selection to grid points: ",
          conditionMessage(e),
          call. = FALSE
        )
        grid_pts[0, , drop = FALSE]
      }
    )
    list(data = grid_pts, sel = sel)
  })

  output$plot <- renderPlotly({
    state <- grid_state()
    req(state)
    p <- ggplot(
      data = state$data,
      aes(
        x = x, y = y, classif = classif, colour = count,
        row = row, column = column, id = id
      )
    ) +
      geom_point(size = 8) +
      scale_colour_manual(
        values = c("grey50", heat.colors(length(unique(grid_pts$count))))
      ) +
      theme_void() +
      theme(plot.margin = unit(c(1, rep(.3, 3)), "cm"))

    # Overlay the selected neurons in blue, if there are any.
    highlighted <- state$sel
    if (!is.null(highlighted) && nrow(highlighted) > 0) {
      p <- p + geom_point(
        data = highlighted,
        mapping = aes(
          x = x, y = y, classif = classif, count = count,
          row = row, column = column, id = id
        ),
        color = "blue", size = 5, inherit.aes = FALSE
      )
    }
    ggplotly(p, source = "p1") %>% layout(dragmode = "lasso")
  })

  output$data_table <- DT::renderDataTable(
    grid_state()$sel,
    filter = "top",
    options = list(pageLength = 5, autoWidth = TRUE)
  )
}

shinyApp(ui, server)

I am following the tutorial over here : https://www.rpubs.com/loveb/som . This tutorial shows how to use the Kohonen Network (also called SOM, a type of machine learning algorithm) on the iris data.

I ran this code from the tutorial:

library(kohonen) #fitting SOMs
library(ggplot2) #plots
library(GGally) #plots
library(RColorBrewer) #colors, using predefined palettes

# Keep only complete iris rows, then drop duplicated rows.
iris_complete <- iris[complete.cases(iris), ]
iris_unique <- unique(iris_complete)

# Scale the four numeric columns. Factor columns cannot be scaled; per the
# original note they are used later for predictive SOMs via xyf.
iris.sc <- scale(iris_unique[, 1:4])

# 10 x 10 hexagonal, toroidal grid for the map.
iris.grid <- somgrid(
  xdim = 10,
  ydim = 10,
  topo = "hexagonal",
  toroidal = TRUE
)

# Fixed seed for reproducibility.
set.seed(33)
iris.som <- som(
  iris.sc,
  grid = iris.grid,
  rlen = 700,
  alpha = c(0.05, 0.01),
  keep.data = TRUE
)

# Plot 1: counts plot of the fitted map.
plot(iris.som, type = "count")

# Plot 2: property plot for one codebook column.
var <- 1 # index of the variable to plot
plot(
  iris.som,
  type = "property",
  property = getCodes(iris.som)[, var],
  main = colnames(getCodes(iris.som))[var],
  palette.name = terrain.colors
)

The above code fits a Kohonen Network on the iris data. Each observation from the data set is assigned to one of the "colorful circles" (also called "neurons") in the pictures below.

My question: In these plots, how would you identify which observations were assigned to which circles? Suppose I wanted to know which observations belong to the circles outlined by the black triangles below:

Is it possible to do this? Right now, I am trying to use iris.som$classif to somehow trace which points are in which circle. Is there a better way to do this?

UPDATE: @Jonny Phelps showed me how to identify observations within a triangular region (see answer below). But I am still not sure whether it is possible to identify observations within irregularly shaped regions. E.g.

In a previous post (Labelling Points on a Plot (R Language)), a user showed me how to assign arbitrary numbers to each circle on the grid:

Based on the above plot, how could you use the "som$classif" statement to find out which observations were in circles 92,91,82,81,72 and 71?

Thanks

解决方案

EDIT: Now with Shiny App!

A plotly solution is also possible, where you can mouse over individual neurons to display the associated iris rownames (called id here). Based on your iris.som data and Jonny Phelps' grid approach, you can just assign the row numbers as concatenated strings to the individual neurons and have these shown upon mouseover:

library(ggplot2)
library(plotly)
# One row per observation: the neuron it was mapped to (g) and its row index.
ga <- data.frame(
  g = iris.som$unit.classif,
  sample = seq_len(nrow(iris.som$data[[1]]))
)

# One row per neuron, starting from the grid coordinates stored in the SOM.
grid_pts <- as.data.frame(iris.som$grid$pts)

# rep() has no `by` argument: the original `by =` was silently ignored and the
# column only reached full length through recycling in the assignment. `times`
# states the intended repetition explicitly.
grid_pts$column <- rep(seq_len(iris.som$grid$xdim), times = iris.som$grid$ydim)
grid_pts$row <- rep(seq_len(iris.som$grid$ydim), each = iris.som$grid$xdim)
grid_pts$classif <- seq_len(nrow(grid_pts))

# Group the observation indices by neuron once instead of rescanning `ga` for
# every neuron; neurons without observations keep an empty entry because every
# neuron number is a factor level.
samples_by_unit <- split(ga$sample, factor(ga$g, levels = grid_pts$classif))

# id: comma-separated observation indices per neuron ("" when there are none).
grid_pts$id <- vapply(
  samples_by_unit, paste, character(1),
  collapse = ", ", USE.NAMES = FALSE
)

# count: number of observations per neuron, as a factor so that it can be
# mapped to a discrete colour scale.
grid_pts$count <- lengths(samples_by_unit, use.names = FALSE)
grid_pts$count <- factor(grid_pts$count, levels = 0:max(grid_pts$count))
# Neuron grid coloured by observation count. row, column and id are mapped as
# extra aesthetics so that they are carried into the plotly hover text.
p1 <- ggplot(
  data = grid_pts,
  mapping = aes(
    x = x, y = y, colour = count,
    row = row, column = column, id = id
  )
) +
  geom_point(size = 8) +
  scale_colour_manual(
    values = c("grey50", heat.colors(length(unique(grid_pts$count))))
  ) +
  theme_void() +
  theme(plot.margin = unit(c(1, 0.3, 0.3, 0.3), "cm"))

# Convert the static plot into an interactive widget.
ggplotly(p1)

Here is a full Shiny app that allows lasso selection and shows a table with the data:

# Attach every package the app uses. library() stops with an error as soon as a
# package is missing, whereas require() only returns FALSE and would let the
# script continue and fail later with an unrelated message.
invisible(suppressPackageStartupMessages(
  lapply(
    c("shiny", "dplyr", "ggplot2", "plotly", "kohonen", "GGally", "DT"),
    library,
    character.only = TRUE
  )
))

# Keep only complete iris rows, then drop duplicated rows.
iris_complete <- iris[complete.cases(iris), ]
iris_unique <- unique(iris_complete)

# Scale the four numeric columns. Factor columns cannot be scaled; per the
# original note they are used later for predictive SOMs via xyf.
iris.sc <- scale(iris_unique[, 1:4])

# 10 x 10 hexagonal, toroidal grid for the map.
iris.grid <- somgrid(
  xdim = 10,
  ydim = 10,
  topo = "hexagonal",
  toroidal = TRUE
)

# Fixed seed for reproducibility.
set.seed(33)
iris.som <- som(
  iris.sc,
  grid = iris.grid,
  rlen = 700,
  alpha = c(0.05, 0.01),
  keep.data = TRUE
)

# One row per observation: the neuron it was mapped to (g) and its row index.
ga <- data.frame(
  g = iris.som$unit.classif,
  sample = seq_len(nrow(iris.som$data[[1]]))
)

# One row per neuron, starting from the grid coordinates stored in the SOM.
grid_pts <- as.data.frame(iris.som$grid$pts)

# rep() has no `by` argument: the original `by =` was silently ignored and the
# column only reached full length through recycling in the assignment. `times`
# states the intended repetition explicitly.
grid_pts$column <- rep(seq_len(iris.som$grid$xdim), times = iris.som$grid$ydim)
grid_pts$row <- rep(seq_len(iris.som$grid$ydim), each = iris.som$grid$xdim)
grid_pts$classif <- seq_len(nrow(grid_pts))

# Group the observation indices by neuron once instead of rescanning `ga` for
# every neuron; neurons without observations keep an empty entry because every
# neuron number is a factor level.
samples_by_unit <- split(ga$sample, factor(ga$g, levels = grid_pts$classif))

# id: comma-separated observation indices per neuron ("" when there are none).
grid_pts$id <- vapply(
  samples_by_unit, paste, character(1),
  collapse = ", ", USE.NAMES = FALSE
)

# count: number of observations per neuron, as a factor so that it can be
# mapped to a discrete colour scale.
grid_pts$count <- lengths(samples_by_unit, use.names = FALSE)
grid_pts$count <- factor(grid_pts$count, levels = 0:max(grid_pts$count))

# Shiny app, adapted from https://gist.github.com/dgrapov/128e3be71965bf00495768e47f0428b9

# Page layout: the plotly output "plot" followed by the DT table "data_table",
# each in a column of width 12 inside a single fluid row.
ui <- fluidPage(
  fluidRow(
    column(
      width = 12,
      plotlyOutput(outputId = "plot", height = "600px")
    ),
    column(
      width = 12,
      DT::dataTableOutput(outputId = "data_table")
    )
  )
)


# Server logic: draws the neuron grid, tracks the lasso selection made in the
# plot and lists the selected neurons in a table.
#
# input, output: the standard Shiny server arguments. Reads the global
# `grid_pts` data frame (one row per neuron).
server <- function(input, output) {
  # Points most recently selected in the plotly widget registered as "p1".
  selected <- reactive({
    event_data("plotly_selected", source = "p1")
  })

  # Full grid plus the neurons whose coordinates match the current selection.
  # Matching is done on the pasted x/y coordinates reported by plotly.
  grid_state <- reactive({
    picked <- selected()
    sel <- tryCatch(
      # Qualified call so that stats::filter can never be picked up by mistake.
      dplyr::filter(
        grid_pts,
        paste(x, y, sep = "_") %in% paste(picked$x, picked$y, sep = "_")
      ),
      error = function(e) {
        # Report the problem and fall back to an empty selection. Returning
        # NULL here (as before) made the nrow() check in the plot fail.
        warning(
          "Could not match the selection to grid points: ",
          conditionMessage(e),
          call. = FALSE
        )
        grid_pts[0, , drop = FALSE]
      }
    )
    list(data = grid_pts, sel = sel)
  })

  output$plot <- renderPlotly({
    state <- grid_state()
    req(state)

    # classif, row, column and id are mapped as extra aesthetics so that they
    # are carried into the plotly hover text.
    p <- ggplot(
      data = state$data,
      aes(
        x = x, y = y, classif = classif, colour = count,
        row = row, column = column, id = id
      )
    ) +
      geom_point(size = 8) +
      scale_colour_manual(
        values = c("grey50", heat.colors(length(unique(grid_pts$count))))
      ) +
      theme_void() +
      theme(plot.margin = unit(c(1, rep(.3, 3)), "cm"))

    # Overlay the selected neurons in blue, if there are any.
    highlighted <- state$sel
    if (!is.null(highlighted) && nrow(highlighted) > 0) {
      p <- p + geom_point(
        data = highlighted,
        mapping = aes(
          x = x, y = y, classif = classif, count = count,
          row = row, column = column, id = id
        ),
        color = "blue", size = 5, inherit.aes = FALSE
      )
    }

    # source = "p1" ties this widget to the event_data() call above.
    ggplotly(p, source = "p1") %>% layout(dragmode = "lasso")
  })

  # Table of the currently selected neurons.
  output$data_table <- DT::renderDataTable(
    grid_state()$sel,
    filter = "top",
    options = list(pageLength = 5, autoWidth = TRUE)
  )
}

shinyApp(ui, server)

这篇关于通过颜色识别点的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持IT屋！

查看全文

通过颜色识别点 [英] Identifying points by color

问题描述

相关文章

AI人工智能最新文章

热门教程

热门工具

登录关闭

通过颜色识别点 [英] Identifying points by color

问题描述

相关文章

AI人工智能最新文章

热门教程

热门工具

登录 关闭

登录关闭