r ggplot2 APA主题

ggplot2 APA主题

ggplot2_apa_theme.R
# APA-style theme layer: add it to a ggplot object with `+`.
# Starts from theme_minimal(), then draws black axis lines, removes every
# panel grid line, hides the legend title and centres the plot title.
# All overrides live in one theme() call, and the expression is complete
# (no trailing `+`), so it cannot swallow whatever line follows it.
theme_minimal() +
  theme(
    axis.line = element_line(colour = "black"),
    panel.grid = element_blank(),
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )

r 转换变量

convert_vars
## Converting Numeric Variables to Factor
# `mydata` is a data frame. Each numbered recipe below is independent.
# Columns are always selected with list-style indexing, `mydata[cols]`:
# unlike `mydata[, cols]` it returns a data frame even when a single column
# is selected, so lapply() keeps iterating over columns instead of over the
# individual values of one column.

## 1. Using Column Index Numbers
# Convert the first, second, third and fifth variables to factors.

names <- c(1:3, 5)
mydata[names] <- lapply(mydata[names], factor)
str(mydata)

## 2. Using Column Names
# Convert the two variables 'Credit' and 'Balance' to factors.

names <- c("Credit", "Balance")
mydata[names] <- lapply(mydata[names], factor)
str(mydata)

## 3. Converting all variables
col_names <- names(mydata)
mydata[col_names] <- lapply(mydata[col_names], factor)

## 4. Converting all numeric variables
# vapply() always yields a logical mask (sapply() returns an empty list for a
# zero-column data frame, which cannot be used as an index).
is_num <- vapply(mydata, is.numeric, logical(1))
mydata[is_num] <- lapply(mydata[is_num], as.factor)

## 5. Converting only the variables with fewer than 4 unique values
col_names <- vapply(
  mydata,
  function(col) length(unique(col)) < 4,
  logical(1)
)
mydata[col_names] <- lapply(mydata[col_names], factor)

r 分组在R(dplyr)

按类型分组并计算每组的平均评分

group_dplyr.r
# Average IMDB rating per genre, built one dplyr verb at a time.
mydata_group <- group_by(mydata, Genre)
mydata_group <- summarise(mydata_group, avg_rating = mean(IMDB_Rating))
# Optional: drops any grouping left on the summarised result.
mydata_group <- ungroup(mydata_group)

r R中的case_when

case_when.r
# Add a text label `Rating2` derived from the numeric `IMDB_Rating`.
# case_when() uses the first condition that is TRUE for each row; rows that
# match none of the comparisons (including missing ratings) get "Not Good".
mydata <- mutate(
  mydata,
  Rating2 = case_when(
    IMDB_Rating > 8.0 ~ "Very Good",
    # Ratings above 8 were already taken by the previous condition, so this
    # covers exactly the range (7, 8].
    IMDB_Rating > 7.0 ~ "OK",
    TRUE ~ "Not Good"
  )
)

r R单行

R单行

rOneLiners.r
#-- extract the bare filename (no folder, no extension) from a filepath
#   `<path>` and `<filename>` are placeholders: replace them with real names.
#   The pattern has three capture groups: (1) the folder part up to the last
#   "/", (2) the filename, which may not contain dots, (3) the extension.
#   "\\2" keeps the filename; use "\\1" or "\\3" as the replacement to get the
#   folder or the extension instead.
#   A path that does not match the pattern (e.g. one without any "/") is
#   returned unchanged by sub().
<filename> <- sub("(.*\\/)([^.]+)(\\.[[:alnum:]]+$)", "\\2", <path>)

r 使用slurm启动H2O集群

start_h2o_cluster.R
# Launch one H2O worker on every node of the current SLURM allocation.
# Steps: read the allocated nodes, resolve each node to an IP address, build
# the java command line, then ssh into each IP and start h2o in the background.
node_list <- Sys.getenv("SLURM_NODELIST")
cat("SLURM nodes:", node_list, "\n")

# Everything below needs at least one node; fail with a clear message rather
# than with an "object not found" error further down.
if (node_list == "") {
  stop("SLURM_NODELIST is empty; run this script inside a SLURM allocation.",
       call. = FALSE)
}

# SLURM can report nodes in compressed form (e.g. "node[01-03,07]"), which a
# plain comma split would tear apart. Let scontrol expand the list when it is
# available; otherwise fall back to the comma split.
if (nzchar(Sys.which("scontrol"))) {
  nodes <- system2("scontrol", c("show", "hostnames", shQuote(node_list)),
                   stdout = TRUE)
} else {
  nodes <- strsplit(node_list, ",")[[1]]
}

# Look up IPs of the allocated nodes.
ips <- vapply(nodes, function(node) {
  # The pipe to awk relies on system2() running the command line through a
  # shell (Unix-alikes); awk prints the address from the first matching line.
  lookup_args <- c(shQuote(node), " | awk '/has address/ { print $4 ; exit }'")
  result <- system2("host", args = lookup_args, stdout = TRUE)
  if (length(result) != 1L || !nzchar(result)) {
    stop("Could not resolve an IP address for node: ", node, call. = FALSE)
  }
  # Extract the IP from the result output.
  sub("^([^ ]+) +.*$", "\\1", result, perl = TRUE)
}, character(1), USE.NAMES = FALSE)
cat("SLURM IPs:", paste(ips, collapse = ", "), "\n")

# Combine into a network string for h2o.
network <- paste0(paste0(ips, "/32"), collapse = ",")
cat("Network:", network, "\n")

# Specify how many nodes we want h2o to use.
h2o_num_nodes <- length(ips)

# Options to pass to java call:
args <- c(
  # -Xmx100g sets the JVM maximum heap to 100GB for each worker.
  # Needs to come before "-jar".
  "-Xmx100g",
  # Specify path to downloaded h2o jar.
  "-jar /vol/cloud/osd2014/h2o-3.24.0.5/h2o.jar",
  # Specify a cloud name for the cluster.
  "-name h2o_r",
  "-port  55599",
  # Specify IPs of other nodes.
  paste("-network", network)
)
cat(paste0("Args:\n", paste(args, collapse = "\n"), "\n"))

# Run once for each node we want to start.
for (node_i in seq_len(h2o_num_nodes)) {
  cat("\nLaunching h2o worker on", ips[node_i], "\n")
  new_args <- c(ips[node_i], "java", args)
  # Ssh into the target IP and launch an h2o worker with its own
  # output and error files. These could go in a subdirectory.
  cmd_result <- system2("ssh", args = new_args,
                        stdout = paste0("h2o_out_", node_i, ".txt"),
                        stderr = paste0("h2o_err_", node_i, ".txt"),
                        # wait = FALSE so that it runs in the background.
                        wait = FALSE)
  # This should be 0.
  cat("Cmd result:", cmd_result, "\n")
  # Wait one second between inits.
  Sys.sleep(1L)
}

# Wait 3 more seconds to find all the nodes, otherwise we may only
# find the node on localhost.
Sys.sleep(3L)

# Check if h2o is running. We will see ssh processes and one java process.
system2("ps", c("-ef", "| grep h2o.jar"), stdout = TRUE)

r PCAmixdata中的PCA旋转

PCAmixdata中的PCA旋转

PCA_rotation.R
## Varimax rotation of the fitted PCAmix object (dim = 4, no graphs drawn)
res.pcarot <- PCArot(res.pcamix, dim = 4, graph = FALSE)

## Squared loadings on components 1 and 2: unrotated solution
plot(
  res.pcamix,
  axes = c(1, 2),
  choice = "sqload",
  coloring.var = TRUE,
  leg = TRUE,
  posleg = "topleft",
  xlim = c(0, 1),
  ylim = c(0, 1),
  main = "Variables before rotation"
)

## Same plot for the rotated solution, legend moved to the top right
plot(
  res.pcarot,
  axes = c(1, 2),
  choice = "sqload",
  coloring.var = TRUE,
  leg = TRUE,
  posleg = "topright",
  xlim = c(0, 1),
  ylim = c(0, 1),
  main = "Variables after rotation"
)

r 在PCAmixdata PC地图中绘制单个观察结果

在PCAmixdata PC地图中绘制单个观察结果

indiv_obs_PCAmixdata.R
## Taken from tutorial at https://cran.r-project.org/web/packages/PCAmixdata/vignettes/PCAmixdata.html

## Import library
library(PCAmixdata)

## Load the example data set explicitly, so that `gironde` exists when this
## snippet is run on its own (the tutorial also calls data(gironde) first)
data("gironde", package = "PCAmixdata")

## Split qualitative and quantitative variables of the housing table
split <- splitmix(gironde$housing)
X1 <- split$X.quanti
X2 <- split$X.quali

## FAMD: fit PCAmix on the mixed data without drawing the default graphs
res.pcamix <- PCAmix(
  X.quanti = X1,
  X.quali = X2,
  rename.level = TRUE,
  graph = FALSE
)

## Plot individual observations along first two PCs,
## coloured by the qualitative variable `houses`, without point labels
plot(
  res.pcamix,
  choice = "ind",
  coloring.ind = X2$houses,
  label = FALSE,
  posleg = "bottomright",
  main = "Observations"
)

r 在Jupyter中调整R图

resize_R_plot_jupyter.R
# Set the repr.plot.* options that control the size of plots shown in Jupyter
# (the R kernel reads them as a width and height, in inches).
options(
  repr.plot.width = 4,
  repr.plot.height = 3
)

r 写一个简单的xlsx数据电子表格

写一个简单的xlsx数据电子表格

writeXlsx.r
#-- wrapper function to create a worksheet with particular formatting
#' Write a table to a single-sheet xlsx workbook with fixed formatting
#'
#' The sheet gets a bold white-on-blue header row with a bottom border, a
#' frozen first row, automatic column widths, and left/top-aligned data
#' cells with text wrapping.
#'
#' @param d Table to write (passed to openxlsx `writeData()`).
#' @param fn Path of the xlsx file; an existing file is overwritten.
#' @param sheet_name Worksheet name. A missing or empty name (the default)
#'   falls back to "Sheet1", because Excel does not accept a blank sheet name.
#' @return The value returned by openxlsx `saveWorkbook()`.
writeXlsx <- function(d, fn, sheet_name = '') {
  # library() instead of require(): stop right here if openxlsx is missing,
  # rather than failing later with "could not find function".
  library(openxlsx)

  if (length(sheet_name) != 1L || is.na(sheet_name) || !nzchar(sheet_name)) {
    sheet_name <- "Sheet1"
  }

  n_rows <- nrow(d)
  n_cols <- ncol(d)

  # header styling
  hs1 <- createStyle(
    fgFill = "#4F81BD",
    halign = "LEFT",
    textDecoration = "Bold",
    border = "Bottom",
    fontColour = "white"
  )
  # data cell styling
  cs1 <- createStyle(wrapText = TRUE, halign = "LEFT", valign = "top")

  # creates xlsx workbook containing data
  wb <- createWorkbook()
  addWorksheet(wb, sheetName = sheet_name)
  if (n_cols > 0) {
    setColWidths(wb, 1, seq_len(n_cols), "auto")
  }
  freezePane(wb, 1, firstRow = TRUE)
  writeData(wb, 1, d, headerStyle = hs1)

  # Data rows start at sheet row 2 (row 1 is the header). With zero data rows
  # there is nothing to style; `2:(n_rows + 1)` would count down to c(2, 1)
  # and overwrite the header style.
  if (n_rows > 0 && n_cols > 0) {
    addStyle(
      wb, 1,
      style = cs1,
      rows = seq_len(n_rows) + 1L,
      cols = seq_len(n_cols),
      gridExpand = TRUE
    )
  }

  saveWorkbook(wb, fn, overwrite = TRUE)
}