r 保存ggplot大小

save_ggplot_size.R
## size for ppt presentations: 
# whole slide
ggsave(my_plot, file = "my_file.png", width = 20, height = 12, units = "cm")

# 4 per slide
ggsave(my_plot, file = "my_file.png", width = 12, height = 7, units = "cm")

# 2 per slide
ggsave(my_plot, file = "my_file.png", width = 12, height = 9, units = "cm")

r purrr读取目录

来自[https://www.gerkelab.com/blog/2018/09/import-directory-csv-purrr-readr/]

functional_read.r
library(tidyverse)
library(v)

data_dir %>% 
  dir_ls(regexp = "\\.csv$") %>% 
  map_dfr(read_csv, .id = "source") %>% 
  mutate(Month_Year = myd(Month_Year, truncated = 1))

r 漂亮的情节

漂亮的情节

prettyPlot.R

par(mar = c(4.5, 5.5, 4.5, 4.5))
plot(x,y,ylim=c(min.y,max.y),lty=1,col="#878787",type="l",ylab="",xlab="",axes=FALSE)
axis(2,las=2,col="White",col.ticks="Black")
axis(1,las=0,col="White",col.ticks="Black")
box()
title(ylab="",mgp=c(4,1,0)) 

lines(Croacia_Lilj_PartialPlot,ylim=c(min.y,max.y),lty=2,col="#878787")
lines(Italy_Trieste_PartialPlot,ylim=c(min.y,max.y),lty=3,col="Black")
lines(Italy_Venice_PartialPlot,ylim=c(min.y,max.y),lty=4,col="#878787")
abline(h = 0, lty=3, col="red")
legend(26.3, 0.017, legend=c("Ante (Croacia)", "Lilj (Croacia)", "Trieste (Italy)", "Venice (Italy)"),border = "gray",col=c("#878787", "#878787","Black","#878787"), lty=1:4, cex=0.8)

r 田口损失函数

taguchi.r
curve(0.002 * (x - 10)^2, 9, 11,
lty = 1,
lwd = 2,
ylab = "Cost of Poor Quality",
xlab = "Observed value of the characteristic",
main = expression(L(Y) == 0.002 ~ (Y - 10)^2))
abline(v = 9.5, lty = 2)
abline(v = 10.5, lty = 2)
abline(v = 10, lty = 2)
abline(h = 0)
text(10, 0.002, "T", adj = 2)
text(9.5, 0.002, "LSL", adj = 1)
text(10.5, 0.002, "USL", adj = -0.1)

r 帕累托图

pareto.r
#Pareto Chart
library(qcc)
defect <- c(60,20,16,14,8,9)
names(defect) <- c("A", "B", "C", "D","E","F")
pareto.chart(defect, ylab = "Defect frequency", col=heat.colors(length(defect)))

r 鱼骨图

fishbone.r
# Cause and Effect Diagram
##Create effect as string
effect <- "Low Quality product"
##Create vector of causes
causes.gr <- c("Measurement", "Material", "Methods", "Environment",
"Manpower", "Machines")
# Create indiviual cause as vector of list
causes <- vector(mode = "list", length = length(causes.gr))
causes[1] <- list(c("Lab error", "Contamination"))
causes[2] <- list(c("Raw Material", "Additive"))
causes[3] <- list(c("Sampling", "Analytical Procedure"))
causes[4] <- list(c("Rust near sample point"))
causes[5] <- list(c("Poor analyst","No guidance"))
causes[6] <- list(c("Leakage", "breakdown"))
# Create Cause and Effect Diagram
ss.ceDiag(effect, causes.gr, causes, sub = "Fish Bone Diagram Example")

r 六西格玛流程图

process.r
#Load package
library("SixSigma")
# Create vector of Input , Output and Steps  
inputs <-c ("Ingredients", "Cook", "Oven")
outputs <- c("temperature", "taste", "tenderness","weight", "radius", "time")
steps <- c("DOUGH", "TOPPINGS", "BAKE", "DELIVER")

#Save the names of the outputs of each step in lists 
io <- list()
io[[1]] <- list("X's")
io[[2]] <- list("Dough", "ingredients", "Cooker")
io[[3]] <- list("Raw Pizza", "Cooker", "Oven Plate")
io[[4]] <- list("Baked Pizza", "Plate")

#Save the names, parameter types, and features:
param <- list()
param[[1]] <- list(c("Cook", "C"),c("flour brand", "C"),c("prop Water", "P"))
param[[2]] <- list(c("Cook", "C"),c("Ing.Brand", "Cr"),c("amount", "P"),c("prep.Time", "Cr"))
param[[3]] <- list(c("Cook","C"),c("queue", "N"),c("BakeTime", "Cr"))
param[[4]] <- list(c("Waiter","C"),c("queue", "N"))

feat <- list()
feat[[1]] <- list("Density", "toughness", "thickness")
feat[[2]] <- list("Diameter", "Weight", "thickness")
feat[[3]] <- list("temperature", "tenderness", "taste")
feat[[4]] <- list("temperature", "taste", "tenderness","weight", "time")
# Create process map
ss.pMap(steps, inputs, outputs,io, param, feat,sub = "Pizza Process Example")

r [连接到Rstudio中的SQLite数据库] #R #SQLite #RStudio

[连接到Rstudio中的SQLite数据库] #R #SQLite #RStudio

RStudio_SQLite.r
## Import library
library(RSQLite)


## Connect to an existing database
Customers <- dbConnect(SQLite(), dbname = 'Customers.sqlite')


## Create and connect to a "virtual" in-memory database
Food <- dbConnect(SQLite())


## List all the tables currently in the database
dbListTables(Customers)


## Import CSV into SQLite database
'''
Currently, it seems that commas inside of a quoted string (like “Smith, John”) is still being recognized as a comma delimiter, so directly importing data from a CSV into a SQLite database is quite buggy. 
'''

Suppliers <- read.csv("./Suppliers.csv") # Import CSV into R dataframe
dbWriteTable(Customers, "suppliers", Suppliers) # Write data to a table in the Customers database


## Make a query in a SQL chunk
```{sql connection=Customers}
SELECT CustomerID, CustomerName, City
FROM customers
WHERE Country = "Germany";
```

r ggplot删除网格

ggplot_remove_grid.R
ggplot(iris, aes(x = Sepal.Length, y = Petal.Width, colour = Species)) +
  geom_point() +
  theme(panel.grid.major = element_blank(),
       panel.grid.minor = element_blank())

r 总结p2水平表型

对于P2水平表型,获取描述性信息

summarize_p2.r
args = commandArgs(trailingOnly=TRUE)
#Rscript p2_phenocode.r mrsc > test.txt

study <- args[1] 

#study='mrsc'

library(psych)
dat <- read.csv(paste(study,'_p2.csv',sep=''),stringsAsFactors=F,header=T,na.strings=c(NA,"#N/A","-9"))

print("Data dimension:")
 dim(dat) #Check that number of rows is approximately correct


##Check Score ranges 

#If any of these don't work, headers aren't uniform across files! Should also be chagned

print("Score ranges for dichotomous variables are:")
print("Case Status")
 try(table(dat$Case,useNA="ifany"),silent=TRUE)
print("Current PTSD Status")
 try(table(dat$Current_PTSD_Dx,useNA="ifany"),silent=TRUE)
print("Lifetime PTSD Status")
 try(table(dat$Lifetime_PTSD_Dx,useNA="ifany"),silent=TRUE)
print("Exposure")
 try(table(dat$Exposure,useNA="ifany"),silent=TRUE)
print("Sex")
 try(table(dat$Sex,useNA="ifany"),silent=TRUE)
print("Ancestry")
 try(table(dat$Self_report_ancestry,useNA="ifany"),silent=TRUE)

print("Descriptions of Continuous variables")
 describe(subset(dat,select=c(Age,CT_Count,LT_count,IT_Count,Lifetime_PTSD_Continuous,Current_PTSD_Continuous)))

#Compare case variable to lifetime PTSD and current PTSD
print("Compare case dx variable to current PTSD dx variable")
 try(table(dat$Case,dat$Current_PTSD_Dx,useNA="ifany"),silent=TRUE)

print("Compare case dx variable to lifetime PTSD dx variable")
 try(table(dat$Case,dat$Lifetime_PTSD_Dx,useNA="ifany"),silent=TRUE)


##Histograms of data

#Current PTSD cont stratified by current DX
if(!all(is.na(dat$Current_PTSD_Continuous)) & !all(is.na(dat$Current_PTSD_Dx) ))
{
pdf(paste(study,'_hist_Current_PTSD_Continuous.pdf'),7,7)
 p0 <- hist(subset(dat, Current_PTSD_Dx == 1)$Current_PTSD_Continuous,plot=FALSE)
 p1 <- hist(subset(dat, Current_PTSD_Dx == 2)$Current_PTSD_Continuous,plot=FALSE)
 
 yli <- max(p0$density, p1$density  ) #p3$density,p4$density 
 transparency_level=0.5
 par(mar=c(5, 4, 4, 2) + 0.5)
 plot(p0, col=rgb(1,0,0,transparency_level),freq=FALSE,xlim=range(dat$Current_PTSD_Continuous,na.rm=T),ylim=c(0,yli),ylab="Frequency", xlab="Current_PTSD_Continuous",main="",cex.axis=1.45,cex.lab=1.6) 
 plot(p1, col=rgb(0,0,1,transparency_level),freq=FALSE,add=T)  # second
 legend('topright',legend=c("Control","Case"),col=c(rgb(1,0,0,transparency_level),rgb(0,0,1,transparency_level)),pch=c(19,19))
dev.off()
 
 }
#Lifetime stratified by lifetime dx
if(!all(is.na(dat$Lifetime_PTSD_Continuous)) & !all(is.na(dat$Lifetime_PTSD_Dx)))
{
 pdf(paste(study,'_hist_Lifetime_PTSD_Continuous.pdf'),7,7)

 p0 <- hist(subset(dat, Lifetime_PTSD_Dx == 1)$Lifetime_PTSD_Continuous,plot=FALSE)
 p1 <- hist(subset(dat, Lifetime_PTSD_Dx == 2)$Lifetime_PTSD_Continuous,plot=FALSE)
 
 yli <- max(p0$density, p1$density  ) #p3$density,p4$density 
 transparency_level=0.5
 par(mar=c(5, 4, 4, 2) + 0.5)
 plot(p0, col=rgb(1,0,0,transparency_level),freq=FALSE,xlim=range(dat$Lifetime_PTSD_Continuous,na.rm=T),ylim=c(0,yli),ylab="Frequency", xlab="Lifetime_PTSD_Continuous",main="",cex.axis=1.45,cex.lab=1.6) 
 plot(p1, col=rgb(0,0,1,transparency_level),freq=FALSE,add=T)  # second
 legend('topright',legend=c("Control","Case"),col=c(rgb(1,0,0,transparency_level),rgb(0,0,1,transparency_level)),pch=c(19,19))
dev.off()
}

#Trauma stratified by case status
if(!all(is.na(dat$CT_Count)) )
{
pdf(paste(study,'_hist_CT_by_case.pdf'),7,7)
 p0 <- hist(subset(dat,Case == 1)$CT_Count,plot=FALSE)
 p1 <- hist(subset(dat, Case == 2)$CT_Count,plot=FALSE)
 
 yli <- max(p0$density, p1$density  ) #p3$density,p4$density 
 transparency_level=0.5
 par(mar=c(5, 4, 4, 2) + 0.5)
 plot(p0, col=rgb(1,0,0,transparency_level),freq=FALSE,xlim=range(dat$CT_Count,na.rm=T),ylim=c(0,yli),ylab="Frequency", xlab="CT_Count",main="",cex.axis=1.45,cex.lab=1.6) 
 plot(p1, col=rgb(0,0,1,transparency_level),freq=FALSE,add=T)  # second
 legend('topright',legend=c("Control","Case"),col=c(rgb(1,0,0,transparency_level),rgb(0,0,1,transparency_level)),pch=c(19,19))
 dev.off() 
} else print("No CT variable, won't plot histograms")

 if(!all(is.na(dat$IT_Count)) )
{
pdf(paste(study,'_hist_IT_by_case.pdf'),7,7)
 p0 <- hist(subset(dat,Case == 1)$IT_Count,plot=FALSE)
 p1 <- hist(subset(dat, Case == 2)$IT_Count,plot=FALSE)
 
 yli <- max(p0$density, p1$density  ) #p3$density,p4$density 
 transparency_level=0.5
 par(mar=c(5, 4, 4, 2) + 0.5)
 plot(p0, col=rgb(1,0,0,transparency_level),freq=FALSE,xlim=range(dat$IT_Count,na.rm=T),ylim=c(0,yli),ylab="Frequency", xlab="IT_Count",main="",cex.axis=1.45,cex.lab=1.6) 
 plot(p1, col=rgb(0,0,1,transparency_level),freq=FALSE,add=T)  # second
 legend('topright',legend=c("Control","Case"),col=c(rgb(1,0,0,transparency_level),rgb(0,0,1,transparency_level)),pch=c(19,19))
 dev.off() 
}else print("No IT variable, won't plot histograms")

if(!all(is.na(dat$LT_count)) )
{
pdf(paste(study,'_hist_LT_by_case.pdf'),7,7)
 p0 <- hist(subset(dat,Case == 1)$LT_count,plot=FALSE)
 p1 <- hist(subset(dat, Case == 2)$LT_count,plot=FALSE)
 
 yli <- max(p0$density, p1$density  ) #p3$density,p4$density 
 transparency_level=0.5
 par(mar=c(5, 4, 4, 2) + 0.5)
 plot(p0, col=rgb(1,0,0,transparency_level),freq=FALSE,xlim=range(dat$LT_count,na.rm=T),ylim=c(0,yli),ylab="Frequency", xlab="LT_count",main="",cex.axis=1.45,cex.lab=1.6) 
 plot(p1, col=rgb(0,0,1,transparency_level),freq=FALSE,add=T)  # second
 legend('topright',legend=c("Control","Case"),col=c(rgb(1,0,0,transparency_level),rgb(0,0,1,transparency_level)),pch=c(19,19))
 dev.off()
 } else print("No LT variable, won't plot histograms")
 
#Mean trauma per case status

print("Compare LT across case/control")
try(t.test(subset(dat,Case == 1)$LT_count,subset(dat,Case == 2)$LT_count,paired=F),silent=TRUE)
print("Compare CT across case/control")
try(t.test(subset(dat,Case == 1)$CT_Count,subset(dat,Case == 2)$CT_Count,paired=F),silent=TRUE)
print("Compare IT across case/control")
try(t.test(subset(dat,Case == 1)$IT_Count,subset(dat,Case == 2)$IT_Count,paired=F),silent=TRUE)

#Regression of PTSD on trauma measures

print("Regression of Current PTSD on LT")
if(!all(is.na(dat$Current_PTSD_Continuous)) & !all(is.na(dat$LT_count) ) )
{
 summary(lm(Current_PTSD_Continuous ~ LT_count,data=dat))
} else print ("Missing either current PTSD or LT. No regression!")

print("Regression of Current PTSD on CT")
if(!all(is.na(dat$Current_PTSD_Continuous)) & !all(is.na(dat$CT_Count) ) )
{
 summary(lm(Current_PTSD_Continuous ~ CT_Count,data=dat))
} else print ("Missing either current PTSD or CT. No regression!")


print("Regression of Current PTSD on IT")
if(!all(is.na(dat$Current_PTSD_Continuous)) & !all(is.na(dat$IT_Count) ) )
{
 summary(lm(Current_PTSD_Continuous ~ IT_Count,data=dat))
} else print ("Missing either current PTSD or IT. No regression!")


print("Regression of Lifetime PTSD on LT")
if(!all(is.na(dat$Lifetime_PTSD_Continuous)) & !all(is.na(dat$LT_count) ) )
{
 summary(lm(Lifetime_PTSD_Continuous ~ LT_count,data=dat))
} else print ("Missing either Lifetime PTSD or LT. No regression!")

print("Regression of Lifetime PTSD on CT")
if(!all(is.na(dat$Lifetime_PTSD_Continuous)) & !all(is.na(dat$CT_Count) ) )
{
 summary(lm(Lifetime_PTSD_Continuous ~ CT_Count,data=dat))
} else print ("Missing either Lifetime PTSD or CT. No regression!")


print("Regression of Lifetime PTSD on IT")
if(!all(is.na(dat$Lifetime_PTSD_Continuous)) & !all(is.na(dat$IT_Count)) )
{
 summary(lm(Lifetime_PTSD_Continuous ~ IT_Count,data=dat))
} else print ("Missing either Lifetime PTSD or IT. No regression!")



##Dx variable


print("Regression of Current PTSD DX on LT")
if(!all(is.na(dat$Current_PTSD_Dx)) & !all(is.na(dat$LT_count) ) )
{
 summary(glm(as.factor(Current_PTSD_Dx) ~ LT_count,data=dat,family=binomial()))
} else print ("Missing either current PTSD or LT. No regression!")

print("Regression of Current PTSD DX on CT")
if(!all(is.na(dat$Current_PTSD_Dx)) & !all(is.na(dat$CT_Count) ) )
{
 summary(glm(as.factor(Current_PTSD_Dx )~ CT_Count,data=dat,family=binomial()))
} else print ("Missing either current PTSD or CT. No regression!")


print("Regression of Current PTSD DX on IT")
if(!all(is.na(dat$Current_PTSD_Dx)) & !all(is.na(dat$IT_Count) ) )
{
 summary(glm(as.factor(Current_PTSD_Dx) ~ IT_Count,data=dat,family=binomial()))
} else print ("Missing either current PTSD or IT. No regression!")


print("Regression of Lifetime PTSD  DX on LT")
if(!all(is.na(dat$Lifetime_PTSD_Dx)) & !all(is.na(dat$LT_count) ) )
{
 summary(glm(as.factor(Lifetime_PTSD_Dx) ~ LT_count,data=dat,family=binomial()))
} else print ("Missing either Lifetime PTSD or LT. No regression!")

print("Regression of Lifetime PTSD DX on CT")
if(!all(is.na(dat$Lifetime_PTSD_Dx)) & !all(is.na(dat$CT_Count) ) )
{
 summary(glm(as.factor(Lifetime_PTSD_Dx) ~ CT_Count,data=dat,family=binomial()))
} else print ("Missing either Lifetime PTSD or CT. No regression!")


print("Regression of Lifetime PTSD DX on IT")
if(!all(is.na(dat$Lifetime_PTSD_Dx)) & !all(is.na(dat$IT_Count)) )
{
 summary(glm(as.factor(Lifetime_PTSD_Dx) ~ IT_Count,data=dat,family=binomial()))
} else print ("Missing either Lifetime PTSD or IT. No regression!")



print("Regression of Case  DX on LT")
if(!all(is.na(dat$Case)) & !all(is.na(dat$LT_count) ) )
{
 summary(glm(as.factor(Case) ~ LT_count,data=dat,family=binomial()))
} else print ("Missing either Case or LT. No regression!")

print("Regression of Case DX on CT")
if(!all(is.na(dat$Case)) & !all(is.na(dat$CT_Count) ) )
{
 summary(glm(as.factor(Case) ~ CT_Count,data=dat,family=binomial()))
} else print ("Missing either Case or CT. No regression!")


print("Regression of Case DX on IT")
if(!all(is.na(dat$Case)) & !all(is.na(dat$IT_Count)) )
{
 summary(glm(as.factor(Case) ~ IT_Count,data=dat,family=binomial()))
} else print ("Missing either Case or IT. No regression!")