r 基于频率的顶部平行坐标图

https://cran.r-project.org/web/packages/cdparcoord/vignettes/cdparcoord.html

top_freq_parcoord.R
# Load cdparcoord, which supplies discretize() and discparcoord() used below.
library(cdparcoord)

# Discretize `df` with nlevels = 4.
# NOTE(review): `df` is not defined in this snippet; presumably it is the
# user's own data frame -- confirm before running.
dd <- discretize(df, nlevels=4)

# Draw the parallel-coordinates plot from the discretized data with k = 100
# (presumably the number of most frequent tuples to display -- confirm
# against the cdparcoord documentation).
discparcoord(dd, k=100) 

r 将数字向上或向下舍入到一个漂亮的数字

将数字向上或向下舍入到一个漂亮的数字

roundNicely.r
#-- function to round up or down a number nicely
#
# Rounds a single number to a "nice" value of the form 10^k * nice[i], where
# k = floor(log10(x)).
#
# Arguments:
#   x     a single number (intended for positive values, since log10(x) is used).
#   nice  ascending multipliers tried within the decade of `x`.
#   down  if TRUE return the largest nice value <= x,
#         otherwise the smallest nice value >= x.
#
# Returns a single number; NA when no candidate satisfies the condition
# (e.g. a negative `x`, or a custom `nice` that does not cover the decade).
roundNicely <- function(x, nice=c(1,1.5,2,2.5,3,4,5,6,8,10), down = TRUE) {
    stopifnot(length(x) == 1)
    # All nice values inside the decade that contains x.
    candidates <- 10^floor(log10(x)) * nice
    if(down){
        # Take the last candidate that does not exceed x. The previous
        # "first candidate >= x, minus one" rule returned numeric(0) for exact
        # powers of ten and skipped x itself when x was already nice.
        hits <- which(candidates <= x)
        if(length(hits) == 0){
            return(NA_real_)
        }
        candidates[hits[length(hits)]]
    } else {
        candidates[which(x <= candidates)[1]]
    }
}

r 将每个因子的表导出到zip文件中

将每个因子的表导出到zip文件中

writeTablesPerCat.r
#-- function to export tables per factor in a zip
#
# Splits `df` by the values of column `cat`, writes each group (without the
# `cat` column) as a tab-separated file in tempdir(), and bundles those files
# into "<fn>.zip".
#
# Arguments:
#   df         data frame to split.
#   cat        name of the column to split on; must be a column of `df`.
#   fn         output path without the ".zip" extension; its basename is used
#              as the prefix of the per-group file names.
#   zip_flags  flags passed to zip().
#   na_string  string written for missing values.
#   replace    if TRUE, an existing "<fn>.zip" is removed before zipping.
#
# Returns the logical vector produced by file.remove() on the temporary files.
# NOTE(review): write_tsv() is not defined in this snippet; presumably readr
# is attached by the caller -- confirm.
writeTablesPerCat <- function(df, cat, fn, zip_flags = "-j", na_string = "", replace = TRUE){
	stopifnot(cat %in% colnames(df))
	zip_path <- paste0(fn, ".zip")
	df_ls <- split(df, df[[cat]])
	names(df_ls) <- tolower(gsub(" ", "-", names(df_ls)))
	# vapply guarantees a named character vector even when there are no groups.
	files <- vapply(
		names(df_ls),
		function(x) paste0(tempdir(), "/", basename(fn), "-", x, ".txt"),
		character(1)
	)
	# Do not leave temporary files behind if writing or zipping fails.
	on.exit(unlink(files), add = TRUE)
	for(i in names(df_ls)){
		keep <- !colnames(df_ls[[i]]) %in% cat
		# drop = FALSE keeps a data frame even when a single column remains.
		write_tsv(df_ls[[i]][, keep, drop = FALSE], files[[i]], na = na_string)
	}
	# Only remove the archive when it exists, instead of muffling the warning.
	if(isTRUE(as.logical(replace)) && file.exists(zip_path)){
		file.remove(zip_path)
	}
	zip(zip_path, files, flags = zip_flags)
	file.remove(files)
}

r 写入带有日期附加到文件名的CSV文件

写入带有日期附加到文件名的CSV文件

writeCSV.r
#-- function to write out csv file with date appended to filename
#
# Writes `df` to "<fn>-<date>.csv", where <date> is Sys.Date() at call time,
# without row names.
#
# Arguments:
#   df  object to write with write.csv().
#   fn  output path without the date suffix and the ".csv" extension.
writeCSV <- function(df, fn){
    ffn <- paste0(fn, "-", Sys.Date(), ".csv")
    # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
    write.csv(df, ffn, row.names = FALSE)
}

r data.frame列转换

使用基础R(base R)进行的各种列类型转换

convert_cols.R
# Turn each of the listed columns into a character column
vars <- c("Sepal.Length", "Petal.Width")
iris[vars] <- Map(as.character, iris[vars])

r ggplot2 scale_x_time:以"小时:分钟"格式显示x轴时间标签

scale_x_time.R
# Fragment to append to an existing ggplot call: the label function keeps
# only the first five characters of each x-axis time label
# (presumably "HH:MM" out of "HH:MM:SS" -- confirm against the data).
+ scale_x_time(labels = function(x) substring(x, 0, 5))

r 按指定顺序对data.frame的某一列进行排序

按指定顺序对data.frame的某一列进行排序

order_df_with_specific_order.R
# Goal: sort the rows of iris so that Species follows this custom order
my_order <- c("versicolor", "virginica", "setosa")

# Store Species as plain character strings instead of a factor
iris[["Species"]] <- as.character(iris[["Species"]])
# Rank every row by the position of its Species in my_order, then reorder
iris[with(iris, order(match(Species, my_order))), ]

# see https://stackoverflow.com/questions/1568511/how-do-i-sort-one-vector-based-on-values-of-another

r ggplot的助手

ggplot的助手

00_ggplot_helpers.R
Ces gists regroupent les problèmes et solutions rencontrés avec ggplot2

Sommaire :

- Gestion de la bordure des geom_point() : 01_geom_point_borders.R
01_geom_point_borders.R
# Drawing borders around points with geom_point()

# As explained here (https://stackoverflow.com/questions/15965870/fill-and-border-colour-in-geom-point-scale-colour-manual-in-ggplot)
# ONLY shapes 21 to 25 have both the colour and fill properties!

# Example that does not work (no shape is set, so the default is used;
# the original note cites shape 16, which is outside 21-25)
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(fill = Species), colour = "black")

# Example that works: shape 21 takes a black border and a fill mapped to Species
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(fill = Species), colour = "black", shape = 21)

r 在Rstudio中使用SQLite

SQLite_Rstudio.r
## Load the RSQLite package
library(RSQLite)

## Connect to an existing database file
Customers <- dbConnect(SQLite(), dbname = 'Customers.sqlite')

## Create and connect to a "virtual" in-memory database (no dbname given)
Food <- dbConnect(SQLite())

## List all the tables currently in the Customers database
dbListTables(Customers)

## Import a CSV into the SQLite database
## Author's note: importing a CSV directly into SQLite was found to be unreliable,
## because commas inside a quoted string (like "Smith, John") were still treated as
## delimiters. The file is therefore read into an R data frame first and then written out.
Suppliers <- read.csv("./Suppliers.csv") # Import CSV into R dataframe
dbWriteTable(Customers, "suppliers", Suppliers) # Write data to a table in the Customers database

## Make a query in a SQL chunk
```{sql connection=Customers}
-- Select ID, name and city of the customers whose Country is Germany.
-- The string literal uses single quotes: in SQL, double quotes denote
-- identifiers, and SQLite accepts "Germany" as a string only through a
-- legacy fallback that can be disabled.
SELECT CustomerID, CustomerName, City
FROM customers
WHERE Country = 'Germany';
```

r 使用指定的repo安装R库

安装某些库,例如`ggraph`,需要指定一个repo。

install_library_repo
# Install a package from an explicitly specified CRAN repository.
# The repository is addressed over HTTPS so the package index and the
# package itself are not downloaded over plain HTTP.
install.packages('RMySQL', repos='https://cloud.r-project.org')