# R code snippets, code examples, code samples, snippet library

# Frequently used R convenience functions

## these are functions that I sometimes use in my code
## they are intended to be able to be run on any computer
## with R installed.


#################################
###
### Settings
###
#################################
# Keep character columns as character (not factor) when data frames are built.
# This is already the default from R 4.0.0 onward; the call matters on older R.
options(stringsAsFactors=FALSE)


#################################
###
### Aliases for frequently used functions
###
#################################
# Short aliases for interactive use. They are plain top-level bindings, so
# functions in this file that use local variables named `s`, `h` or `n`
# simply shadow them.
s <- base::summary
h <- utils::head
n <- base::names
as.dataframe <- base::data.frame


#################################
###
### FUNCTION: sourceDir
###
#################################
# sources any *.R files in a given directory
#
# Args: path 
# trace is a boolean to print file names as they are sourced
#
# Returns: nothing
# 
# this function is copied from the "source" help text
# 
# Source every file whose name ends in ".R" or ".r" directly inside `path`.
#
# Args:
#   path:  directory to scan (not recursive).
#   trace: if TRUE, print each file name followed by ":" before sourcing it
#          and a newline once it has been sourced.
#   ...:   forwarded to source() (e.g. local =, echo =).
#
# Returns: NULL, invisibly; called for its side effects.
sourceDir <- function(path, trace = TRUE, ...) {
  for (nm in list.files(path, pattern = "\\.[Rr]$")) {
    if (trace) cat(nm, ":")
    source(file.path(path, nm), ...)
    if (trace) cat("\n")
  }
  invisible(NULL)
}


#################################
###
### FUNCTION: write.txt
###
#################################
# write.table with my most frequently used settings
#
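# Args: 
# x (object to write), file (output path; "" writes to the console)
# quote, sep, row.names (defaults: no quoting, tab-separated, no row names)
# ... (further arguments passed on to write.table)
#
# Returns:
# nothing useful (called for its side effect of writing the table)
# 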
# write.table() with the defaults used most often in this code base:
# no quoting, tab-separated, no row names.
#
# Args:
#   x:         object to write (passed to write.table).
#   file:      output path; "" (the default) writes to the console.
#   quote, sep, row.names: as in write.table, with different defaults.
#   ...:       further arguments forwarded to write.table.
#
# Returns: whatever write.table returns; called for its side effect.
write.txt <- function(x, file = "", quote = FALSE, sep = "\t",
                      row.names = FALSE, ...) {
  write.table(x, file, quote = quote, sep = sep, row.names = row.names, ...)
}


#################################
###
### FUNCTION: read.txt
###
#################################
# read.table with my most frequently used settings
#
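# Args: 
# file, sep, header, row.names (defaults: tab-separated, header row, automatic row numbering)
# ... (further arguments passed on to read.table)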
#
# Returns:
# data frame
# 
# read.table() with the defaults used most often in this code base:
# tab-separated, first line is a header, rows numbered automatically.
#
# Args:
#   file:      path (or connection) to read, passed to read.table.
#   sep, header, row.names: as in read.table, with different defaults.
#   ...:       further arguments forwarded to read.table.
#
# Returns: the data frame produced by read.table.
read.txt <- function(file = "", sep = "\t", header = TRUE,
                     row.names = NULL, ...) {
  read.table(file, sep = sep, header = header, row.names = row.names, ...)
}


#################################
###
### FUNCTION: pasteNS
###
#################################
# Paste arguments with no spaces between them
#
# Args: 
# ... (vector of items to paste)
#
# Returns:
# character vector result of paste
# 
# Paste the arguments together with no separator (vectorised like paste).
#
# Args:
#   ...: items to concatenate element-wise.
#
# Returns: character vector.
pasteNS <- function(...) {
  paste0(...)
}
	
	
#################################
###
### FUNCTION: convertToComplement
###
#################################
# Converts string into its DNA complement
#
# Args: 
# string
# fGenotype (boolean; if TRUE, orders the string alphabetically)
#
# Returns:
# string
# 
# Convert each nucleotide string to its DNA complement (A<->T, C<->G).
# Input is upper-cased first; any character other than A/C/G/T becomes "N".
#
# Args:
#   x:         vector of nucleotide strings.
#   fGenotype: if TRUE (default), the complemented characters of each string
#              are sorted alphabetically (e.g. "CT" -> "GA" -> "AG").
#
# Returns: character vector, one element per element of x. NA inputs yield
#   NA (the previous implementation raised an error on NA), and a
#   zero-length input yields character(0).
convertToComplement <- function(x, fGenotype = TRUE) {
  complementOne <- function(nucString) {
    if (is.na(nucString)) {
      return(NA_character_)
    }
    nucs <- strsplit(toupper(nucString), NULL)[[1]]
    nucs[!nucs %in% c("A", "C", "G", "T")] <- "N"
    comp <- chartr("ACGT", "TGCA", nucs)
    if (fGenotype) {
      comp <- sort(comp)
    }
    paste(comp, collapse = "")
  }
  vapply(as.character(x), complementOne, character(1), USE.NAMES = FALSE)
}

#################################
###
### FUNCTION: convertColsToNumeric
###
#################################
#   Converts specified columns to numeric
#
# Args: 
# data frame, column indices
#
# Returns:
# original DF with specified columns changed
# 
# Convert the selected columns of a data frame to numeric with as.numeric().
#
# Args:
#   dfConv:     data frame.
#   colIndices: column positions or names to convert.
#
# Returns: dfConv with the selected columns replaced by numeric vectors.
#   Values that cannot be parsed become NA (with a warning from as.numeric).
#   NOTE: as.numeric() on a factor column yields the integer level codes,
#   not the labels.
convertColsToNumeric <- function(dfConv, colIndices) {
  for (col in colIndices) {
    dfConv[[col]] <- as.numeric(dfConv[[col]])
  }
  dfConv
}
		
#################################
###
### FUNCTION: printNumberedVector
###
#################################
# 
# prints a vector to screen, one item per line with each line numbered
#
# Input: a vector
#
# Returns:
# nothing
# 
##
# Print a vector to the console, one item per line, each prefixed by its
# position ("1 first", "2 second", ...).
#
# Args:
#   someVector: vector to print.
#
# Returns: NULL, invisibly.
#
# An empty vector prints nothing (1:length() used to emit bogus "1" and "0"
# lines), and lines no longer carry the stray leading/trailing spaces that
# cat() inserted between elements.
printNumberedVector <- function(someVector) {
  writeLines(paste(seq_along(someVector), someVector))
}



#################################
###
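### FUNCTION: summarizeCNV
###
#################################
# 
# creates a summary of cnvs identified by the cnv-seq package
# writes the summary to a tab-delimited file
# if the output file already exists it is read back instead of recomputed
#
# Input: cnvFileName, outputFileName
#
# Returns:
# the summary data frame, or NULL when the input contains no CNVs
# 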
##
# Summarise the CNV regions in a cnv-seq output table, caching the result.
#
# If `outputFileName` already exists it is read back and returned unchanged.
# Otherwise `cnvFileName` is read with read.delim() and one summary row is
# built per positive CNV id found in its `cnv` column:
#   cnv        "CNVR_<id>"
#   chromosome chromosome of the region
#   start      ceiling of the mean of min(start) and min(position)
#   end        floor of the mean of max(end) and max(position)
#   size       end - start + 1
#   log2       cnv.log2 of the region
#   p.value    cnv.p.value of the region
# The summary is written tab-delimited to `outputFileName` and returned.
#
# Args:
#   cnvFileName:    tab-delimited file with columns cnv, chromosome, start,
#                   end, position, cnv.log2 and cnv.p.value.
#   outputFileName: path of the cached summary.
#
# Returns: data frame of CNV regions, or NULL (and no file is written) when
#   the input has no CNV id >= 1.
#
# Changes from the earlier version: the library(cnv) call was dropped because
# nothing here uses that package; only ids that actually occur are
# summarised, so gaps in the id sequence no longer give Inf/ragged rows.
# chromosome/log2/p.value are assumed constant within a CNV id, so the first
# value of each is used (NOTE(review): confirm against cnv-seq output).
summarizeCNV <- function(cnvFileName, outputFileName) {
  if (file.exists(outputFileName)) {
    return(read.table(outputFileName, header = TRUE, sep = "\t"))
  }

  cnvFileData <- read.delim(cnvFileName)
  cnvIds <- sort(unique(cnvFileData$cnv))  # sort() also drops NA
  cnvIds <- cnvIds[cnvIds >= 1]
  if (length(cnvIds) == 0) {
    return(NULL)
  }

  summaryRows <- lapply(cnvIds, function(id) {
    region <- cnvFileData[which(cnvFileData$cnv == id), , drop = FALSE]
    regionStart <- ceiling(mean(c(min(region$start), min(region$position))))
    regionEnd <- floor(mean(c(max(region$end), max(region$position))))
    data.frame(
      cnv = paste0("CNVR_", id),
      chromosome = as.character(region$chromosome[1]),
      start = regionStart,
      end = regionEnd,
      size = regionEnd - regionStart + 1,
      log2 = region$cnv.log2[1],
      p.value = region$cnv.p.value[1],
      stringsAsFactors = FALSE
    )
  })
  cnvSummary <- do.call(rbind, summaryRows)

  write.table(cnvSummary, outputFileName, row.names = FALSE, quote = FALSE,
              sep = "\t")
  cnvSummary
}



#################################
###
### FUNCTION: fixRowNamesColumn
###
#################################
# 
# moves data in "Row.names" column to rownames
#
# Args: df
#
# Returns:
# dataframe
# 
##



# Move the "Row.names" column (as produced by merge(..., by = 0)) into the
# row names of the data frame and drop the column.
#
# Args:
#   mergeResultsDF: data frame containing a "Row.names" column.
#
# Returns: the data frame without the "Row.names" column, with its values as
#   row names. The result is returned visibly and is always a data frame
#   (the earlier version returned invisibly and collapsed a two-column input
#   to a bare vector).
fixRowNamesColumn <- function(mergeResultsDF) {
  if (!"Row.names" %in% colnames(mergeResultsDF)) {
    stop("input has no \"Row.names\" column", call. = FALSE)
  }
  rownames(mergeResultsDF) <- mergeResultsDF[["Row.names"]]
  mergeResultsDF[, colnames(mergeResultsDF) != "Row.names", drop = FALSE]
}


#################################
###
### FUNCTION: getLabelsAndMidPointsOfGroups
###
#################################
# 
# finds runs of consecutive identical values in a vector; useful for labeling
# axes and for coloring groups in alternating colors
#
# Args: charVec, alternatingColors
# Example Vals
	# charVec=c("chr1","chr1","chr2","chr2","chr2")
	# alternatingColors=c("grey", "black")
#
# Returns:
# list with two data frames: labelsDF (one row per element of charVec)
# and breaksDF (one row per group)
# 
##


# Describe the runs of consecutive identical values in `charVec`, e.g. to
# label and colour groups along a plot axis.
#
# Args:
#   charVec:           vector of group labels, in plotting order (coerced to
#                      character).
#   alternatingColors: two colours; odd-numbered groups get the second one,
#                      even-numbered groups the first.
#
# Returns: list with
#   labelsDF: one row per element of charVec - values, color (colour of the
#             element's group), labelnames (the value at each group's
#             midpoint, "" elsewhere).
#   breaksDF: one row per group - start, length, end, midpoint (rounded mean
#             of start and end), label, color.
#
# Runs are found with rle(), so a vector holding a single group, or one whose
# first value is "", is handled; both broke the earlier shift-and-compare
# implementation.
getLabelsAndMidPointsOfGroups <- function(charVec,
                                          alternatingColors = c("grey", "black")) {
  charVec <- as.character(charVec)
  runs <- rle(charVec)
  nGroups <- length(runs$lengths)

  groupEnd <- cumsum(runs$lengths)
  groupStart <- groupEnd - runs$lengths + 1
  groupMid <- round((groupStart + groupEnd) / 2)

  groupBreaks <- data.frame(
    start = groupStart,
    length = runs$lengths,
    end = groupEnd,
    midpoint = groupMid,
    label = charVec[groupMid],
    # first group takes alternatingColors[2], second [1], and so on
    color = rep_len(alternatingColors[c(2, 1)], nGroups),
    stringsAsFactors = FALSE
  )

  labelNames <- character(length(charVec))
  labelNames[groupMid] <- charVec[groupMid]

  plotAnnotation <- data.frame(
    values = charVec,
    color = rep(groupBreaks$color, times = groupBreaks$length),
    labelnames = labelNames,
    stringsAsFactors = FALSE
  )

  list(labelsDF = plotAnnotation, breaksDF = groupBreaks)
}



#################################
###
### FUNCTION: prettyAggregate
###
#################################
# wrapper for aggregate
# renames columns based on input
#
# Args: df, valname, byname, passFUN, fnDesc
# Example Vals
	# df= creatDF
	# valname=&quot;creat&quot;
	# byname=&quot;breed&quot;
	# passFUN=length
	# fnDesc=&quot;count&quot;
	# fReplaceValName=FALSE  ## the default is to append
#
# Returns:
# dataframe
# 
##

# Wrapper around aggregate() that gives the result readable column names.
#
# Args:
#   df:              data frame.
#   valname:         name of the column to aggregate.
#   byname:          name of the grouping column.
#   passFUN:         summary function handed to aggregate (e.g. length, mean).
#   fnDesc:          short description of passFUN, used in the column name.
#   fReplaceValName: if TRUE the aggregated column is named fnDesc; otherwise
#                    (default) it is named "<valname>_<fnDesc>".
#
# Returns: data frame with the grouping column (named byname) and the
#   aggregated column.
prettyAggregate <- function(df, valname, byname, passFUN, fnDesc,
                            fReplaceValName = FALSE) {
  agColName <- if (fReplaceValName) fnDesc else paste0(valname, "_", fnDesc)
  result <- aggregate(df[, valname], by = list(df[, byname]), FUN = passFUN)
  colnames(result) <- c(byname, agColName)
  result
}



#################################
###
### FUNCTION: cbindList/rbindList
###
#################################
# tries to return a dataframe from a list of bind-able vectors or dataframes
# based on first example in Reduce help
#
# Args: 
# List
#
# Returns:
# dataframe
# 
##

# Column-bind all elements of a list, left to right, via Reduce(cbind).
# Returns whatever cbind() yields for the elements (matrix or data frame).
cbindList <- function(x) Reduce("cbind", x)
# Row-bind all elements of a list, top to bottom, via Reduce(rbind).
# Returns whatever rbind() yields for the elements (matrix or data frame).
rbindList <- function(x) Reduce("rbind", x)


#################################
###
### FUNCTION: bindRepeat
###
#################################
# returns a matrix composed of n 
# copies of the vector, one copy per row

# Args: 
# vector2Rep - vector to be repeated
# n - times to repeat
## abandoned:
# fBindAsColumns - optional, boolean
#
# Returns:
# matrix (the result of row-binding the copies)
# 
##
# Stack `n` copies of a vector as rows.
#
# Args:
#   vector2Rep: vector to repeat.
#   n:          number of copies.
#
# Returns: the result of row-binding the copies with Reduce(rbind): a matrix
#   with n rows for n >= 2, the vector itself for n == 1, and NULL for
#   n == 0 (the earlier 1:n loop failed for n == 0).
bindRepeat <- function(vector2Rep, n) {
  Reduce("rbind", rep(list(vector2Rep), n))
}



#################################
###
### FUNCTION: inMB
###
#################################
# Convert a number of bases to megabases
#
# Args: 
# number(s)
#
# Returns:
# vector
# 
##
# Convert a number of bases to megabases (divide by one million).
#
# Args:
#   bases: numeric vector of base counts or positions.
#
# Returns: numeric vector of the same length, in Mb.
inMB <- function(bases) {
  bases / 1e6
}




#################################
###
### FUNCTION: textListToVec
###
#################################
# Convert a chunk of text (one item per line) to a vector
# Just type textListToVec(" and then paste in the lines and type ")
#
# Args: 
# newline-separated list
#
# Returns:
# vector
# 
##


# Split a single newline-separated string into a character vector, one
# element per line. Only the first element of `a` is used.
textListToVec <- function(a) strsplit(a, "\n")[[1]]



#################################
###
### FUNCTION: tableDF
###
#################################
# performs the table function
# but returns a well-formatted data.frame
#
# Args: 
# valVector
#
# Returns:
# dataframe
# 
##

# Tabulate a vector with table() and return the counts as a data frame.
#
# Args:
#   valVector: vector of values to count.
#
# Returns: data frame with one row per distinct value: `value` (character)
#   and `frequency` (integer count), with the values also used as row names.
#   An empty input gives a zero-row data frame.
tableDF <- function(valVector = c("a", "a", "b")) {
  counts <- table(valVector)
  valueNames <- as.character(names(counts))
  data.frame(
    value = valueNames,
    frequency = as.vector(counts),
    row.names = valueNames,
    stringsAsFactors = FALSE
  )
}




#################################
###
### FUNCTION: interleave
###
#################################
# Interleave two vectors (v1[1], v2[1], v1[2], v2[2], ...)
#
# Args: 
# v1
# v2
#
# Returns:
# vector
# 
##

# Interleave two vectors: v1[1], v2[1], v1[2], v2[2], ...
# If one vector is longer, its remaining elements follow in order.
#
# Args:
#   v1, v2: vectors (combined with c(), so the usual type coercion applies).
#
# Returns: vector of length length(v1) + length(v2).
#
# seq_along() keeps this correct when either vector is empty; 1:length()
# produced a bogus c(1, 0) index in that case.
interleave <- function(v1, v2) {
  ord1 <- 2 * seq_along(v1) - 1  # odd slots
  ord2 <- 2 * seq_along(v2)      # even slots
  c(v1, v2)[order(c(ord1, ord2))]
}



#################################
###
### FUNCTION: stampPlot
###
#################################
# Add timestamp and data to plot
#
# Args: 
# desc.txt=&quot;&quot;
# fOuter=FALSE
#
# Returns:
# nothing
# 
##

# Write a description and the current time stamp along the right-hand side
# (side 4) of the current plot.
#
# Args:
#   desc.txt: text placed before the time stamp.
#   fOuter:   if TRUE the text goes in the outer margin (passed to mtext as
#             `outer`); the perpendicular adjustment is -2 in that case and
#             -0.6 otherwise.
#
# Returns: the value of mtext(); called for its side effect.
stampPlot <- function(desc.txt = "", fOuter = FALSE) {
  pAdj <- if (fOuter) -2 else -0.6
  mtext(paste(desc.txt, hbTimeStamp()), side = 4, outer = fOuter,
        padj = pAdj, cex = .8)
}



#################################
###
### FUNCTION: snap
###
#################################
# like head, but also limits the number of columns. default is 6 columns 6 rows.

 snap <- function(df, rowLim = 6, colLim = 6) {
   # Print the top-left corner of a data frame or matrix: like head(), but
   # also limited in the number of columns.
   #
   # Args:
   #   df:     data frame or matrix.
   #   rowLim: maximum number of rows to show (default 6).
   #   colLim: maximum number of columns to show (default 6).
   #
   # Returns: the printed subset, invisibly (the value of print()). Any other
   #   input prints "cannot parse input".
   #
   # is.data.frame()/is.matrix() also accept subclasses, seq_len() copes with
   # zero rows/columns, and drop = FALSE keeps a one-column or one-row result
   # rectangular.
   if (is.data.frame(df) || is.matrix(df)) {
     rowIdx <- seq_len(min(rowLim, nrow(df)))
     colIdx <- seq_len(min(colLim, ncol(df)))
     print(df[rowIdx, colIdx, drop = FALSE])
   } else {
     print("cannot parse input")
   }
 }
	

#################################
###
### FUNCTION: getAllListSubItemsByIndex
###
#################################
# get the nth sub item from each item
#
# Args: 
# theList, list to pull from
# theIndex, the index of desired items
#
# Returns:
# character vector of results
# 
# Pull the element at position/name `theIndex` out of every item of a list.
#
# Args:
#   theList:  list whose items are themselves lists or vectors.
#   theIndex: index (or name) of the sub-item to extract from each item.
#
# Returns: character vector with one entry per item of theList; the extracted
#   sub-items are converted to text by paste().
getAllListSubItemsByIndex <- function(theList, theIndex) {
  subItems <- lapply(theList, function(item) item[[theIndex]])
  paste(subItems)
}



#################################
###
### FUNCTION: greaterOf
###
#################################
# get the greater of two items
#
# Args: 
# values to compare
#
# Returns:
# greater value
# 
# Return the greater of two values.
#
# Args:
#   x, y: values to compare (numeric or character). Vectors are compared
#         element-wise.
#
# Returns: the larger value; NA where either input is NA. The earlier
#   if/else version raised an error on NA (its NA branch was unreachable)
#   and only accepted scalars.
greaterOf <- function(x, y) {
  pmax(x, y)
}


#################################
###
### FUNCTION: lesserOf
###
#################################
# get the lesser of two items
#
# Args: 
# values to compare
#
# Returns:
# lesser value
# 
# Return the lesser of two values.
#
# Args:
#   x, y: values to compare (numeric or character). Vectors are compared
#         element-wise.
#
# Returns: the smaller value; NA where either input is NA. The earlier
#   if/else version raised an error on NA (its NA branch was unreachable)
#   and only accepted scalars.
lesserOf <- function(x, y) {
  pmin(x, y)
}



### FUNCTION: isBetween
###
#################################
# is one value between two others
#
# Args: 
# 3 values
#
# Returns:
# TRUE or FALSE
#
# Test whether x lies strictly between y (lower bound) and z (upper bound).
# Vectorised: the comparison is done element-wise with the usual recycling.
#
# Args:
#   x: value(s) to test.
#   y: lower bound (exclusive).
#   z: upper bound (exclusive).
#
# Returns: logical vector; NA where a comparison involves NA and the other
#   comparison is not FALSE.
isBetween <- function(x, y, z) {
  x > y & x < z
}


#################################
###
### FUNCTION: initialCap
###
#################################
# Converts each string in a vector to initial upper case
#
# Args: 
# wordsToConvert: vector of strings (e.g. gene symbols)
#
# Returns:
# vector with changed words
# 
# Convert each string to "Initial Caps": everything is lower-cased, then the
# first letter of every space-separated word is upper-cased.
#
# Args:
#   wordsToConvert: vector of strings.
#
# Returns: character vector of the same length. NA stays NA, and an empty
#   input gives character(0) (the earlier 1:length() loop failed on it).
initialCap <- function(wordsToConvert) {
  capOne <- function(phrase) {
    if (is.na(phrase)) {
      return(NA_character_)
    }
    words <- strsplit(tolower(phrase), " ")[[1]]
    paste(toupper(substring(words, 1, 1)), substring(words, 2),
          sep = "", collapse = " ")
  }
  vapply(as.character(wordsToConvert), capOne, character(1), USE.NAMES = FALSE)
}

#################################
###
### FUNCTION: allIdentical
###
#################################
# tests whether each item in a vector is identical
#
# Args: 
# vectorToTest: vector 
#
# Returns:
# TRUE or FALSE
# 


# Test whether every element of a vector equals its first element.
#
# Args:
#   vectorToTest: vector.
#
# Returns: TRUE if all elements match the first one (also for an empty
#   vector), FALSE otherwise.
#
# NOTE: the earlier version returned the NUMBER of non-matching elements
# (0 when all were identical), so `if (allIdentical(x))` behaved the opposite
# way from the name and the documented TRUE/FALSE. Callers that compared the
# result with 0 must be updated.
allIdentical <- function(vectorToTest) {
  all(vectorToTest %in% vectorToTest[1])
}
	
	
#################################
###
### FUNCTION: set_up_plot
###
#################################
# Determines axis  values from data
# adds a time stamp
#
# Args: 
# x &amp; y data
# fPlotFromZero
# fPlotEvenAxes

# Returns:
# plot
# 


# Open an empty plot (type = "n") whose axis ranges are derived from the
# data, and optionally stamp it with the date and time.
#
# Args:
#   x, y:          data whose ranges determine the axes.
#   fPlotFromZero: if TRUE (default) both axes start at 0, otherwise at the
#                  data minimum.
#   fPlotEvenAxes: if TRUE both axes use the combined range of x and y.
#   fDateStamp:    if TRUE (default) call stampPlot(stampText, fOuter).
#   stampText:     text for the stamp.
#   fOuter:        passed to stampPlot.
#   xAxLabel, yAxLabel: axis labels; default to the column names of x and y.
#   ...:           forwarded to plot().
#
# Returns: nothing useful; called for its side effect of starting a plot.
set_up_plot <- function(x, y, fPlotFromZero = TRUE, fPlotEvenAxes = FALSE,
                        fDateStamp = TRUE, stampText = "", fOuter = FALSE,
                        xAxLabel = colnames(x), yAxLabel = colnames(y), ...) {
  # Evaluate the default labels now: they refer to x and y, which are
  # overwritten below when fPlotEvenAxes is TRUE.
  force(xAxLabel)
  force(yAxLabel)

  if (fPlotEvenAxes) {
    x <- range(x, y, na.rm = TRUE)
    y <- x
  }

  # Missing values are ignored so that NA in the data does not make the
  # limits NA and plot() fail.
  plot_x_min <- if (fPlotFromZero) 0 else min(x, na.rm = TRUE)
  plot_y_min <- if (fPlotFromZero) 0 else min(y, na.rm = TRUE)
  plot_x_max <- max(x, na.rm = TRUE)
  plot_y_max <- max(y, na.rm = TRUE)

  plot(c(plot_x_min, plot_x_max), c(plot_y_min, plot_y_max),
       xlab = xAxLabel, ylab = yAxLabel, type = "n", ...)

  if (fDateStamp) stampPlot(stampText, fOuter = fOuter)
}
	 
	 
	 

	
#################################
###
### FUNCTION: hbTimeStamp
###
#################################
# Get formatted time and date
#
# Args (all optional): 
# sepYMD 
# sepHMS
# sepDateTime

# Returns:
# character vector
# 


# Format the current date and time as a compact stamp, by default
# "YYYY-mm-dd_II.MM.SS" followed by the AM/PM indicator.
#
# Args (all optional):
#   sepYMD:      separator between the date fields.
#   sepHMS:      separator between the time fields.
#   sepDateTime: separator between the date part and the time part.
#   dateVars:    strftime conversion letters for the date part.
#   timeVars:    strftime conversion letters for the time part ("I" is the
#                12-hour clock, hence the AM/PM suffix).
#
# Returns: character vector of length one.
#
# The clock is read once so the date, time and AM/PM parts cannot straddle a
# second/midnight boundary, and AM/PM comes from "%p" rather than from fixed
# character positions of the "%r" output.
hbTimeStamp <- function(sepYMD = "-", sepHMS = ".", sepDateTime = "_",
                        dateVars = c("Y", "m", "d"),
                        timeVars = c("I", "M", "S")) {
  dateSpec <- paste(paste("%", dateVars, sep = ""), collapse = sepYMD)
  timeSpec <- paste(paste("%", timeVars, sep = ""), collapse = sepHMS)

  now <- Sys.time()
  timePart <- paste(format(now, timeSpec), format(now, "%p"), sep = "")
  paste(format(now, dateSpec), timePart, sep = sepDateTime)
}

	
#################################
###
### FUNCTION: breakPlotIntoPages
###
#################################
# Breaks data into page-size chunks and calls a plotting function for each chunk
#
# Args: 
# dataframe with plot data
# optional: desired rows per page

# Returns:
# multiple plots
# 
		
# Split plot data into page-sized chunks of rows and call a plotting function
# once per chunk.
#
# Args:
#   multipagePlotData: data frame (or matrix) with one row per plotted item.
#   rowsPerPage:       maximum number of rows per page (default 40).
#   varList:           unused; kept so existing calls keep working.
#   plotFUN:           function called as plotFUN(pageData, xLimVals, 1),
#                      where xLimVals is c(0, rowsPerPage * 1.6). Defaults to
#                      a function named `myPlot`, which must then exist where
#                      this function can see it.
#
# Returns: invisibly, a list with the value of plotFUN for each page.
#
# Pages no longer overlap: previously each page ended on the first row of the
# next page, so boundary rows were plotted twice. Data with a single row, or
# with no rows at all, is handled as well.
breakPlotIntoPages <- function(multipagePlotData, rowsPerPage = 40, varList,
                               plotFUN = myPlot) {
  plotRows <- nrow(multipagePlotData)
  if (plotRows == 0) {
    return(invisible(list()))
  }

  pageStarts <- seq(from = 1, to = plotRows, by = rowsPerPage)
  pageEnds <- pmin(pageStarts + rowsPerPage - 1, plotRows)
  xLimVals <- c(0, rowsPerPage * 1.6)

  pages <- lapply(seq_along(pageStarts), function(i) {
    pageData <- multipagePlotData[pageStarts[i]:pageEnds[i], , drop = FALSE]
    plotFUN(pageData, xLimVals, 1)
  })
  invisible(pages)
}



## a useful function: rev() for strings
# Reverse the characters of each string in a character vector
# (rev() for strings).
#
# Returns: character vector of the same length as x. vapply() keeps the
#   result a character vector even for empty input, where sapply() returned
#   an empty list.
strReverse <- function(x) {
  vapply(strsplit(x, NULL),
         function(chars) paste(rev(chars), collapse = ""),
         character(1))
}
##strReverse(c("abc", "Statistics"))



# make transparent




################################
##
## FUNCTION: makeTransparent

##
################################
#Add transparency to colors

# Args: 
# vector of colors in name or hex format, e.g. grey or #FFCDFF
# desiredPctTranparent

# Returns:
# same colors with transparency added

#

# Add transparency to colours.
#
# Args:
#   colorVector:          colours as names or hex strings, e.g. "grey" or
#                         "#FFCDFF" (anything col2rgb() accepts; mixing the
#                         two forms is fine).
#   desiredPctTranparent: percent transparency, 0 (opaque) to 100 (invisible).
#
# Returns: character vector of "#RRGGBBAA" colours.
#
# The alpha channel is computed directly from the percentage. The earlier
# version looked the percentage up in a table of 5% steps, so any other value
# (e.g. 33) silently returned the colours with no transparency at all.
makeTransparent <- function(colorVector = c("#FFCDFF", "#C0108C", "#CB7600"),
                            desiredPctTranparent = 50) {
  pct <- as.numeric(desiredPctTranparent)
  if (anyNA(pct) || any(pct < 0 | pct > 100)) {
    stop("desiredPctTranparent must be between 0 and 100", call. = FALSE)
  }
  rgbVals <- t(grDevices::col2rgb(colorVector))
  grDevices::rgb(rgbVals, alpha = 255 * (1 - pct / 100), maxColorValue = 255)
}



	
#################################
###
### FUNCTION: plotMultipleCors
###
#################################
# Plots multiple vectors against 
# a single vector and returns 
# correlations

#
# Args: 
# plotData 
## plot data should have x values in first col, and any number of cols after that can be Y
## column names determine legend text
# vColors -- a vector of colors the length of each Y col in plotData
# ylabText -- text for Y label
# legendPos -- legend position, like "topleft"
# fNormalizeY -- normalize each Y value so the max val is 100 

# Returns:
# plot
# 

# Plot several Y series against one X vector, add a least-squares line per
# series, and show each series' correlation with X in the legend.
#
# Args:
#   plotData:    data frame or matrix; column 1 holds X, every further column
#                is a Y series. Column names become the legend text.
#   vColors:     one colour per Y series. When left at the default "", the
#                RColorBrewer "Set1" palette is used (recycled if needed).
#   ylabText:    Y-axis label.
#   xlabText:    X-axis label.
#   legendPos:   legend position, e.g. "topleft".
#   fNormalizeY: if TRUE, rescale each Y series to the range 0..100 with
#                plotrix::rescale.
#   ...:         forwarded to set_up_plot().
#
# Returns: the value of legend(), invisibly; called for its plot.
#
# Requires the plotrix package when fNormalizeY is TRUE and RColorBrewer when
# vColors is not supplied; both are called through `::` so nothing is
# attached to the search path.
#
# NOTE(review): the legend says "Rsq=" but the number shown is cor(x, y),
# i.e. r, not r squared. Text and value are left as they were; confirm which
# was intended.
plotMultipleCors <- function(plotData, vColors = "", ylabText = "",
                             xlabText = "Breed average creatinine level",
                             legendPos = "topleft", fNormalizeY = TRUE, ...) {
  x <- plotData[, 1]
  # drop = FALSE keeps a single Y column as a one-column table
  yData <- as.data.frame(plotData[, -1, drop = FALSE])
  nSeries <- ncol(yData)

  if (fNormalizeY) {
    yData[] <- lapply(yData, function(y) plotrix::rescale(y, c(0, 100)))
  }

  maxY <- max(yData, na.rm = TRUE)
  set_up_plot(x, c(0, maxY), xAxLabel = xlabText, yAxLabel = ylabText, ...)

  # Honour caller-supplied colours; fall back to the palette otherwise.
  if (identical(vColors, "")) {
    vColors <- RColorBrewer::brewer.pal(8, "Set1")
  }
  seriesColors <- rep_len(vColors, nSeries)
  seriesPch <- seq_len(nSeries) - 1

  corrText <- character(nSeries)
  for (i in seq_len(nSeries)) {
    y <- yData[[i]]
    points(x, y, col = seriesColors[i], pch = seriesPch[i])
    abline(lm(y ~ x), col = seriesColors[i])
    corValue <- cor(x, y, use = "complete.obs")
    # formatting together with a fixed reference value gives every series
    # the same number of digits
    corrText[i] <- format(c(corValue, 0.123456789), digits = 2)[1]
  }

  nNotNA <- vapply(yData, function(y) sum(!is.na(y)), integer(1))
  legend(legendPos,
         legend = paste0(colnames(yData), ", Rsq=", corrText, ", n=", nNotNA),
         pch = seriesPch, col = seriesColors)
}
  	
#################################
###
### FUNCTION: plotCors
###
#################################
# Plots a single Y vector against 
# an X vector, adds a regression line and 
# shows the correlation in the subtitle

#
# Args: 
# plotData 
## plot data should have x values in first col, and any number of cols after that can be Y
## column names determine legend text
# vColors -- a vector of colors the length of each Y col in plotData
# ylabText -- text for Y label
# legendPos -- legend position, like "topleft"
# fNormalizeY -- normalize each Y value so the max val is 100 

# Returns:
# plot
# 

# Plot column 2 of plotData against column 1, add the least-squares line and
# print the correlation as the plot subtitle.
#
# Args:
#   plotData:    data frame or matrix; column 1 is X, column 2 is Y.
#   vColors:     unused; kept for compatibility with plotMultipleCors.
#   ylabText:    Y-axis label; defaults to the name of column 2.
#   xlabText:    X-axis label; defaults to the name of column 1.
#   legendPos:   unused; kept for compatibility with plotMultipleCors.
#   fNormalizeY: if TRUE, scale Y so that its maximum is 100.
#   ...:         forwarded to set_up_plot().
#
# Returns: nothing useful; called for its plot.
#
# NOTE(review): the subtitle says "Rsq=" but the number is cor(x, y), i.e. r,
# not r squared. Text and value are left as they were.
plotCors <- function(plotData, vColors = "", ylabText = colnames(plotData)[2],
                     xlabText = colnames(plotData)[1], legendPos = "topleft",
                     fNormalizeY = TRUE, ...) {
  x <- plotData[, 1]
  y <- plotData[, 2]
  if (fNormalizeY) {
    # na.rm so that one missing Y value does not turn the whole series into NA
    y <- 100 * y / max(y, na.rm = TRUE)
  }

  set_up_plot(x, y, xAxLabel = xlabText, yAxLabel = ylabText, ...)

  points(x, y)
  abline(lm(y ~ x))
  rsq <- cor(x, y, use = "complete.obs")
  rsqTxt <- format(c(rsq, 0.123456789), digits = 2)[1]
  title(sub = paste0("Rsq=", rsqTxt))
}
  	 	 




#################################
###
### FUNCTION: groupSNPsIntoLoci
###
#################################
# 
# find groups of nearby SNPs
#
# Args: 
# hitListSnpChrID 	any kind of chromosome identifier
# hitListSnpPos 	position of snp, either a number or a snp id like chr15.44226659
# maxInterSNPRange	the maximum acceptable distance between SNPs
#
# Returns:
# data.frame with one row per locus
# data frame contains columns - c("chr", "minPos", "maxPos", "minPVal", "snpCount", "locID", "locusSize")
# 
# find windows in snps
# Group nearby SNPs into loci.
#
# SNPs are sorted by chromosome and position. A SNP joins the current locus
# when it is on the same chromosome and less than `maxInterSNPRange` away
# from the previous SNP of that locus; otherwise it starts a new locus.
#
# Args:
#   hitListSnpChrID:  chromosome identifier of each SNP (any kind of label).
#   hitListSnpPos:    position of each SNP: a number, or a SNP id such as
#                     "chr15.44226659" (everything up to the first "." is
#                     stripped).
#   hitListSnpPVals:  value per SNP whose minimum is reported per locus.
#   maxInterSNPRange: maximum distance between neighbouring SNPs of a locus.
#
# Returns: data frame with one row per locus and columns
#   chr, minPos, maxPos, minPVal, snpCount,
#   locID ("<chr>.<position of the locus' first SNP in Mb, rounded>Mb"),
#   locusSize (maxPos - minPos + 1).
#
# Fixes relative to the earlier loop-based version:
#   * the final locus was dropped whenever the last SNP started a new locus;
#   * the range test compared against minPos twice, so distance was measured
#     from the start of the locus rather than from the neighbouring SNP;
#   * a single SNP (or none) raised an error;
#   * a single "chr..." id was not parsed (the test was `> 1`, not `> 0`).
groupSNPsIntoLoci <- function(hitListSnpChrID, hitListSnpPos, hitListSnpPVals,
                              maxInterSNPRange = 1E6) {
  if (!is.numeric(hitListSnpPos)) {
    hitListSnpPos <- as.character(hitListSnpPos)
    if (any(grepl("chr", hitListSnpPos))) {
      hitListSnpPos <- gsub("^[^\\.]*\\.", "", hitListSnpPos)
    }
    hitListSnpPos <- as.numeric(hitListSnpPos)
  }

  hitList <- data.frame(
    chr = as.character(hitListSnpChrID),
    pos = hitListSnpPos,
    pval = as.numeric(hitListSnpPVals),
    stringsAsFactors = FALSE
  )
  nSnps <- nrow(hitList)
  if (nSnps == 0) {
    return(data.frame(
      chr = character(0), minPos = numeric(0), maxPos = numeric(0),
      minPVal = numeric(0), snpCount = integer(0), locID = character(0),
      locusSize = numeric(0), stringsAsFactors = FALSE
    ))
  }
  if (anyNA(hitList$pos)) {
    stop("SNP positions must be numeric or ids like 'chr15.44226659'",
         call. = FALSE)
  }

  hitList <- hitList[order(hitList$chr, hitList$pos), ]

  # TRUE where a SNP opens a new locus: the first SNP, a chromosome change,
  # or a gap of at least maxInterSNPRange to the previous SNP.
  startsNewLocus <- c(
    TRUE,
    hitList$chr[-1] != hitList$chr[-nSnps] |
      diff(hitList$pos) >= maxInterSNPRange
  )
  locusIndex <- cumsum(startsNewLocus)
  firstSnp <- which(startsNewLocus)
  lastSnp <- c(firstSnp[-1] - 1, nSnps)

  # Positions are sorted within a locus, so its first/last SNP give min/max.
  assocLocus <- data.frame(
    chr = hitList$chr[firstSnp],
    minPos = hitList$pos[firstSnp],
    maxPos = hitList$pos[lastSnp],
    minPVal = as.vector(tapply(hitList$pval, locusIndex, min)),
    snpCount = lastSnp - firstSnp + 1,
    locID = paste0(hitList$chr[firstSnp], ".",
                   round(hitList$pos[firstSnp] / 1E6, 0), "Mb"),
    stringsAsFactors = FALSE
  )
  assocLocus$locusSize <- assocLocus$maxPos - assocLocus$minPos + 1
  assocLocus
}