/* Write the running SAS release to the log in three levels of detail:
   short version, long version, and long version with a 4-digit year. */
%put &sysver.;
%put &sysvlong.;
%put &sysvlong4.;
/*
Data analysis is a process of inspecting, cleaning, transforming, and modelling data with the goal of highlighting useful information,
suggesting conclusions, and supporting decision making.
The first step in this process is to know about your data...
- Know what kind of data you are dealing with, e.g. financial, pharmaceutical, etc.
- If you have access to the data..quickly run a freq / means to get data-level information.
- Depending upon your business specific questions we might have to slice and dice / summarize the data.
Here's a simple SAS Macro that would help you to quickly find information as to
- what are the variables available / last modification date etc.
- Give a frequency of occurrences / min / max etc., depending on the type of each SAS column.
- Produce a HTML report.
Steps to code the macro...
1. Get the type of the variables proc contents
2. Apply a Proc means with min max of numeric variables...Note: Proc Means does not generate a report for Character variables.
3. Apply a Proc Freq for Character Variables
4. Print an ODS HTML report for the results
*/
/* Explanation of the parameters of the Macro...
%analyzeSASdsn(dsn,numobs,varlist);
dsn - name of the dataset to analyze
numobs - number of observations to read from the dataset (passed to the OBS= option; use MAX for all)
varlist - List of Variables that you need to analyze...use _ALL_ if you want to include all the variables.
*/
/*
  analyzeSASdsn - profile a dataset and write the result as an ODS HTML report.

  Parameters:
    dsn     - dataset to analyze.
    numobs  - value passed to the OBS= dataset option of PROC MEANS / PROC FREQ
              (a number, or MAX for every observation).
    varlist - blank-separated list of variables to report on, or _ALL_ for
              every variable in the dataset.

  Requires the macro variable htmlfilepath to hold the path of the HTML file
  to write. The LISTING destination is closed and left closed.

  Report contents: PROC CONTENTS for the dataset, PROC MEANS for the numeric
  variables, PROC FREQ for the character variables. Requested variables are
  reported in the order given; names not found in the dataset are skipped
  with a WARNING in the log.
*/
%macro analyzeSASdsn(dsn,numobs,varlist);
  /* Keep all working variables out of the caller's symbol tables */
  %local numvars charvars num vname dsid vpos rc;
  %let numvars=;   /* numeric variables to pass to PROC MEANS */
  %let charvars=;  /* character variables to pass to PROC FREQ */

  /* The report destination comes from a macro variable, not a parameter */
  %if not %symexist(htmlfilepath) %then
  %do;
    %put ERROR: analyzeSASdsn - macro variable htmlfilepath is not defined.;
    %return;
  %end;

  %if %upcase(&varlist) eq _ALL_ %then
  %do;
    /* Let SAS expand the special lists itself */
    %let numvars=_numeric_;
    %let charvars=_character_;
  %end;
  %else
  %do;
    /* Open the dataset once and look each requested name up directly.
       VARNUM matches the whole name, so AGE can no longer be mistaken for
       a substring of another variable such as PAGE. */
    %let dsid=%sysfunc(open(&dsn,i));
    %if &dsid eq 0 %then
    %do;
      %put ERROR: analyzeSASdsn - cannot open &dsn.. %sysfunc(sysmsg());
      %return;
    %end;

    %let num=1;
    %let vname=%upcase(%scan(&varlist,&num,%str( )));
    %do %while(%length(&vname) gt 0);
      %let vpos=%sysfunc(varnum(&dsid,&vname));
      %if &vpos gt 0 %then
      %do;
        %if %sysfunc(vartype(&dsid,&vpos)) eq N %then %let numvars=&numvars &vname;
        %else %let charvars=&charvars &vname;
      %end;
      %else %put WARNING: analyzeSASdsn - variable &vname not found in &dsn and was skipped.;
      %let num=%eval(&num + 1);
      %let vname=%upcase(%scan(&varlist,&num,%str( )));
    %end;

    %let rc=%sysfunc(close(&dsid));
    %put numvars=&numvars charvars=&charvars;
  %end;

  ods listing close;
  ods html body="&htmlfilepath";

  /* Dataset-level information: variables, types, modification date, ... */
  proc contents data=&dsn;run;

  %if %length(&numvars) gt 0 %then
  %do;
    /* PROC MEANS only reports on numeric variables */
    proc means data=&dsn(obs=&numobs) n mean max min range;
      var &numvars;
      title 'Summary Statistics of all Numeric variables in the dataset';
    run;
  %end;

  %if %length(&charvars) gt 0 %then
  %do;
    /* One-way frequency table for every character variable */
    proc freq data=&dsn(obs=&numobs);
      tables &charvars;
      title1 'Summary Statistics of all Character variables in the dataset';
    run;
  %end;

  ods html close;
%mend analyzeSASdsn;
/* Destination of the HTML report written by analyzeSASdsn; change it to a
   location that is writable on your machine. */
%let htmlfilepath=C:\out.html;
/* Page layout for the output ... */
options nodate pageno=1;
options linesize=80 pagesize=60;
/* ... and macro tracing in the log while the examples run */
options mprint symbolgen;
/* Sample data for the analyzeSASdsn examples.
   The INPUT statement is column-based, so every data line below must keep
   its fields in these exact columns:
     LastName 1-12, Age 13-14, PresentScore 16-17, TasteScore 19-20,
     Flavor 23-32, Layers 34.
   A blank Flavor or a '.' in Layers is read as a missing value. */
data cake;
   input LastName $ 1-12 Age 13-14 PresentScore 16-17
         TasteScore 19-20 Flavor $ 23-32 Layers 34 ;
   datalines;
Orlando     27 93 80  Vanilla    1
Ramey       32 84 72  Rum        2
Goldston    46 68 75  Vanilla    1
Roe         38 79 73  Vanilla    2
Larsen      23 77 84  Chocolate  .
Davis       51 86 91  Spice      3
Strickland  19 82 79  Chocolate  1
Nguyen      57 77 84  Vanilla    .
Hildenbrand 33 81 83  Chocolate  1
Byron       62 72 87  Vanilla    2
Sanders     26 56 79  Chocolate  1
Jaeger      43 66 74             1
Davis       28 69 75  Chocolate  2
Conrad      69 85 94  Vanilla    1
Walters     55 67 72  Chocolate  2
Rossburger  28 78 81  Spice      2
Matthew     42 81 92  Chocolate  2
Becker      36 62 83  Spice      2
Anderson    27 87 85  Chocolate  1
Merritt     62 73 84  Chocolate  1
;
run;
/* Example 1: report on every variable of CAKE, using all observations */
%analyzeSASdsn(cake,max,_ALL_);

/* Example 2: report on the variables Age and Layers only */
%analyzeSASdsn(cake,max,Age Layers);

/* Switch source echo and macro tracing off again */
options nosource nomprint;
options nomlogic nosymbolgen;
/**
Usage of the Macro-
%splitdsnbyobs(DatasetName, Number of observations per split)
**/
/* Macro that splits a dataset into DatasetName1, DatasetName2, ... with at most
   the requested number of observations in each */
/*
  splitdsnbyobs - split a dataset into consecutive pieces.

  Parameters:
    dsn     - dataset to split.
    splitby - maximum number of observations per piece (positive integer).

  Creates &dsn.1, &dsn.2, ... each holding the next &splitby observations of
  &dsn; the last piece holds whatever remains. Nothing is created when &dsn
  has no observations.
*/
%macro splitdsnbyobs(dsn,splitby);
  %local i no_obs;
  %let no_obs=0;

  %if %sysevalf(&splitby < 1) %then
  %do;
    %put ERROR: splitdsnbyobs - splitby must be at least 1 (got &splitby).;
    %return;
  %end;

  /* NOBS= is filled in when the step is compiled, so the count is available
     without reading a single observation - and also when &dsn is empty.
     CALL SYMPUTX stores the number without leading blanks. */
  data _null_;
    if 0 then set &dsn nobs=num;
    call symputx('no_obs',num);
    stop;
  run;

  /* Piece i covers observations (i-1)*splitby+1 through i*splitby; an OBS=
     value past the end of the dataset simply reads to the last observation. */
  %do i=1 %to %sysfunc(ceil(&no_obs/&splitby));
    data &dsn.&i.;
      set &dsn (firstobs=%eval((&i - 1) * &splitby + 1) obs=%eval(&i * &splitby));
    run;
  %end;
%mend splitdsnbyobs;
/* Example: build LOOPS with 100 observations, i = 1, 2, ..., 100 */
data loops;
  i = 0;
  do while (i < 100);
    i = i + 1;
    output;
  end;
run;

/* Split LOOPS into pieces of 20 observations each */
%splitdsnbyobs(loops,20);
/*
Read more: http://sastechies.blogspot.com/2009/11/sas-macro-to-split-dataset-by-number-of.html
*/
/* Keep the log quiet for the code that follows */
options nosource nomprint;
options nomlogic nosymbolgen;
/*
  reorder - rewrite a dataset with its variables in alphabetical order.

  Parameter:
    dsn - dataset to rewrite in place.

  The ordering ignores case (names are compared in lower case), but each
  variable keeps the name exactly as it was stored. The list of names is held
  in one macro variable per variable rather than in a single blank-separated
  list, so very wide datasets do not run into the macro variable length limit.
*/
%macro reorder(dsn);
  %local i obscnt;
  %let obscnt=0;

  %if not %sysfunc(exist(&dsn)) %then
  %do;
    %put ERROR: reorder - dataset &dsn does not exist.;
    %return;
  %end;

  /* Variable names of &dsn, one observation per variable. A private work
     dataset is used so that a user dataset called VARNAMES is not touched. */
  proc contents data=&dsn
                out=_reorder_vars_(keep=name) noprint;
  run;

  /* Sort on a lower-cased copy: sorting the raw names would put every
     upper-case name ahead of every lower-case one. NAME itself is left
     untouched so the original spelling can be used in the RETAIN below. */
  data _reorder_vars_;
    set _reorder_vars_;
    length _sortkey $32;
    _sortkey=lowcase(name);
  run;

  proc sort data=_reorder_vars_;
    by _sortkey;
  run;

  /* macvar1..macvarN = names in sorted order, obscnt = N; all local */
  data _null_;
    set _reorder_vars_ end=_last;
    call symputx(cats('macvar',_n_),name,'L');
    if _last then call symputx('obscnt',_n_,'L');
  run;
  %put obscnt=&obscnt;

  %if &obscnt gt 0 %then
  %do;
    /* Naming the variables on a RETAIN statement before the SET statement
       fixes their position in the output dataset. The %DO loop emits the
       names only; the single semicolon after %END closes the RETAIN. */
    data &dsn;
      retain
      %do i=1 %to &obscnt;
        &&macvar&i
      %end;
      ;
      set &dsn;
    run;
  %end;

  /* Remove the helper dataset */
  proc datasets lib=work nolist nowarn;
    delete _reorder_vars_;
  quit;
%mend reorder;
/* Demo data: SASHELP.FLAGS plus five extra variables whose names mix upper
   case, lower case and a leading underscore. The assignments are kept in this
   order because it determines the "before" variable order. */
data flags;
  set sashelp.flags;
  a       = 2;
  b       = 4;
  _common = 10;
  Cool    = 30;
  SubJecT = 40;
run;

/* Send both PROC CONTENTS listings to one HTML file */
ods listing close;
ods html body="C:\Reorder.html";

title 'Order of the Variable Before Re-ordering';
proc contents data=flags;
run;

%reorder(flags);

title 'Order of the Variable after Re-ordering';
proc contents data=flags;
run;

/* Close the HTML file and go back to LISTING output */
ods html close;
ods listing;
<!DOCTYPE html>
<html lang="en">
<head>
<!-- NOTE(review): this HTML page skeleton sits between SAS programs in the same file; presumably a paste artifact - confirm it belongs here -->
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>Design Shack Template</title>
<!-- Stylesheets, loaded in this order: reset, text, 960, then the page's own style.css -->
<link rel="stylesheet" href="css/reset.css" />
<link rel="stylesheet" href="css/text.css" />
<link rel="stylesheet" href="css/960.css" />
<link rel="stylesheet" href="style.css" type="text/css">
<!--<link rel="stylesheet" href="css/uncompressed/demo.css" />-->
</head>
<body>
<!-- Page skeleton: empty header, empty .container_12 block, empty footer -->
<div id="header"></div>
<div class="container_12">
</div>
<div id="footer"></div>
<!-- end of page content (header, .container_12, footer) -->
</body>
</html>
/* Turn a free-text label into a string usable as a SAS variable name and
   print the result to the log. */
data _null_;
  name="1. Long text with no. at start - and some double spaces";
  /* Keep only letters (A-Z, a-z), digits, underscores and blanks */
  name = compress(strip(name), " ", 'kn');
  /* Collapse runs of blanks to one, then turn each blank into an underscore */
  name = translate(compbl(strip(name)),'_'," ");
  /* Keep the first 31 characters: the name limit is 32 and one position is
     reserved for the underscore that may be added below */
  name = substrn(strip(name), 1, 31);
  /* A name must not start with a digit. ANYDIGIT returns the position of the
     first digit, so 1 means "starts with a digit" - including a leading 0,
     which a numeric truth test on the digit's value would miss. */
  if anydigit(name) = 1 then name = "_"||name;
  put name=;
run;
/* Connect to the EDA schema of the Oracle instance XE.
   NOTE(review): user name and password are hard-coded in plain text here;
   consider moving them out of the program source. */
LIBNAME EDA ORACLE PATH=XE SCHEMA=EDA USER=eda PASSWORD=eda;

/* Local copy of the WAT rows for one lot, restricted to the columns used by
   the code that follows. Column selection and row filter are written as
   dataset options on the input table. */
DATA WAT;
  SET EDA.WAT(KEEP=PRODUCT_NAME SPEC_FILE LOT_ID WAFER_ID SITE_ID ATT1 ATT2 ATT3
              WHERE=(LOT_ID like 'P0M78.1'));
RUN;
/* Write the observations of WAT to the print file as a JSON array with one
   object per observation.
   NOTE(review): values are written as they are - quotes or backslashes inside
   character values are not escaped, and a missing numeric value would appear
   as a bare '.'; confirm the data cannot contain either. */
DATA _NULL_;
  file print PS=32767;
  /* Open the array before the SET statement so that an empty WAT still
     produces a complete (empty) array instead of no output at all.
     total_obs is filled in by NOBS= when the step is compiled. */
  if _N_ eq 1 then do;
    put '[';
    if total_obs eq 0 then put ']';
  end;
  set WAT end=lastrec nobs=total_obs;
  /* List-style PUT writes one blank after each variable value; +(-1) moves
     the pointer back over it so the closing quote or comma follows the value
     directly. */
  put '{"productName":"' PRODUCT_NAME +(-1) '",';
  put '"lotId":"' LOT_ID +(-1) '",';
  put '"waferId":' WAFER_ID +(-1) ',';
  put '"siteId":"' SITE_ID +(-1) '",';
  put '"specFile":"' SPEC_FILE +(-1) '",';
  put '"attrs": [';
  put '{"attr1":' ATT1 +(-1) '},';
  put '{"attr2":' ATT2 +(-1) '},';
  put '{"attr3":' ATT3 +(-1) '}]}';
  /* Objects are separated by a comma line; the last one closes the array */
  if lastrec eq 1 then do;
    put ']';
  end;
  else do;
    put ',';
  end;
RUN;