*****
  Creating many records from one input record.
  Using a statistics procedure with a BY statement to get separate summaries.
  Directing the summaries to a new dataset and extracting them for plotting.
*********************************************************** ;

DATA alberta;
  KEEP m kg power BMI I_female;
  INFILE "Macintosh HD:Courses:613(StatSoftware):alberta_data.txt";
  INPUT Age Height Weight IDNumber I_Female;

  /* Unit conversions. The factors 2.54/100 and 1/2.2 suggest Height is in
     inches and Weight in pounds -- TODO confirm against the data file. */
  m  = (Height * 2.54) / 100;
  kg = Weight / 2.2;

  /* Loop over an integer index and derive the exponent from it.
     Writing DO power = 1.0 TO 3.0 BY 0.1 repeatedly adds 0.1, and the
     accumulated binary rounding error can push the index just past 3.0,
     so the final exponent (3.0) would be skipped. step is not in the
     KEEP list, so it never reaches the dataset. */
  DO step = 10 TO 30;
    power = step / 10;
    bmi   = kg / (m**power);
    OUTPUT;                       * puts 1 observation per exponent into the dataset ;
  END;
RUN;

PROC SORT DATA=alberta;
  BY I_Female power;
RUN;

* PROC CORR writes its summaries to the dataset named in OUTP= ;
PROC CORR DATA=alberta OUTP=stats NOPRINT;
  BY I_female power;
  VAR BMI;
  WITH m;
RUN;

DATA corrlns;
  KEEP I_Female power r;
  SET stats;
  IF _TYPE_ = "CORR";             * subsetting IF keeps only the correlation rows ;
  r = ABS(BMI);                   * on a CORR row the BMI column holds the correlation with m ;
RUN;

PROC PLOT DATA=corrlns;
  PLOT r * power = I_female;
RUN;

********************* ;

/* Converting the Galton family records into
     - a separate parent .txt file (205 records, one per family)
     - a separate offspring .txt file (1 record per offspring)
   Helpful if you want to send raw text in an email to someone who
   does not have Excel or SAS.

   NOTE(review): DELIMITER= is used without DSD, so consecutive commas
   are treated as a single delimiter. This assumes empty child slots are
   coded explicitly in the raw file (for example as a period) -- confirm,
   otherwise the fields after an empty slot shift left. */

DATA family;
  INFILE "Macintosh HD:Courses:613(StatSoftware):galton_data_raw.txt" delimiter = ",";
  *FILE path:name;                * take out comment and supply path and name for txt file ;

  ATTRIB s1-s10 d1-d9 LENGTH=$10; * declares these as character variables ;
  ARRAY S(10) s1-s10;             * allows us to refer to them with subscripts ;
  ARRAY d(9)  d1-d9;              * useful for looping over the entire set ;

  INPUT Family $ Father Mother S1-S10 D1-D9 n;

  /* PUT prints the specified items to the LOG, or to a txt file if
     one is named in a FILE statement. */
  DO i = 1 TO 10;                 /* start with i=1 and keep going until i=10 */
    IF s(i) NE "" THEN PUT family 1-5 " Son " s(i) 17-26;
  END;                            /* of loop ... if i < 10, goes back to start of loop, with 1 added to i */

  DO i = 1 TO 9;
    IF d(i) NE "" THEN PUT family 1-5 " Daughter" d(i) 17-26;
  END;
RUN;

***************
  Treating s1-s10 and d1-d9 as numeric, and adding 60 inches to each.
  Calculate how many sons and daughters, and the mean height of
  sons and of daughters.
******************************** ;

DATA family;
  INFILE "Macintosh HD:Courses:613(StatSoftware):galton_data_raw.txt" delimiter = ",";

  ARRAY S(10) s1-s10;             * allows us to refer to them with subscripts ;
  ARRAY d(9)  d1-d9;              * useful for looping over the entire set ;

  INPUT Family $ Father Mother S1-S10 D1-D9 n;

  /* Only shift the slots that hold a value. A missing slot stays missing
     either way, but skipping it avoids the "missing values were generated"
     notes in the log. */
  DO i = 1 TO 10;                 /* start with i=1 and keep going until i=10 */
    IF s(i) NE . THEN s(i) = s(i) + 60;
  END;                            /* of loop ... if i < 10, goes back to start of loop, with 1 added to i */

  DO i = 1 TO 9;
    IF d(i) NE . THEN d(i) = d(i) + 60;
  END;

  /* N, MEAN, MAX, MIN etc. are built-in functions that ignore missing
     arguments. MEAN(s1,s2,s3,s4,s5,s6,s7,s8,s9,s10) is the long way. */
  n_sons   = N(of s1-s10);
  ave_sons = MEAN(of s1-s10);
  n_daus   = N(of d1-d9);         * same summaries for the daughters ;
  ave_daus = MEAN(of d1-d9);
RUN;

PROC PRINT DATA=family (obs = 4);
RUN;

******** creating SAS dataset, 1 obsn per offspring ************* ;

DATA children;
  KEEP family male height;
  INFILE "Macintosh HD:Courses:613(StatSoftware):galton_data_raw.txt" delimiter = ",";

  ARRAY S(10) s1-s10;             * allows us to refer to them with subscripts ;
  ARRAY d(9)  d1-d9;              * useful for looping over the entire set ;

  INPUT Family $ Father Mother S1-S10 D1-D9 n;

  /* OUTPUT sends all the variables -- or just those named in a KEEP
     statement -- to the dataset named in the DATA statement. */
  male = 1;
  DO i = 1 TO 10;                 /* start with i=1 and keep going until i=10 */
    IF s(i) NE . THEN DO;         /* e.g. of a DO block */
      height = s(i) + 60;
      OUTPUT;
    END;
  END;                            /* of loop ... if i < 10, goes back to start of loop, with 1 added to i */

  male = 0;
  DO i = 1 TO 9;
    IF d(i) NE . THEN DO;
      height = d(i) + 60;
      OUTPUT;
    END;
  END;
RUN;

PROC PRINT DATA=children (obs = 12);
RUN;