/*
   This program reads in an obscured version of some AH data as an example
   for the RWJ network class.  Here we do some simple recoding and write
   the data out to PAJEK for further study.

   Author: Moody
   Date:   March, 2007

   Flow:
     1. import the transport file into work.sdat_raw
     2. recode missing-value codes / build dummies      -> work.sdat
     3. PROC IML: build the adjacency matrix, write PAJEK .net/.clu files,
        and store the in-school matrix in out1.netstore
     4. export the cleaned data as a transport file     -> out1xpt.sdatcln
*/

libname in1xpt  xport 'c:\jwm\conferences\columbia\workshop\schldat.xpt';
libname out1xpt xport 'c:\jwm\conferences\columbia\workshop\sdatcln.xpt';
libname out1 'c:\jwm\conferences\columbia\workshop\';

/* read the data in from the perm. file.
   The raw copy is kept under its own name so the cleaning step below can
   be re-submitted safely: that step reverse-codes s48 and s62r, and
   running it a second time on already-cleaned data would flip them back. */
data sdat_raw;
  set in1xpt.schldat;
run;

/* just print the rows with low ids (nid < 10) to have a look */
proc print data=sdat_raw;
  where nid<10;
  var nid mf1nid mf2nid mf3nid mf4nid mf5nid
          ff1nid ff2nid ff3nid ff4nid ff5nid;
run;

/* clean the data for missing codes & create new vars.
   Note: a SAS missing value sorts below every number, so the
   "if x > k" tests below never fire for values that are already missing. */
data sdat;
  set sdat_raw;

  /* create a white/non-white dummy */
  /* race was created from other code long ago... */
  if race5 = 1 then white = 1;
  if race5 > 1 then white = 0;
  if race5 = . then white = .;

  /* recode missing values in other variables */
  if s2 > 2 then s2 = .;            /* sex */
  female = 0;
  if s2 = 2 then female = 1;
  if s2 = . then female = .;

  if s3 > 12 then s3 = .;           /* grade */
  if s9 > 6 then s9 = .;            /* time at this school */

  if s48 > 4 then s48 = .;          /* try hard at school */
  s48 = 5 - s48;                    /* reverse so hi=tries hard */

  if s62r > 5 then s62r = .;        /* feel safe in school */
  s62r = 6 - s62r;                  /* reverse, high=feel safe */

  if s64 > 4 then s64 = .;          /* fights in last year */

  /* the nomination data includes ties to "special codes", i.e. ties
     outside the school.  In this file they are all set to 9999, so we
     need to recode those to missing. */
  array noms {*} mf1nid mf2nid mf3nid mf4nid mf5nid
                 ff1nid ff2nid ff3nid ff4nid ff5nid;
  do _k = 1 to dim(noms);
    if noms{_k} = 9999 then noms{_k} = .;
  end;
  drop _k;                          /* loop index only, keep it out of sdat */
run;

proc means data=sdat;
run;

/* now create an adjacency matrix and a set of indicator variables to
   write to PAJEK */
proc iml;
  /* read in some preprogrammed SAS modules for nets */
  %include 'c:\jwm\sas\modules\pajwrite.mod';
  %include 'c:\jwm\sas\modules\pajpart.mod';
  %include 'c:\jwm\sas\modules\adj.mod';

  use work.sdat;
  read all var{nid} into nid;
  read all var{mf1nid mf2nid mf3nid mf4nid mf5nid
               ff1nid ff2nid ff3nid ff4nid ff5nid} into noms;

  /* create the adjacency matrix.
     The first column of what adj() returns is used as the node id and
     the remaining columns as the matrix itself. */
  amat = adj(nid,noms);
  amat_id = amat[,1];
  amat = amat[,2:ncol(amat)];

  /* NOTE(review): meaning of the third pajwrite argument (2) is defined
     in pajwrite.mod - confirm there */
  file 'c:\jwm\conferences\columbia\workshop\adj_allnoms.net';
  call pajwrite(amat,amat_id,2);

  /* NOTE: The adj. matrix includes nominations to kids who were not in
     the school.  Later we'll take them out, but for now let's just flag
     them so we can see what their 'missingness' looks like. */
  inschool = j(nrow(amat),1,0);     /* simple vector to flag in-school */
  do i = 1 to nrow(amat_id);
    /* xsect() yields a numeric ('N') result only when the id is also
       found in nid; otherwise the result is empty and the flag stays 0 */
    if type(xsect(amat_id[i],nid)) = 'N' then inschool[i] = 1;
  end;

  file 'c:\jwm\conferences\columbia\workshop\adjallnoms_ischl.clu';
  call pajpart(inschool);

  /* now let's create a version that is in-school only */
  schlloc = loc(inschool=1);        /* rows/cols of inschool kids */
  amatis = amat[schlloc,schlloc];   /* just pull those rows/cols */
  amatis_id = amat_id[schlloc,1];

  /* now let's double check our sort order, to be sure, then create race,
     grade and sex plotting vars.
     idchk counts the positions where nid differs from the adjacency id.
     It should equal zero; if not, we have a sort error and need to figure
     out why.  A value of -1 means the two id vectors do not even have the
     same number of rows (comparing them directly would stop with a
     "matrices do not conform" error instead of reporting the problem). */
  if nrow(nid) = nrow(amatis_id) then idchk = (nid ^= amatis_id)[+];
  else idchk = -1;
  print idchk;

  /* now write out the file to PAJEK */
  file 'c:\jwm\conferences\columbia\workshop\adjis.net';
  call pajwrite(amatis,amatis_id,2);

  /* create some vars due to attributes.
     These are read in work.sdat row order, so they line up with the
     in-school matrix only when idchk above is 0. */
  read all var{white}  into white;
  read all var{female} into female;
  read all var{s3}     into grade;
  read all var{s64}    into fights;
  close work.sdat;                  /* nothing more is read from it */

  /* pajek cannot deal with . missing values, so we need to recode them */
  white  = choose(white=.,99,white);
  female = choose(female=.,99,female);
  grade  = choose(grade=.,99,grade);
  fights = choose(fights=.,99,fights);

  file 'c:\jwm\conferences\columbia\workshop\adjis_white.clu';
  call pajpart(white);

  file 'c:\jwm\conferences\columbia\workshop\adjis_female.clu';
  call pajpart(female);

  file 'c:\jwm\conferences\columbia\workshop\adjis_grade.clu';
  call pajpart(grade);

  file 'c:\jwm\conferences\columbia\workshop\adjis_fights.clu';
  call pajpart(fights);

  /* I'm going to store the adjacency matrix for later, just to save time.
     Lots of ways to do this.  Since this is a fairly small network, I'll
     just save the adjacency matrix.  If it were larger, I'd convert it to
     a more efficient format and save that instead. */
  reset storage=out1.netstore;
  store amatis amatis_id;
quit;

/* let's write out the clean data so we don't have to do that again. */
data out1xpt.sdatcln;
  set work.sdat;
run;