/*
   Builds the data for a QAP model of peer influence.

   The program runs in SAS, and also writes each matrix out as a .NET
   (PAJEK-format) file so it can be read into UCINET.

   The analysis is limited to cases without missing data, so missing values
   are simply imputed with cell means.  That is not being advocated as
   "best practice"; it is just simple here.
*/

libname in1xpt xport 'c:\jwm\conferences\columbia\workshop\sdatcln.xpt';

data sdat;
   set in1xpt.sdatcln;
run;

/* Cell means used for imputation (computed on the full file, i.e. before
   the deletions applied below). */
proc means data=sdat noprint;
   class white s2 s3;
   var s48 s9 s64;
   output out=impmean mean=s48x s9x s64x;
run;

/* Re-read the raw file and drop cases that cannot be used. */
data sdat;
   set in1xpt.sdatcln;
   /* s3 = 6 is a single case */
   if white = . or s3 = . or s3 = 6 or female = . then delete;
run;

proc sort data=sdat;
   by white s2 s3;
run;

/* _type_ = 7 keeps only the full white*s2*s3 cells from PROC MEANS. */
data impmean;
   set impmean (where=(_type_ = 7));
run;

/* Attach the cell means to every retained case. */
data sdat;
   merge sdat (in=indat)
         impmean;
   by white s2 s3;
   if indat;
run;

/* If a value is missing, set it to its cell mean.
   NOTE(review): a case whose white/s2/s3 cell has no matching mean keeps
   its missing value -- confirm this does not occur in the data. */
data sdat;
   set sdat;
   if s48 = . then s48 = s48x;
   if s9  = . then s9  = s9x;
   if s64 = . then s64 = s64x;
run;

/* Must re-sort by ID before building the networks! */
proc sort data=sdat;
   by nid;
run;

/* Create the relevant networks. */
%let spanloc = c:\jwm\sas\modules\;

proc iml;
   %include "&spanloc.adj.mod";
   %include "&spanloc.pajwrite.mod";

   use work.sdat;
   read all var{nid} into nid;
   read all var{mf1nid mf2nid mf3nid mf4nid mf5nid
                ff1nid ff2nid ff3nid ff4nid ff5nid} into noms;
   read all var{white}  into white;
   read all var{s3}     into grade;
   read all var{female} into female;
   read all var{s9}     into tenure;
   read all var{s48}    into tryhard;
   read all var{s64}    into fights;

   /* Adjacency matrix: the first column returned by adj() is used as the
      ID column, the remaining columns as the matrix itself. */
   amat    = adj(nid, noms);
   amat_id = amat[, 1];
   amat    = amat[, 2:ncol(amat)];

   /* Limit to within-sample ties: flag every matrix ID that is also
      present in nid. */
   inschool = j(nrow(amat), 1, 0);
   do k = 1 to nrow(amat_id);
      if type(xsect(amat_id[k], nid)) = 'N' then inschool[k] = 1;
   end;

   /* In-school-only version: keep just the flagged rows/columns. */
   schlloc   = loc(inschool = 1);
   amatis    = amat[schlloc, schlloc];
   amatis_id = amat_id[schlloc, 1];

   /* Sanity check that the matrix IDs line up with nid; chksum should be 0. */
   chk    = (amatis_id ^= nid);
   chksum = chk[+];
   print amatis_id nid chk chksum;

   /* Similarity matrices built from two-column dummy codings. */
   race     = white || (1 - white);      /* 2 columns: white / nonwhite   */
   samerace = race * (race`);            /* = 1 if the pair share a race  */
   samerace = samerace - diag(samerace); /* zero out the diagonal         */

   sex     = female || (1 - female);
   samesex = sex * (sex`);
   samesex = samesex - diag(samesex);

   /* The same kind of matrix can also be built with a simple loop. */
   nr        = nrow(samesex);
   samegrade = j(nr, nr, 0);             /* initialize the matrices */
   timedif   = samegrade;
   trydif    = samegrade;
   fightdif  = samegrade;

   /* All of these are symmetric, so only loop over the upper triangle. */
   do r = 1 to nr - 1;
      do c = r + 1 to nr;
         samegrade[r, c] = (grade[r] = grade[c]);
         timedif[r, c]   = abs(tenure[r]  - tenure[c]);
         trydif[r, c]    = abs(tryhard[r] - tryhard[c]);
         fightdif[r, c]  = abs(fights[r]  - fights[c]);
      end;
   end;

   /* Fill in the bottom triangle. */
   samegrade = samegrade + samegrade`;
   timedif   = timedif   + timedif`;
   trydif    = trydif    + trydif`;
   fightdif  = fightdif  + fightdif`;

   /* Symmetric version of the adjacency matrix.
      NOTE(review): this is a sum, so a reciprocated tie would be twice a
      one-way tie rather than a 0/1 indicator -- confirm that is intended. */
   symnet = (amatis + amatis`);

   /* For the SAS QAP, store the matrices. */
   reset storage = work.temp;
   store amatis samesex samerace samegrade timedif trydif fightdif symnet;

   /* For UCINET, write each matrix out and read it in there.  UCINET reads
      PAJEK format, so that is the format used. */
   file 'c:\jwm\conferences\columbia\workshop\amat_qap.net';
   call pajwrite(amatis, nid, 2);

   file 'c:\jwm\conferences\columbia\workshop\samesex_qap.net';
   call pajwrite(samesex, nid, 2);

   file 'c:\jwm\conferences\columbia\workshop\samegrade_qap.net';
   call pajwrite(samegrade, nid, 2);

   file 'c:\jwm\conferences\columbia\workshop\samerace_qap.net';
   call pajwrite(samerace, nid, 2);

   file 'c:\jwm\conferences\columbia\workshop\timedif_qap.net';
   call pajwrite(timedif, nid, 2);

   file 'c:\jwm\conferences\columbia\workshop\trydif_qap.net';
   call pajwrite(trydif, nid, 2);

   file 'c:\jwm\conferences\columbia\workshop\fightdif_qap.net';
   call pajwrite(fightdif, nid, 2);

   file 'c:\jwm\conferences\columbia\workshop\symnet_qap.net';
   call pajwrite(symnet, nid, 2);

   /* UCINET also does some simple network autocorrelation stats.  Output
      simple vectors for fighting, race and trying (one rounded value per
      PUT) to see whether the results match. */
   file 'c:\jwm\conferences\columbia\workshop\fight.txt';
   do k = 1 to nrow(fights);
      outval = round(fights[k], 1);
      put outval;
   end;

   file 'c:\jwm\conferences\columbia\workshop\white.txt';
   do k = 1 to nrow(white);
      outval = round(white[k], 1);
      put outval;
   end;

   file 'c:\jwm\conferences\columbia\workshop\tryhard.txt';
   do k = 1 to nrow(tryhard);
      outval = round(tryhard[k], 1);
      put outval;
   end;
quit;

/* Now try the SAS macro version of QAP regression. */
%let spanmac = c:\jwm\sas\macros\;
%include "&spanmac.qapreg.mac";
%include "&spanmac.words.mac";

%qapreg(trydif,                             /* dep var                 */
        symnet samerace samesex samegrade,  /* names of indep vars     */
        work.temp,                          /* where stored            */
        100,                                /* how many iterations     */
        Y);                                 /* treat data as symmetric */

/* After looking at the observed value, plot it as a reference line. */
proc univariate data=work._bdat noprint;
   where qapid>0;
   histogram symnet / href=-0.039 cfill=red;
run;
quit;