/* This program exports network data from SAS to R, for use with Carter's LNAM
   routine for analyzing network autocorrelation effects. */


/* set directories and read in files */
libname in1xpt xport 'c:\jwm\conferences\columbia\workshop\sdatcln.xpt';

/* Working copy of the transport file.  A record is dropped when white,
   female, or s3 is missing, or when s3 = 6 (one case). */
data sdat;
  set in1xpt.sdatcln;
  if white = . or female = . or s3 = . or s3 = 6 then delete;
run;

/* node-level covariates for the network model: one row per record in sdat,
   restricted to the id and the covariates listed in keep= */
data nodevars (keep = nid female white s3 s48 s64 s9);
  set sdat;
  /* recode missing s64 to 0, just to remove missing data */
  if s64 = . then s64 = 0;
run;

/* now generate the edgelist data.  Here we'll write this out as an edge attribute
   and work with that.  We'll load the previously saved network, though
   of course you could also create the net from the noms here.  

   Here I write one file with just the edge data, and a second with dyad-level covariates.
   This is because the edge file is used to create the network in R.


 */

%let spanmod = c:\jwm\sas\modules\;

/* PROC IML step: builds two datasets from work.sdat.
     work.edgedat (n1, n2, rnorm)  - one row per within-sample tie; rnorm is
                                     the tie value divided by the sender's
                                     row sum (row-normalized weight).
     work.dydat   (n1, n2, ovlpec) - one row per pair of distinct records that
                                     share at least one s44a* activity; ovlpec
                                     is the entry of acts*acts` for that pair.
   In both files n1/n2 are row positions (1..N), not the original nid values.
   Each file ends with a placeholder self-loop row on the highest position so
   the id range seen downstream always runs to N. */
proc iml;
  %include "&spanmod.adj.mod";   /* defines the adj() module called below */

  use work.sdat;
  read all var{nid} into nid;
  read all var{mf1nid mf2nid mf3nid mf4nid mf5nid ff1nid ff2nid ff3nid ff4nid ff5nid} into noms;

  /* create the adjacency matrix: the first column returned by adj() is taken
     as the node id, the remaining columns as the tie matrix */
  amat    = adj(nid, noms);
  amat_id = amat[, 1];
  amat    = amat[, 2:ncol(amat)];

  /* limit to just within-sample ties: flag every id that also appears in nid */
  inschool = j(nrow(amat), 1, 0);
  do i = 1 to nrow(amat_id);
    /* xsect() returns an empty (non-numeric type) result when there is no match */
    if type(xsect(amat_id[i], nid)) = 'N' then inschool[i] = 1;
  end;

  /* in-school-only version of the matrix and its id vector */
  schlloc   = loc(inschool = 1);          /* rows/cols of in-school kids */
  amatis    = amat[schlloc, schlloc];
  amatis_id = amat_id[schlloc, 1];        /* original id for each row position of amatis */

  /* out-degree (row sum), used as the row-normalizing denominator */
  odg = amatis[, +];

  /* write out the edgelist; rnormmat keeps the same weights in matrix form
     for the check printed below */
  rnormmat = j(nrow(amatis), nrow(amatis), 0);
  create work.edgedat var{"n1" "n2" "rnorm"};
  do i = 1 to nrow(amatis);
    iloc = loc(amatis[i, ] > 0);
    /* rows with no ties give an empty iloc and are skipped, so odg[i] is
       never the divisor for an all-zero row */
    if type(iloc) = 'N' then do;
      do k = 1 to ncol(iloc);
        n1 = i;
        n2 = iloc[k];
        rnorm = amatis[i, n2] / odg[i];   /* row-normalized tie value */
        rnormmat[i, n2] = rnorm;
        append;
      end;
    end;
  end;

  /* just checking: rnodg should equal 1 for every row that has at least one
     tie, and 0 for isolates */
  rnodg = rnormmat[, +];
  print odg rnodg;

  /* statnet works on an id list from 1 to N, so if you have an isolate for your
     largest N, it will miss it in the network.  To fix this, I simply add an
     edge from ego to himself for the highest N in the net.  STATNET ignores these
     unless I explicitly tell it to look for loops, but it does set the ID range. */
  n1 = nrow(amatis);
  n2 = nrow(amatis);
  rnorm = 0;   /* was misspelled "rnomr", which left the previous edge's weight on this row */
  append;
  close work.edgedat;

  /* now create a matrix of overlapping extra-curricular activities.
     We'll write that out as a dyad file too.  Note that STATNET can
     create lots of dyad-like measures based on node attributes (differences,
     matching, etc.), so you only need a file like this for dyad-level covariates
     that are not simple computations from the node-level file */
  use work.sdat;
  read all var("s44a1":"s44a33") into acts;   /* person by activity indicator matrix */
  ovrlpacts = acts * acts`;                   /* person to person projection of the activity matrix */

  nr = nrow(ovrlpacts);
  print nr;

  /* NOTE(review): n1/n2 here are row positions in work.sdat, while the edge
     file uses row positions in amatis.  The two files line up only if adj()
     returns the in-sample rows in the same order as work.sdat - confirm. */
  create work.dydat var{"n1" "n2" "ovlpec"};
  do n1 = 1 to nrow(ovrlpacts);
    do n2 = 1 to nrow(ovrlpacts);
      if n1 ^= n2 & ovrlpacts[n1, n2] > 0 then do;
        ovlpec = ovrlpacts[n1, n2];
        append;
      end;
    end;
  end;

  /* again, same trick for largest value */
  n1 = nrow(ovrlpacts);
  n2 = nrow(ovrlpacts);
  ovlpec = 0;
  append;
  close work.dydat;

quit; /* exit IML */

/* Use the send2r macro to write out the data and create the R script.

   Parameters passed to %send2r:
     libdir    - physical directory where the xport files will be put
     xptedat   - SAS export dataset name for the edge file
     xptdydat  - SAS export dataset name for the all-dyad covariate file
     xptvdat   - SAS export dataset name for the node-level covariate file
     redatnm   - name for the R data object with edge information
     rdydatnm  - name for the R data object with all-dyad covariate info
     rvdatnm   - name for the R data object with all node covariate info
     rscript   - name of the R script file you will run; placed in libdir
     rnetnm    - name for the R network object (created from the edges)
     rnetcovnm - name for the R network covariate object (created from dycov)
     sasedat   - SAS edge-level file, including edge-present covariates
     sasvdat   - SAS node-level file
     sasdydat  - SAS all-dyad file for dyad-level covariates
     node1     - id name for one end of the edges (sender)
     node2     - id name for the other end of the edges (receiver) */

%let spanmac = c:\jwm\sas\macros\;

%include "&spanmac.send2r.mac";

%send2r(
    libdir    = c:/jwm/conferences/columbia/workshop/,
    xptedat   = edgepi,
    xptdydat  = dycovpi,
    xptvdat   = nodespi,
    redatnm   = s_edge,
    rdydatnm  = s_dycov,
    rvdatnm   = s_nodes,
    rscript   = sas2r_lnam.r,
    rnetnm    = s_friends,
    rnetcovnm = s_clubs,
    sasedat   = work.edgedat,
    sasvdat   = work.nodevars,
    sasdydat  = work.dydat,
    node1     = "n1",
    node2     = "n2"
);