/*
 * Part 1 of the SYSTEM04 analysis script.
 *   1. Import three raw CSV extracts (rawcd1-rawcd3) into the SYSTEM04 library.
 *   2. Stack them into SYSTEM04.MASTER and derive numeric keys, record flags
 *      (vx1-vx25), reverse-coded denial dummies, regressors, a credit
 *      instrument (cr_ins), state distance scores and MSA dummies.
 *   3. Fit the credit model and the three segmentation logistic models.
 *   4. Sum vx1-vx25 by MSA and build MSA-level share variables (SYSTEM04.MSASUM).
 */
libname system04 "c:\sasdat\system04\";
options linesize=256;
options pagesize=200;

/*
 * read_cd: import one raw extract into SYSTEM04.CD<n>.
 *   n - extract number; selects c:\sasdat\system04\rawcd<n>.csv
 * Variables followed by $ are read as character, all others as numeric.
 * NOTE(review): carace5 has no $ and is therefore read as numeric, unlike
 * carace1-carace4 - confirm this is intended.
 * Only rows with apflag=0 and msa not equal to "NA" are kept.
 */
%macro read_cd(n);
data system04.cd&n(compress=yes);
  infile "c:\sasdat\system04\rawcd&n..csv" delimiter="," missover;
  length respid $ 10;
  input year respid $ agency $ ltype prop $ lpurp occup lamt preapp $ actype
        msa $ state $ county $ tract $ apeth $ caeth $
        aprace1 $ aprace2 $ aprace3 $ aprace4 $ aprace5 $
        carace1 $ carace2 $ carace3 $ carace4 $ carace5 apsex casex
        apinc $ ptype $ den1 $ den2 $ den3 $ ratespr $ hoepa $ lien $ edit $
        sequence $ pop $ minpct $ hudmfi $ trmsa $ ownocc $ sfunits $ apflag;
  if (apflag=0 and msa ne "NA");
run;
%mend read_cd;

%read_cd(1)
%read_cd(2)
%read_cd(3)

/*
 * Build the analysis file from the three extracts.
 */
data system04.master(compress=yes);
  set system04.cd1 system04.cd2 system04.cd3;

  /* The initial numeric assignment fixes each new variable as numeric, so   */
  /* the following assignment converts the character source value to a       */
  /* number instead of creating a character variable.                        */
  nstate=0;  nstate=state;
  ncounty=0; ncounty=county;
  ntract=0;  ntract=tract;
  nfips=(nstate*10000000)+(ncounty*10000)+ntract;
  fips=state||county||tract;

  /* "NA" text in the raw fields stays missing (or 0 for msainc). */
  rate=.;   if ratespr ne "NA" then rate=ratespr;
  trxmsa=.; if trmsa ne "NA" then trxmsa=trmsa;
  hicost   = (rate gt 0);
  rvhicost = not (rate gt 0);
  msainc=0; if hudmfi ne "NA" then msainc=hudmfi;

  /* Applicant income and loan amount are multiplied by 1000. */
  napinc=.; temp=.;
  if (apinc ne "NA" and apinc ne "na") then temp=apinc;
  if temp ne . then napinc=(temp*1000);
  nlamt=.; temp2=.;
  if (lamt ne "NA" and lamt ne "na") then temp2=lamt;
  if temp2 ne . then nlamt=(temp2*1000);

  /* Record-type flags, later summed by MSA and by tract. */
  vx1  = (prop ne '3');
  label vx1="all sf records";
  vx2  = (prop ne '3') and (ltype=2 or ltype=3 or ltype=4);
  label vx2="all sf fha records";
  vx3  = (prop ne '3') and (ltype=1) and (actype lt 6);
  label vx3="sf conventional excl purch preapp";
  vx4  = (prop ne '3') and (ltype=1) and (actype = 3);
  label vx4="sf conventional denials";
  vx5  = (prop ne '3') and (ltype=1) and (actype lt 6) and (lpurp=1);
  label vx5="sf conventional hp apps";
  vx6  = (prop ne '3') and (ltype=1) and (actype=1) and (lpurp=1);
  label vx6="sf conventional hp origs";
  vx7  = (prop ne '3') and (ltype=1) and (actype=1) and (lpurp=1) and hicost=1;
  label vx7="hicost sf conventional hp origs";
  vx8  = (prop ne '3') and (ltype=1) and (actype lt 6) and (lpurp=2);
  label vx8="sf conventional hi apps";
  vx9  = (prop ne '3') and (ltype=1) and (actype=1) and (lpurp=2);
  label vx9="sf conventional hi orig";
  vx10 = (prop ne '3') and (ltype=1) and (actype=1) and (lpurp=2) and hicost=1;
  label vx10="hicost sf conventional hi origs";
  vx11 = (prop ne '3') and (ltype=1) and (actype lt 6) and (lpurp=3);
  label vx11="sf conventional refi apps";
  vx12 = (prop ne '3') and (ltype=1) and (actype=1) and (lpurp=3);
  label vx12="sf conventional refi origs";
  vx13 = (prop ne '3') and (ltype=1) and (actype=1) and (lpurp=3) and hicost=1;
  label vx13="hicost sf conventional refi origs";
  vx14 = (prop = '2');
  label vx14="all mobile home records";
  vx15 = (prop ne '3') and (apeth='2') and (aprace1='3');
  label vx15="nonhisp black records";
  vx16 = (prop ne '3') and (apeth='1');
  label vx16="hispanic records";
  vx17 = (prop ne '3') and (apeth='2') and (aprace1='5');
  label vx17="nonhispanic white records";
  vx18 = (prop ne '3') and (apeth='2') and (aprace1='2');
  label vx18="nonhispanic asian records";
  vx19 = (prop ne '3') and (apeth='2') and (aprace1='1' or aprace1='4');
  label vx19="nonhispanic native records";
  vx20 = (prop ne '3') and (ltype=1) and (actype=1) and (apeth='2') and (aprace1='5');
  label vx20="NH white sf originations";
  vx21 = (prop ne '3') and (ltype=1) and (actype=1) and (apeth='2') and (aprace1='5') and (hicost=1);
  label vx21="NH white hicost originations";
  vx22 = (prop ne '3') and (ltype=1) and (actype=1) and (apeth='2') and (aprace1='3');
  label vx22="NH black sf originations";
  vx23 = (prop ne '3') and (ltype=1) and (actype=1) and (apeth='2') and (aprace1='3') and (hicost=1);
  label vx23="NH black hicost originations";
  vx24 = (prop ne '3') and (ltype=1) and (actype=1) and (apeth='1');
  label vx24="Hispanic sf originations";
  vx25 = (prop ne '3') and (ltype=1) and (actype=1) and (apeth='1') and (hicost=1);
  label vx25="Hispanic hicost originations";

  /* Standard-normal draw used by the model WHERE clauses to pick a random   */
  /* subsample. NOTE(review): seed 0 is not a fixed seed, so the subsample    */
  /* is expected to differ between runs - confirm that is acceptable.        */
  ransel=rannor(0);

  /* Denial Reverse Dummy Codes. Note that denial reason reporting is only   */
  /* required for lenders supervised by OCC, OTS, and NCUA. So we only       */
  /* define reverse dummies for them, and we only model the credit history   */
  /* regression model for applications at these types of lenders.            */
  /* Each dummy is 0 when the stated denial condition holds, otherwise 1.    */
  rvcred1  = not (actype=3 and (den1='3' or den2='3' or den3='3')
                  and (agency='1' or agency='4' or agency='5'));
  label rvcred1="rv dummy1 bad credit";
  rvcred2  = not (actype=3 and (den1='3')
                  and (agency='1' or agency='4' or agency='5'));
  label rvcred2="rv dummy2 bad credit";
  rvemploy = not (actype=3 and (den1='2' or den2='2' or den3='2')
                  and (agency='1' or agency='4' or agency='5'));
  label rvemploy="rv dummy emply hist";
  rvcollat = not (actype=3 and (den1='4' or den2='4' or den3='4')
                  and (agency='1' or agency='4' or agency='5'));
  label rvcollat="rv dummy collateral";
  rvcash   = not (actype=3 and (den1='5' or den2='5' or den3='5')
                  and (agency='1' or agency='4' or agency='5'));
  label rvcash="rv dummy cash";
  /* NOTE(review): rvinfo looks at den1 only, unlike the dummies above. */
  rvinfo   = not (actype=3 and (den1='6' or den1='7')
                  and (agency='1' or agency='4' or agency='5'));
  label rvinfo="rv dummy info";

  /* For this one we define it for all lenders, because it is always */
  /* required reporting.                                             */
  rvaband  = not (actype=5);
  label rvaband="rv dummy file incomplete";

  /* Continuous regressors. lratio is income divided by loan amount.        */
  /* msainc is 0 when hudmfi is "NA", so incrat divides by zero for those   */
  /* rows (SAS is expected to return missing and write a log note).         */
  napinc2=napinc*napinc;
  incrat=napinc/msainc;
  lratio=napinc/nlamt;
  lratio2=lratio*lratio;

  /* note that preapproval reporting is only required for certain 'covered' */
  /* preapproval programs; therefore, preapp can be 1, yes, 2, no, or 3,    */
  /* not applicable, if the lender does not have a 'covered' pre-approval   */
  /* procedure.                                                             */
  xpre = (preapp='1');

  /* Note, demunk is coded 1 if any application information is unreported   */
  /* -- sex, race, or ethnicity.                                            */
  demunk = ((apeth='3' or apeth='4') or (aprace1='6' or aprace1='7')
            or (apsex=3 or apsex=4));
  female = (apsex=2);
  hispan = (apeth='1');
  /* NOTE(review): here aprace1='4' is grouped with asian, whereas vx18 and */
  /* vx19 group it with native. Left unchanged because the cr_logit         */
  /* coefficients below were presumably estimated with this coding.         */
  native = (apeth='2' and aprace1='1');
  asian  = (apeth='2' and (aprace1='2' or aprace1='4'));
  black  = (apeth='2' and aprace1='3');
  xoccup = (occup=1);
  conv   = (ltype=1);
  lien2  = (lien='2');
  lien0  = (lien='3');

  /* Regulator dummies.                                                     */
  /* NOTE(review): the original comment here said the reference category is */
  /* hud, but r_hud is itself a dummy and enters the segmentation models;   */
  /* the omitted category is any agency code other than 1, 3, 4, 5 and 7.   */
  r_occ = (agency='1');
  r_fdc = (agency='3');
  r_ots = (agency='4');
  r_ncu = (agency='5');
  r_hud = (agency='7');

  imprv = (lpurp=2);
  refin = (lpurp=3);

  /* Purchaser-type dummies, defined for actype=1 records only.             */
  /* The ptype alternatives for s_gses are parenthesised: AND is evaluated  */
  /* before OR, so without the parentheses ptype 2, 3 or 4 set the flag     */
  /* regardless of actype.                                                  */
  s_port = (actype=1 and ptype='0');
  s_gses = (actype=1 and (ptype='1' or ptype='2' or ptype='3' or ptype='4'));
  s_priv = (actype=1 and ptype='5');
  s_bank = (actype=1 and ptype='6');
  s_fcom = (actype=1 and ptype='7');
  s_affl = (actype=1 and ptype='8');
  s_othr = (actype=1 and ptype='9');

  /* Credit instrument: hard-coded coefficients applied to the same         */
  /* regressors as the credit model below, then the logistic transform.     */
  cr_logit= -3.5010
    +(napinc*(-6.43E-6))
    +(lratio*0.1828)
    +(lratio2*-0.00007)
    +(xoccup*0.3193)
    +(lien2*0.3587)
    +(lien0*1.2814)
    +(xpre*-0.8372)
    +(r_ots*-0.0331)
    +(r_ncu*-1.4243)
    +(imprv*1.6149)
    +(refin*0.8124)
    +(lratio*imprv*-0.1254)
    +(lratio*refin*-0.0724)
    +(lratio*xoccup*-0.0252)
    +(demunk*0.4055)
    +(female*0.1141)
    +(hispan*0.8116)
    +(native*0.7220)
    +(asian*0.0131)
    +(black*1.1351);
  cr_ins=(exp(cr_logit))/(1+exp(cr_logit));
  cr_ins2=cr_ins*cr_ins;

  /**********************************/
  /* Code distance of each state from New Mexico in the seven-dimensional   */
  /* space of predatory lending regulation.                                 */
  /* Keyed on nstate; codes not listed leave mdsdist missing.               */
  /**********************************/
  mdsdist=.;
  select (nstate);
    when (1)  mdsdist=0.8571;
    when (2)  mdsdist=0.7820;
    when (4)  mdsdist=0.9248;
    when (5)  mdsdist=0.4290;
    when (6)  mdsdist=0.9248;
    when (8)  mdsdist=0.8571;
    when (9)  mdsdist=0.7451;
    when (10) mdsdist=0.9718;
    when (11) mdsdist=0.3779;
    when (12) mdsdist=0.8290;
    when (13) mdsdist=0.3515;
    when (15) mdsdist=0.8571;
    when (16) mdsdist=0.8361;
    when (17) mdsdist=0.5432;
    when (18) mdsdist=0.9718;
    when (19) mdsdist=0.7143;
    when (20) mdsdist=0.8831;
    when (21) mdsdist=0.8621;
    when (22) mdsdist=1.0000;
    when (23) mdsdist=0.9248;
    when (24) mdsdist=0.7143;
    when (25) mdsdist=0.0724;
    when (26) mdsdist=0.7143;
    when (27) mdsdist=0.5553;
    when (28) mdsdist=0.8571;
    when (29) mdsdist=0.7685;
    when (30) mdsdist=0.9248;
    when (31) mdsdist=0.8571;
    when (32) mdsdist=0.8571;
    when (33) mdsdist=0.8571;
    when (34) mdsdist=0.2809;
    when (35) mdsdist=0.0000;
    when (36) mdsdist=0.1754;
    when (37) mdsdist=0.0911;
    when (38) mdsdist=1.0000;
    when (39) mdsdist=0.8831;
    when (40) mdsdist=0.9248;
    when (41) mdsdist=0.9248;
    when (42) mdsdist=0.9718;
    when (44) mdsdist=0.8571;
    when (45) mdsdist=0.3322;
    when (46) mdsdist=0.9248;
    when (47) mdsdist=0.8571;
    when (48) mdsdist=0.6391;
    when (49) mdsdist=0.8571;
    when (50) mdsdist=0.8290;
    when (51) mdsdist=0.6438;
    when (53) mdsdist=0.9248;
    when (54) mdsdist=0.0182;
    when (55) mdsdist=1.0000;
    when (56) mdsdist=0.8571;
    otherwise;
  end;
  mdsdist2=mdsdist*mdsdist;

  /***********************************/
  /* Code the individual msas that are part of the black belt exploitation  */
  /* cluster 7, in order of distance from cluster centroid.                 */
  /***********************************/
  m33860 = (msa='33860');
  m43340 = (msa='43340');
  m33740 = (msa='33740');
  m10780 = (msa='10780');
  m40580 = (msa='40580');
  m12260 = (msa='12260');
  m44940 = (msa='44940');
  m19260 = (msa='19260');
  m22500 = (msa='22500');
  m47580 = (msa='47580');
  m17980 = (msa='17980');
  m17900 = (msa='17900');
  m27140 = (msa='27140');
  m13820 = (msa='13820');
  m46220 = (msa='46220');
  m10500 = (msa='10500');
  m15500 = (msa='15500');
  m12940 = (msa='12940');
  m33660 = (msa='33660');
  m27180 = (msa='27180');
  m20020 = (msa='20020');
  m19460 = (msa='19460');
  m31420 = (msa='31420');
  m30020 = (msa='30020');
  m22180 = (msa='22180');
  m24140 = (msa='24140');
  m38220 = (msa='38220');
  m32820 = (msa='32820');
  m25980 = (msa='25980');
  m19804 = (msa='19804');
run;

/*********************************************/
/* Logistic Models to Create Denial Instruments                            */
/* Estimated only for conventional applications, excluding purchased       */
/* loans, with no edit failures, and excluding Puerto Rico. Note that a    */
/* narrower definition of rvcred2 performs about the same, just a bit      */
/* poorer overall model fit.                                               */
/* The ransel condition keeps only records whose random normal draw        */
/* exceeds the cutoff, i.e. a small random subsample.                      */
/*********************************************/
proc logistic data=system04.master;
  where (actype < 6) and (ransel gt 1.95) and (conv=1) and (nstate<72)
        and (nfips ne .) and (edit ne '5') and (edit ne '6') and (edit ne '7')
        and (agency='1' or agency='4' or agency='5');
  model rvcred1=napinc lratio lratio2 xoccup lien2 lien0 xpre r_ots r_ncu
        imprv refin lratio*imprv lratio*refin lratio*xoccup
        demunk female hispan native asian black / expb rsquare;
  units napinc=SD lratio=SD lratio2=SD;
  title 'Credit model, only OCC, OTS, NCUA lenders';
run;

/**********************************/
/* Segmentation Models                                                     */
/* This is the very final stage of the whole analysis, after all the       */
/* intermediate stuff which appears below all the way down to the mds      */
/* procedure. this step completed march 6, 2007.                           */
/**********************************/
proc logistic data=system04.master;
  where (actype = 1) and (ransel gt 2.95) and (conv=1) and (nstate<72)
        and (nfips ne .) and (edit ne '5') and (edit ne '6') and (edit ne '7');
  model rvhicost=napinc napinc2 lratio lratio2 xoccup lien2 xpre
        r_occ r_ots r_fdc r_ncu r_hud imprv refin / expb rsquare;
  units napinc=SD napinc2=SD lratio=SD lratio2=SD;
  title 'Subprime Segmentation Model 1';
run;

proc logistic data=system04.master;
  where (actype = 1) and (ransel gt 2.95) and (conv=1) and (nstate<72)
        and (nfips ne .) and (edit ne '5') and (edit ne '6') and (edit ne '7');
  model rvhicost=napinc napinc2 lratio lratio2 xoccup lien2 xpre
        r_occ r_ots r_fdc r_ncu r_hud imprv refin cr_ins cr_ins2 / expb rsquare;
  units napinc=SD napinc2=SD lratio=SD lratio2=SD cr_ins=SD cr_ins2=SD;
  title 'Subprime Segmentation Model 2';
run;

proc logistic data=system04.master;
  where (actype = 1) and (ransel gt 2.95) and (conv=1) and (nstate<72)
        and (nfips ne .) and (edit ne '5') and (edit ne '6') and (edit ne '7');
  model rvhicost=napinc napinc2 lratio lratio2 xoccup lien2 xpre
        r_occ r_ots r_fdc r_ncu r_hud imprv refin
        demunk female hispan native asian black
        s_port s_priv s_bank s_fcom s_affl s_othr
        cr_ins cr_ins2 mdsdist mdsdist2 / expb rsquare;
  units napinc=SD napinc2=SD lratio=SD lratio2=SD cr_ins=SD cr_ins2=SD
        mdsdist=SD mdsdist2=SD;
  title 'Subprime Segmentation Model 3';
run;

/* Sum the record flags by MSA. */
proc means data=system04.master noprint;
  class msa;
  var vx1-vx25;
  output out=system04.sums(compress=yes) sum=vx1-vx25;
run;

/* MSA code to name lookup. */
data system04.msa(compress=yes);
  infile "c:\sasdat\system04\msas.csv" delimiter="," missover;
  length msaname $ 75;
  input msa $ msaname $;
run;

proc sort data=system04.msa;
  by msa;
run;

proc sort data=system04.sums;
  by msa;
run;

/*
 * MSA-level shares. Keeps only per-MSA rows (_type_=1) and drops msa 99999.
 * NOTE(review): unlike the tract-level step later in the file, these
 * divisions are not guarded, so a zero denominator gives a missing share.
 */
data system04.msasum(compress=yes);
  merge system04.msa system04.sums;
  by msa;
  if (msa ne '99999' and _type_=1);
  m_fha=vx2/vx1;    label m_fha="fha share";
  m_den=vx4/vx3;    label m_den="denial rate, conventional sf";
  m_hi=vx8/vx3;     label m_hi="hi share, conventional sf";
  m_rf=vx11/vx3;    label m_rf="rf share, conventional sf";
  m_mob=vx14/vx1;   label m_mob="mobile home share, all sf";
  m_blk=vx15/vx1;   label m_blk="nh black share, all sf";
  m_hsp=vx16/vx1;   label m_hsp="hispanic share, all sf";
  m_wht=vx17/vx1;   label m_wht="nh white share, all sf";
  m_asn=vx18/vx1;   label m_asn="nh asian share, all sf";
  m_ntv=vx19/vx1;   label m_ntv="nh natve share, all sf";
  m_hihp=vx7/vx6;   label m_hihp="hicost share home purchase";
  m_hihi=vx10/vx9;  label m_hihi="hicost share home imprvemt";
  m_hirf=vx13/vx12; label m_hirf="hicost share refinance";
run;

proc print data=system04.msasum;
  var msa msaname vx1 m_fha m_den m_hi m_rf m_mob m_blk m_hsp m_wht m_asn
      m_ntv m_hihp m_hihi m_hirf;
run;

/* Write the same columns to the log. */
data temp;
  set system04.msasum;
  put msa msaname vx1 m_fha m_den m_hi m_rf m_mob m_blk m_hsp m_wht m_asn
      m_ntv m_hihp m_hihi m_hirf;
run;
/*
 * Part 2 of the SYSTEM04 analysis script.
 *   1. Principal-component factor scores and cluster diagnostics for MSAs.
 *   2. Tract-level sums, shares, worst-tract listings, factor and cluster runs.
 *   3. State predatory-lending law classification: distance matrix, MDS,
 *      fit plot and hierarchical clustering.
 * Reads SYSTEM04.MSASUM and SYSTEM04.MASTER, which are built earlier in the
 * file.
 */
proc factor data=system04.msasum method=principal score out=system04.fact n=4;
  var m_fha m_den m_hi m_rf m_mob m_blk m_hsp m_wht m_asn m_ntv
      m_hihp m_hihi m_hirf;
  title2 'unweighted pca of lending variables';
run;

proc print data=system04.fact;
run;

/*******************************************/
/* Diagnostics to determine how many clusters                              */
/* _type_ is 1 on every row kept in msasum, so freq _type_ gives each      */
/* row a frequency of 1.                                                   */
/*******************************************/
proc cluster data=system04.fact method=median pseudo ccc print=30;
  var Factor1 Factor2 Factor3 Factor4;
  freq _type_;
  id msaname;
  title2 'diagnostics for MEDIAN';
run;

proc cluster data=system04.fact method=average pseudo ccc print=30;
  var Factor1 Factor2 Factor3 Factor4;
  freq _type_;
  id msaname;
  title2 'diagnostics for AVERAGE';
run;

proc cluster data=system04.fact method=centroid pseudo ccc print=30;
  var Factor1 Factor2 Factor3 Factor4;
  freq _type_;
  id msaname;
  title2 'diagnostics for CENTROID';
run;

proc cluster data=system04.fact method=density k=3 pseudo ccc print=30;
  var Factor1 Factor2 Factor3 Factor4;
  freq _type_;
  id msaname;
  title2 'diagnostics for DENSITY';
run;

/* Ten-cluster k-means style solution on the four factor scores. */
proc fastclus data=system04.fact out=system04.cluster maxclusters=10 maxiter=500;
  freq _type_;
  id msaname;
  var Factor1 Factor2 Factor3 Factor4;
run;

proc sort data=system04.cluster;
  by cluster distance;
run;

proc print data=system04.cluster;
  var msaname cluster distance m_fha m_den m_hi m_rf m_mob m_blk m_hsp m_wht
      m_asn m_ntv m_hihp m_hihi m_hirf;
  title2 "first try, predatory urban system";
run;

/*************************************************************/
/* Classification of census tracts                                         */
/* Excludes puerto rico and missing tract info                             */
/*************************************************************/
proc means data=system04.master noprint;
  where (nstate<72) and (nfips ne .);
  class nfips;
  id msa;
  var vx1-vx25;
  output out=system04.trcttemp(compress=yes) sum=vx1-vx25;
run;

/* Re-reads the same msas.csv lookup that is imported earlier in the file; */
/* kept so this section can be submitted on its own.                       */
data system04.msa(compress=yes);
  infile "c:\sasdat\system04\msas.csv" delimiter="," missover;
  length msaname $ 75;
  input msa $ msaname $;
run;

proc sort data=system04.msa;
  by msa;
run;

proc sort data=system04.trcttemp;
  by msa;
run;

/*
 * Tract-level shares. Keeps only per-tract rows (_type_=1) and drops
 * msa 99999. Each share stays 0 unless its numerator count is positive,
 * which also avoids dividing when the numerator is zero.
 */
data system04.trctsum(compress=yes);
  merge system04.msa system04.trcttemp;
  by msa;
  if (msa ne '99999' and _type_=1);
  t_fha=0;  if vx2 gt 0 then t_fha=vx2/vx1;     label t_fha="fha share";
  t_den=0;  if vx4 gt 0 then t_den=vx4/vx3;     label t_den="denial rate, conventional sf";
  t_hi=0;   if vx8 gt 0 then t_hi=vx8/vx3;      label t_hi="hi share, conventional sf";
  t_rf=0;   if vx11 gt 0 then t_rf=vx11/vx3;    label t_rf="rf share, conventional sf";
  t_mob=0;  if vx14 gt 0 then t_mob=vx14/vx1;   label t_mob="mobile home share, all sf";
  t_blk=0;  if vx15 gt 0 then t_blk=vx15/vx1;   label t_blk="nh black share, all sf";
  t_hsp=0;  if vx16 gt 0 then t_hsp=vx16/vx1;   label t_hsp="hispanic share, all sf";
  t_wht=0;  if vx17 gt 0 then t_wht=vx17/vx1;   label t_wht="nh white share, all sf";
  t_asn=0;  if vx18 gt 0 then t_asn=vx18/vx1;   label t_asn="nh asian share, all sf";
  t_ntv=0;  if vx19 gt 0 then t_ntv=vx19/vx1;   label t_ntv="nh natve share, all sf";
  t_hihp=0; if vx7 gt 0 then t_hihp=vx7/vx6;    label t_hihp="hicost share home purchase";
  t_hihi=0; if vx10 gt 0 then t_hihi=vx10/vx9;  label t_hihi="hicost share home imprvemt";
  t_hirf=0; if vx13 gt 0 then t_hirf=vx13/vx12; label t_hirf="hicost share refinance";
run;

/**********************************************/
/* Print out the worst hicost census tracts                                */
/* Each listing needs at least 25 hicost originations and a hicost share   */
/* above 0.5 for the loan purpose in question.                             */
/**********************************************/
proc sort data=system04.trctsum;
  by descending t_hihp;
run;

proc print data=system04.trctsum;
  where (vx7>=25 and t_hihp>0.5);
  var nfips vx7 t_hihp vx1 t_fha t_den t_hi t_rf t_mob t_blk t_hsp t_wht
      t_asn t_ntv t_hihi t_hirf;
run;

proc sort data=system04.trctsum;
  by descending t_hihi;
run;

proc print data=system04.trctsum;
  where (vx10>=25 and t_hihi>0.5);
  var nfips vx10 t_hihi vx1 t_fha t_den t_hi t_rf t_mob t_blk t_hsp t_wht
      t_asn t_ntv t_hihp t_hirf;
run;

proc sort data=system04.trctsum;
  by descending t_hirf;
run;

proc print data=system04.trctsum;
  where (vx13>=25 and t_hirf>0.5);
  var nfips vx13 t_hirf vx1 t_fha t_den t_hi t_rf t_mob t_blk t_hsp t_wht
      t_asn t_ntv t_hihp t_hihi;
run;

/**********************************************/
/* Factor and Cluster Analysis for TRACTS                                  */
/**********************************************/
proc factor data=system04.trctsum method=principal score out=system04.trfact n=4;
  var t_fha t_den t_hi t_rf t_mob t_blk t_hsp t_wht t_asn t_ntv
      t_hihp t_hihi t_hirf;
  title2 'unweighted pca of TRACT lending variables';
run;

/* Re-runs the MSA factor analysis without eight MSA codes (Puerto Rico,   */
/* going by the title) and overwrites SYSTEM04.FACT from the first run.    */
proc factor data=system04.msasum method=principal score out=system04.fact n=4;
  where (msa ne '10380' and msa ne '21940' and msa ne '25020'
         and msa ne '32420' and msa ne '38660' and msa ne '41900'
         and msa ne '41980' and msa ne '49500');
  var m_fha m_den m_hi m_rf m_mob m_blk m_hsp m_wht m_asn m_ntv
      m_hihp m_hihi m_hirf;
  title2 "unweighted MSA pca without PR";
run;

proc cluster data=system04.trfact method=centroid pseudo ccc print=300;
  var Factor1 Factor2 Factor3 Factor4;
  freq _type_;
  title2 'diagnostics for tract CENTROID';
run;

proc fastclus data=system04.trfact out=system04.trclust maxclusters=54 maxiter=500;
  freq _type_;
  id nfips;
  var Factor1 Factor2 Factor3 Factor4;
run;

proc sort data=system04.trclust;
  by cluster distance;
run;

proc means data=system04.trclust;
  by cluster;
run;

/* Write every variable of every clustered tract to the log. */
data temp;
  set system04.trclust;
  put _ALL_;
RUN;

/*
 * Disabled case-study steps. They are held in a block comment rather than
 * asterisk-style comments because the macro processor still expands a
 * percent-sign macro call inside an asterisk-style comment.

proc print data=system04.trclust;
  where cluster=43;
  title2 'Cluster 43 case studies';
run;

proc freq data=system04.trclust order=freq;
  where cluster=43;
  tables msaname;
  weight vx1;
run;

%plotit(data=system04.fact, labelvar=cluster,plotvars=Factor3 Factor2, color=black, colors=blue);
run;
 */

/***********************************************************************/
/* Classification of State Predatory Lending Laws and Foreclosure Regimes */
/***********************************************************************/
data system04.states(compress=yes);
  infile "c:\sasdat\system04\states.csv" delimiter="," missover;
  input nfips st_ab $ pl_cov pl_pts pl_ppp pl_flp pl_hcp pl_rem pl_for;
run;

proc sort data=system04.states;
  by nfips;
run;

proc print data=system04.states;
run;

/* Sample-library macros that supply the distance macro used below. */
%include "c:\Program Files\Sas Institute\SAS\V8\stat\sample\xmacro.sas";
%include "c:\Program Files\Sas Institute\SAS\V8\stat\sample\stdize.sas";
%include "c:\Program Files\Sas Institute\SAS\V8\stat\sample\distnew.sas";

/* State-by-state distance matrix over the seven pl_ variables, treated as */
/* ordinal (ord=), using method=dgower.                                    */
%distance(data=system04.states,out=system04.gowstate,id=st_ab,method=dgower,ord=pl_cov pl_pts pl_ppp pl_flp pl_hcp pl_rem pl_for);

proc print data=system04.gowstate;
run;

proc mds data=system04.gowstate level=absolute pfinal
         out=system04.mdsout outres=system04.mdsres;
  id st_ab;
run;

proc sort data=system04.mdsres;
  by descending residual;
run;

/* Residual rows involving New Mexico, the reference state. */
proc print data=system04.mdsres;
  where _ROW_="NM" or _COL_="NM";
run;

title1 'Plot of Overall fit';
axis1 label=(angle=90 rotate=0) minor=none order=(0 to 1 by 0.1);
axis2 minor=none order=(0 to 1 by 0.1);

/* The original had a truncated "proc print ... where" that swallowed the  */
/* proc gplot statement and pointed it at a nonexistent dataset "res".     */
/* The fit plot now reads the residual dataset written by proc mds above.  */
proc gplot data=system04.mdsres;
  plot fitdata*fitdist / vaxis=axis1 haxis=axis2;
run;
quit;

%plotit (data=system04.mdsout, datatype=mds, labelvar=st_ab, vtoh=1.75, labfont=times);
run;

/* Hierarchical clustering of states on the distance-matrix columns. */
proc cluster data=system04.gowstate method=average rsquare pseudo;
  var AL--WY;
  id st_ab;
  title2 'States clustered by Average Linkage';
run;

proc cluster data=system04.gowstate method=centroid rsquare pseudo;
  var AL--WY;
  id st_ab;
  title2 'States clustered by Centroid';
run;

proc cluster data=system04.gowstate method=ward rsquare pseudo;
  var AL--WY;
  id st_ab;
  title2 'States clustered by Ward';
run;

proc cluster data=system04.gowstate method=centroid rsquare pseudo ccc outtree=tree;
  var AL--WY;
  id st_ab;
  title2 'cluster of states using DGower';
run;

proc tree data=tree;
  id st_ab;
run;