Tuesday, June 21, 2011

Credit default swap pricing by PROC FCMP

Sometimes I wonder how a simple VBA macro in Excel can pin my 8-core desktop at 100% CPU and leave it waiting indefinitely. On those occasions I wish SAS could come to the rescue, since I am more familiar and confident with SAS. The good news is that in SAS 9.2, many essential Excel functions have been translated into Proc FCMP functions and stored in a built-in dataset named sashelp.slkwxl, which makes Proc FCMP a convenient bridge for porting code from Excel to SAS. The sashelp.slkwxl dataset contains 41 functions derived from Excel, listed below (a minimal call is sketched after the table):

Type         Function
----------------------------------
Finance      ACCRINT
             ACCRINTM
             AMORDEGRC
             AMORLINC
             COUPDAYBS
             COUPDAYS
             COUPDAYSNC
             COUPNCD
             COUPNUM
             COUPPCD
             DATDIF (European)
             DB
             DISC
             DOLLARDE
             DOLLARFR
             DURATION
             EFFECT
             MDURATION
             ODDFPRICE
             ODDFYIELD
             ODDLPRICE
             ODDLYIELD
             PRICE
             PRICEDISC
             PRICEMAT
             RECEIVED
             TBILLEQ
             TBILLPRICE
             TBILLYIELD
             YIELD
             YIELDDISC
             YIELDMAT
Mathematics  EVEN
             FACTDOUBLE
             FLOOR
             MULTINOMIAL
             ODD
             PRODUCT
Statistics   AVEDEV
             DEVSQ
             VARP
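
As a quick smoke test, the snippet below points the CMPLIB option at sashelp.slkwxl and calls coupncd_slk(), the FCMP twin of Excel's COUPNCD(); the four-argument signature (settlement, maturity, frequency, basis) is taken from the way the function is used in the CDS code later in this post.

options cmplib = sashelp.slkwxl;
data _null_;
/* next coupon date after settlement: semiannual coupons, 30/360 basis */
ncd = coupncd_slk('15jul2006'd, '15jul2013'd, 2, 0);
put ncd= date9.;
run;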

With the help of user-defined functions and the financial functions in sashelp.slkwxl, we can develop fairly complicated SAS programs to replace VBA macros in Excel. For example, a credit default swap (CDS), a popular instrument in the credit derivatives market, is essentially a contract in which the protection buyer pays the seller a periodic spread in exchange for compensation if the reference entity defaults. Implementing the pricing mechanism takes a number of modules, as Gunter and Peter show with fixed risk-neutral probabilities of default [Ref. 1]. A SAS macro can hardly fill the role of a module, since nested macros with leaking macro variables are a big headache for SAS programmers. In the code below, I used the coupdaysnc_slk(), coupdays_slk() and coupncd_slk() functions from sashelp.slkwxl, which correspond to the COUPDAYSNC(), COUPDAYS() and COUPNCD() functions in Excel, plus three user-defined functions, to build a system for CDS pricing. Besides manufacturing home-made functions and encapsulating macros, Proc FCMP proves to be a better tool for vector/matrix operations than the Data Step array. The result shows that, for some financial applications, Proc FCMP smooths the migration from Excel to SAS.
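
For orientation, the spread reported by the cdsprice() macro below is the ratio of expected discounted default payments to expected discounted fee payments. Reading it off the _tmp02 Data Step, with recovery rate $R$, per-period default probability $p$, cumulative default probability $P_t$, accrued interest $a_t$, payment frequency $f$, compounding frequency $m$, interpolated spot rate $r_t$ and year fraction $\tau_t$:

$$s \;=\; \frac{\sum_t (1 - R - R\,a_t)\,p\,(1 + r_t/m)^{-m\tau_t}}{\sum_t \tfrac{1}{f}\,(1 - P_t)\,(1 + r_t/m)^{-m\tau_t}}$$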

References:
1. Gunter Loeffler and Peter Posch. ‘Credit Risk Modeling using Excel and VBA’. 2nd edition. Wiley, 2011.

 
/*******************READ ME*********************************************
* - Credit default swap pricing by Proc FCMP -
*
* SAS VERSION: 9.2.2
* DATE: 22jun2011
* AUTHOR: hchao8@gmail.com
*
****************END OF READ ME******************************************/

****************(1) MODULE-BUILDING STEP********************************;
******(1.1) CREATE A FUNCTION FOR YEAR FRACTION*************************;
options cmplib = (sashelp.slkwxl work.myfunclib);
proc fcmp outlib = work.myfunclib.finance;
function yearfrac0(sdate, edate);
return(datdif(sdate, edate, '30/360') / 360);
endsub;
quit;

******(1.2) CREATE A FUNCTION FOR ACCRUED INTEREST AT SETTLEMENT*******;
proc fcmp outlib = work.myfunclib.finance;
function aci(settlement_date, maturity_date, rate, freq);
if settlement_date < maturity_date then
aci = 100 * rate / freq * (1 - coupdaysnc_slk(settlement_date, maturity_date, freq, 0)
/ coupdays_slk(settlement_date, maturity_date, freq, 0));
if aci = 0 or settlement_date = maturity_date then aci = 100 * rate / freq;
return(aci);
endsub;
quit;

******(1.3) CREATE A FUNCTION FOR NON-FLAT INTEREST RATE STRUCTURE******;
option mstored sasmstore = work;
%macro intspot_macro() / store source;
%let data = %sysfunc(dequote(&data));
proc sql noprint;
select count(*) into :nobs from &data;
quit;
%mend;

proc fcmp outlib = work.myfunclib.finance;
function intspot(data $, year);
array spots[1, 2] / nosymbols;
rc1 = run_macro('intspot_macro', data, nobs);
call dynamic_array(spots, nobs, 2);
rc2 = read_array(data, spots, 't', 'spotrate');
if nobs = 1 then intspot = spots[1, 2];
else do;
if year le spots[1, 1] then intspot = spots[1, 2];
else if year ge spots[nobs, 1] then intspot = spots[nobs, 2];
else do;
i = 1;
do until(spots[i, 1] gt year);
i + 1;
intspot = spots[i-1, 2] + (spots[i, 2] - spots[i-1, 2])*(year - spots[i-1, 1])
/ (spots[i, 1] - spots[i-1, 1]) ;
end;
end;
end;
return(intspot);
endsub;
quit;
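
A quick sanity check of intspot(), assuming the rate dataset from the testing step below is already in WORK: it should reproduce the endpoints exactly and interpolate linearly in between.

options cmplib = (sashelp.slkwxl work.myfunclib) mstored sasmstore = work;
data _null_;
/* linearly interpolated spot rate at 2.5 years from WORK.RATE(t, spotrate) */
r = intspot('rate', 2.5);
put r= percent8.4;
run;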

******(1.4) CREATE A MACRO TO EVALUATE CREDIT DEFAULT SWAP SPREAD******;
%macro cdsprice(n = 20, Settlement_date = '15jul2006'd, Maturity_date = '15jul2013'd,
rate = 0.07125, freq = 2, recovery_rate = 0.4,
compounding = 2, pay_freq = 4, pd = 0.0197,
outfile = );
options mlogic mprint cmplib = (sashelp.slkwxl work.myfunclib)
nocenter mstored sasmstore = work;
proc fcmp;
mixed_date = mdy(month(&settlement_date), day(&settlement_date), year(&maturity_date) + 1);
array default_date[&n] / nosymbols;
default_date[1] = coupncd_slk(&settlement_date, mixed_date, &pay_freq, 0);
do i = 2 to &n;
default_date[i] = coupncd_slk(default_date[i-1], mixed_date, &pay_freq, 0);
end;
rc1 = write_array('_tmp01', default_date, 'default_date');
quit;

data _tmp02;
set _tmp01;
datdif = yearfrac0(&Settlement_date, default_date);
spotrate = intspot('rate', datdif);
aci = aci(default_date, &Maturity_date, &rate, &freq) / 100;
retain sum_pd;
if _n_ = 1 then sum_pd = 0;
else sum_pd = sum_pd + &pd;
fees = 1/&pay_freq * (1 - sum_pd) / (1 + spotrate/&compounding)**(&compounding*datdif);
default_pay = (1 - &recovery_rate - &recovery_rate*aci)*&pd
/ (1 + spotrate/&compounding)**(&compounding*datdif);
run;

proc sql noprint;
select sum(default_pay) / sum(fees) format = percent8.3 into: cds_spread from _tmp02;
select intck('year', min(default_date), max(default_date)) into: period from _tmp02;
quit;

ods html file = "&outfile" style = money;
title; footnote;
proc report data = _tmp02 nowd headline split = "|";
columns default_date aci spotrate fees default_pay ;
define default_date / display format = date9. "Dates of|default";
define aci / format = percent9.2 "Accrued interest|rate";
define spotrate / format = percent9.2 "Non-flat interest|rate";
define fees / format = percent9.2 "Accrued fees";
define default_pay / format = percent9.2 "Default payments";
compute after;
line @2 "The %sysfunc(strip(&period)) year CDS spread is: &cds_spread";
line " ";
line @2 "Settlement date is: %sysfunc(putn(&Settlement_date, date11.))";
line @2 "Maturity date is: %sysfunc(putn(&Maturity_date, date11.))";
line @2 "Payment frequency is: &pay_freq";
line @2 "Reference bond coupon rate is: %sysfunc(putn(&rate, percent9.2))";
line @2 "Reference bond coupon frequency is: &freq";
line @2 "Compounding frequency is: &compounding";
line @2 "Risk-neutral probability of default is: %sysfunc(putn(&pd, percent9.2))";
line @2 "Recovery rate is: %sysfunc(putn(&recovery_rate, percent9.2))";
endcomp;
run;
ods html close;
%mend cdsprice;

****************(2) TESTING STEP****************************************;
******(2.1) INPUT DATA OF A TERM STRUCTURE OF INTEREST RATE*************;
data rate;
format t 6.2 spotrate percent7.2;
input t spotrate;
cards;
0.083333333 0.055609
/*Buy Gunter and Peter's book for the complete data*/
10 0.057603
;;;
run;

******(2.2) RUN THE MACRO TO HAVE RESULT*******************************;
%cdsprice(outfile = c:\tmp\result.xls);

****************END OF ALL CODING***************************************;

Thursday, June 16, 2011

Macros to estimate value at risk


Value-at-risk (VaR) measures the risk of loss on a specific portfolio of financial assets. Jon Danielsson introduced how to apply nonparametric (historical simulation) and parametric methods to estimate univariate and multivariate VaR [Ref. 1]. The simplest parametric approach is to assume the daily returns follow a normal distribution (or a t-distribution) and read the loss quantile off that distribution. Following his steps, for a single-asset portfolio I used Google's adjusted returns since 2006; for a two-asset account I used Google and Apple with a 0.3:0.7 weight ratio. The probability is 1% in both cases and the portfolio value is set at $1,000. In the example below, the parameters of the t-distribution are estimated by a Proc NLMIXED routine in the first macro for univariate VaR [Ref. 2]; in the second macro, the variance-covariance matrix is obtained by the Cov() function of Proc IML for the bivariate VaR.
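
For reference, the estimators used in the two macros below, with portfolio value $V$, tail probability $p$, sample standard deviation $\hat\sigma$, fitted t parameters $(\hat\sigma_t, \hat\nu)$ from Proc NLMIXED, and weight vector $w$:

$$\mathrm{VaR}_{\mathrm{normal}} = -V\,\hat\sigma\,\Phi^{-1}(p), \qquad \mathrm{VaR}_{t} = -V\,\hat\sigma_t\,t_{\hat\nu}^{-1}(p), \qquad \hat\sigma_{\mathrm{portfolio}} = \sqrt{w^{\top}\widehat{\Sigma}\,w},$$

where $\widehat\Sigma$ is the sample covariance matrix of the returns; the historical-simulation VaR is simply $-V$ times the empirical $p$-quantile of the (portfolio) returns.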

SAS actually has two components for matrix computation: Proc IML and Proc FCMP. My personal experience is:
-- if you need to port code from VBA to SAS, Proc FCMP should be the No.1 choice, because its function-by-function style closely mirrors that of Excel/VBA;
-- if you need to port code from Matlab or R to SAS, Proc IML should be the preference, because they speak the same matrix language.

Generally, Proc IML is more efficient than Proc FCMP for both the programmer and the machine (I was driven crazy by the nested loops of Proc FCMP). Of course, SAS/IML has a steeper learning curve. For those willing to learn SAS/IML, Rick Wicklin's new book is a classic [Ref. 3]. I learned a lot from reading the free second chapter online and the author's blog, and I have ordered a copy from Amazon and am waiting for it. With it, I hope to delve deeper into SAS/IML.

References:
1. Jon Danielsson. ‘Financial Risk Forecasting’. Wiley, 2011
2. Steven Gilbert and Ling Chen. ‘Using SAS Proc NLMIXED for Robust Regression’. SAS Global Forum 2007.
3. Rick Wicklin. ‘Statistical Programming with SAS/IML Software’. SAS Publishing, 2010.


/*******************READ ME*********************************************
* - Macros to estimate value at risk -
*
* SAS VERSION: 9.2.2
* DATE: 17jun2011
* AUTHOR: hchao8@gmail.com
*
****************END OF READ ME******************************************/

****************(1) MODULE-BUILDING STEP********************************;
%macro univar(data = , var = , p = , value = , filepath = );
proc sort data = &data out = _tmp01;
by &var;
run;
data _tmp02;
set _tmp01 nobs = nobs;
if _n_ = round(%sysevalf(&p)* nobs);
run;

ods select none;
proc nlmixed data = _tmp01;
parms mu 0 sigma2 1 dft 1;
y = &var;
pi = arcos(-1);
z = (y-mu)/sqrt(sigma2);
logl = lgamma(.5*(dft+1))-.5*log(sigma2)-.5*log(dft*pi)-lgamma(dft/2)
-( (dft+1)/2 )* log(1+ (1/dft)*z**2);
model y ~ general(logl);
ods output ParameterEstimates = _tmp03;
run;

proc sql;
select std(&var) into: std from _tmp01;
select estimate into :dft from _tmp03 where parameter = 'dft';
select estimate into :sigma2 from _tmp03 where parameter = 'sigma2';
quit;
ods select all;

data _tmp04;
length var_desc $30;
set _tmp02; var_value = - &var * &value; var_desc = 'Historical simulation VaR'; drop &var date;
output;
var_value = - &std * probit(&p) * &value; var_desc = 'Normal VaR';
output;
var_value = -sqrt(&sigma2) * tinv(&p, &dft) * &value; var_desc = 't-distribution VaR';
output;
run;

ods html file = "&filepath\&data..xls" gpath = "&filepath\" style = money;
title; footnote;
proc report data = _tmp04 nowd;
col var_desc var_value ;
define var_desc / 'VaR type' style(column) = [foreground=lime just=center];
define var_value / 'Value' format = dollar8.2 style(column) = [font_weight=bold] ;
run;
proc sgplot data = &data;
histogram &var;
density &var;
density &var / type = kernel;
run;
ods html close;
%mend univar;

%macro mulvar(data =, var1 =, var2 =, p =, value =, firstweight =, filepath = );
%let secondweight = %sysevalf(1 - &firstweight) ;
data _tmp01;
merge &data;
by date;
sum = &var1*&firstweight + &var2*&secondweight;
if missing(&var1) + missing(&var2) > 0 then delete;
run;

proc sort data = _tmp01 out = _tmp02;
by sum;
run;
data _tmp02;
set _tmp02 nobs = nobs;
if _n_ = round(%sysevalf(&p)* nobs);
run;

proc iml;
start Cov(A);
n = nrow(A);
C = A - A[:,];
return( (C` * C) / (n-1) );
finish;
use _tmp01;
read all var{&var1 &var2};
y = &var1 || &var2;
w = {&firstweight , &secondweight};
sigma = sqrt(t(w)* Cov(y)* w);
call symput('sigma', left(char(sigma)));
quit;

data _tmp03;
length var_desc $30;
set _tmp02; var_value = - sum * &value; var_desc = 'Historical simulation VaR'; keep var_:;
output;
var_value = - &sigma * probit(&p) * &value; var_desc = 'Normal VaR';
output;
run;

ods html file = "&filepath\%sysfunc(compress(&data)).xls" gpath = "&filepath\" style = money;
title; footnote;
proc report data = _tmp03 nowd;
col var_desc var_value ;
define var_desc / 'VaR type' style(column) = [foreground=lime just=center];
define var_value / 'Value' format = dollar8.2 style(column) = [font_weight=bold] ;
run;
ods graphics on;
ods select ContourPlot BivariateHistogram;
proc kde data = _tmp01;
bivar &var1 &var2 / plots= all;
run;
ods graphics off;
ods html close;
%mend mulvar;

****************(2) TESTING STEP****************************************;
%getr(startday = '01jan2006'd, squote = goog, filepath = c:\tmp,
rpath = c:\Program Files\R\R-2.13.0\bin);
%getr(startday = '01jan2006'd, squote = aapl, filepath = c:\tmp,
rpath = c:\Program Files\R\R-2.13.0\bin);
%univar(data = goog, var = goog_r, p = 0.01, value = 1000, filepath = c:\tmp );
%mulvar(data = goog aapl, value = 1000, p = 0.01, var1 = goog_r, var2 = aapl_r,
firstweight = 0.3, filepath = c:\tmp);

****************END OF ALL CODING***************************************;

Saturday, June 11, 2011

Using PROC IML to deploy EWMA model for market risk


The exponentially weighted moving average (EWMA) model, a straightforward moving-average method in market risk, memorizes a finite window of market movements and captures the relationships among multiple asset prices. Compared with other multivariate volatility solutions such as OGARCH or DCC, it usually shows the strongest fluctuations over the time span [Ref. 1].

Since EWMA works on variance-covariance matrices, Proc IML is the natural match in SAS. In his blog, Rick Wicklin introduced a Cov() function in SAS/IML that creates the sample covariance matrix of a given matrix [Ref. 2]. I implemented this IML function in the EWMA() macro below and analyzed the correlation of the daily market returns between Google and Apple since 2005 (the recursion is written out after this paragraph). As a result, SAS/IML proved efficient and robust. Interestingly, in this experiment I found that declaring a matrix or vector with its full size before the computation step improves the program's efficiency significantly, which suggests that declaring matrices explicitly is a good habit in SAS/IML programming.
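
The update inside the macro's DO loop is the standard EWMA recursion on the covariance matrix, seeded with the sample covariance from Cov(); with daily return vector $y_t$ and smoothing parameter $\lambda$ (0.94 in the test run, the RiskMetrics convention),

$$\Sigma_t \;=\; \lambda\,\Sigma_{t-1} + (1-\lambda)\,y_t\,y_t^{\top}, \qquad \rho_t \;=\; \frac{\Sigma_t[1,2]}{\sqrt{\Sigma_t[1,1]\,\Sigma_t[2,2]}},$$

where $\rho_t$ is the correlation series plotted in the bottom panel.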

References:
1. Jon Danielsson. ‘Financial Risk Forecasting’. Wiley, 2011.
2. Rick Wicklin. ‘Computing Covariance and Correlation Matrices’. The Do Loop. 08DEC2010.

/*******************READ ME*********************************************
* - Using Proc IML to deploy EWMA model for market risk -
*
* SAS VERSION: 9.2.2
* DATE: 12jun2011
* AUTHOR: hchao8@gmail.com
*
****************END OF READ ME******************************************/

****************(1) MODULE-BUILDING STEP********************************;
%macro EWMA(squote1 =, squote2 =, slambda =, filepath = );
/****************************************************************
* MACRO: EWMA()
* GOAL: estimate correlations between two stocks
* according to market daily returns by EWMA
* PARAMETERS: squote1 = first stock share
* squote2 = second stock share
* slambda = weight coefficient
* filepath = file path to input data and output plot
*****************************************************************/
options mlogic mprint;
data &squote1;
infile "&filepath\&squote1..csv" delimiter = ',' missover dsd firstobs=2;
format date yymmdd10.; informat date yymmdd10.;
format r best32.; informat r best32.;
input date r;
run;
data &squote2;
infile "&filepath\&squote2..csv" delimiter = ',' missover dsd firstobs=2;
format date yymmdd10.; informat date yymmdd10.;
format r best32.; informat r best32.;
input date r;
run;
proc sql;
create table _tmp01 as
select a.date, a.r as r1, b.r as r2
from &squote1 as a, &squote2 as b
where a.date = b.date
;quit;

proc iml;
start Cov(A);
n = nrow(A);
C = A - A[:,];
return( (C` * C) / (n-1) );
finish;
use _tmp01;
read all var{date r1 r2};
N = nrow(r1);
y = r1 || r2;
lambda = &slambda;
EWMA = J(N, 3); /* pre-allocate: [variance1, variance2, covariance] per day */
S = J(2, 2);    /* declare S with its full size before the loop */
S = Cov(y);     /* seed the recursion with the sample covariance */
EWMA[1, ] = S[1,1] || S[2,2] || S[1, 2];
do i = 2 to N;
S = lambda * S + (1-lambda) * t(y[i, ]) * y[i, ];
EWMA[i, ] = S[1,1] || S[2,2] || S[1, 2];
end;
create _tmp02 from EWMA ;
append from EWMA;
quit;

data _tmp03;
set _tmp02;
rho = col3 / sqrt(col1 * col2);
keep rho;
run;
data _tmp04;
set _tmp01;
set _tmp03;
run;

proc template;
define statgraph ewmaplot;
begingraph / designwidth=1000px designheight=800px;
layout lattice / columns=1 columndatarange=union rowweights=(0.3 0.3 0.4);
layout overlay / cycleattrs=true xaxisopts=(label="Returns of %upcase(&squote1)")
yaxisopts=(griddisplay=on label=" " display=(line) displaysecondary=all);
seriesplot x=date y=r1;
endlayout;
layout overlay / cycleattrs=true xaxisopts=(label="Returns of %upcase(&squote2)")
yaxisopts=(griddisplay=on label=" " display=(line) displaysecondary=all);
seriesplot x=date y=r2 ;
endlayout;
layout overlay / cycleattrs=true xaxisopts=(label="Correlation by EWMA")
yaxisopts=(griddisplay=on label=" " display=(line) displaysecondary=all);
seriesplot x=date y=rho;
endlayout;
endlayout;
endgraph;
end;
run;
ods html gpath = "&filepath" style = harvest;
proc sgrender data=_tmp04 template=ewmaplot;
run;
ods html close;
%mend EWMA;

****************(2) TESTING STEP****************************************;
%getr(startday = '01jan2005'd, squote = goog, filepath = c:\tmp,
rpath = c:\Program Files\R\R-2.13.0\bin);
%getr(startday = '01jan2005'd, squote = aapl, filepath = c:\tmp,
rpath = c:\Program Files\R\R-2.13.0\bin);
%ewma(squote1 = goog, squote2 = aapl, slambda = 0.94, filepath = c:\tmp);

****************END OF ALL CODING***************************************;

Wednesday, June 8, 2011

Bootstrap prediction models for probability of default

Unlike consumer credit scoring, corporate default studies are usually hampered by low-n, low-p data. In the fourth chapter of their book, Gunter and Peter demonstrate how to construct prediction models for the IDR (investment grade default rate) using VBA and then evaluate them by residual sum of squares [Ref. 1]. The only shortcoming is that the variables are limited and the observations scarce (25 total, 22 valid), which makes me uneasy about estimating the distribution. In this case, bootstrapping may be a good alternative, since it is a simple and straightforward way to wring more predictive power out of limited data. Previously, Wensui showed bootstrapped logit models for credit risk with Proc LOGISTIC and Proc GENMOD [Ref. 2]. The data transformation is done first by a Data Step merge, a technique raised by Liang Xie [Ref. 3] and annotated in the code below.

The GLMSELECT procedure in SAS 9.2.2 harnesses the power of variable selection and bootstrapping together. In this example, the trend of the IDR is hard to discern, even with a 3-year moving-average chart, so I chose 10,000 rounds of resampling. As a result, three of the four predictors remained, with satisfactory parameter estimates.

References:
1. Gunter Loeffler and Peter Posch. ‘Credit Risk Modeling using Excel and VBA’. 2nd edition. Wiley.
2. Wensui Liu. ‘Improving credit scoring by generalized additive model’. SAS Global Forum 2007.
3. Liang Xie. http://www.sas-programming.com

/*******************READ ME*********************************************
* - Bootstrap prediction models for probability of default -
*
* SAS VERSION: 9.2.2
* DATE: 09jun2011
* AUTHOR: hchao8@gmail.com
****************END OF READ ME******************************************/

****************(1) DATA INTEGRATION/TRANSFORMATION STEP*****************;
data idr;
infile datalines delimiter = ',' missover dsd lrecl=32767;
format year idr prf age bbb spr best12.;
input year idr prf age bbb spr;
datalines;
1981,0,-7.340255411,,27.02456779,2.77
/*Buy Gunter and Peter's book for the full data*/
2005,0.030637255,3.183410997,6.673717385,45.77112235,1.91
;;;
run;

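/* Self-merge alignment (Liang Xie's technique): the first copy of IDR,
   read from observation 2 onward, supplies next year's default rate,
   while the second copy supplies the current year's predictors; each
   row thus pairs the year-t predictors with the year t+1 default rate. */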
data idr_t;
merge idr(keep=idr firstobs=2 )
idr(rename=(idr=_idrforward year=_yearforward));
year = _yearforward + 1;
label idr = 'investment grade default rate'
prf = 'forecasted change in corporate profits'
age = 'fraction of new issuers'
bbb = 'fraction of bbb-rated issuers'
spr = 'spread on baa bonds';
run;

****************(2) MODULE-BUILDING STEP********************************;
%macro idrbs(data =, nsamp =, out =);
/*****************************************************************
* MACRO: idrbs()
* GOAL: build prediction model by variable selection and
* bootstrapping
* PARAMETERS: data = dataset to use
* nsamp = numbers of bootstrapping
* out = name of scored dataset
*****************************************************************/
proc sgplot data = &data;
title 'the investment grade default rates by year';
series x = year y = idr;
yaxis grid;
run;

ods graphics on;
proc macontrol data = &data;
title 'three-year moving average chart for investment grade default rate';
machart idr*year / span = 3 odstitle = title;
run;
proc glmselect data = &data plots = all;
model idr = prf age bbb spr/selection = stepwise(select = press);
modelaverage nsamples = &nsamp subset(best = 1);
output out = &out(drop = _:) p = pred_idr;
run;
ods graphics off;
%mend idrbs;

%idrbs(data = idr_t, nsamp = 10000, out = idr_scored);

****************END OF ALL CODING***************************************;

Tuesday, June 7, 2011

Automate univariate volatility modeling by macros


GARCH (generalized autoregressive conditional heteroscedasticity) models are widely used in the market risk industry to estimate and forecast the volatility of returns. GARCH, with its many variants such as A-GARCH, GJR-GARCH and E-GARCH, is especially suitable for short- and medium-term volatility forecasts, since it is based on a sound economic model and captures volatility clustering [Ref. 1].
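
For concreteness, the conditional variance fitted by Proc AUTOREG below follows the standard GARCH(q, p) equation, in AUTOREG's convention of q ARCH terms and p GARCH terms:

$$\sigma_t^2 \;=\; \omega + \sum_{i=1}^{q} \alpha_i\,\varepsilon_{t-i}^2 + \sum_{j=1}^{p} \beta_j\,\sigma_{t-j}^2,$$

so the test run with q = 4 and p = 1 uses four lagged squared residuals and one lagged conditional variance.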

Matlab and R dominate the skies of market risk [Ref. 2], while SAS is mentioned more often in the area of credit risk. For univariate volatility modeling in market risk, I personally find that SAS's AUTOREG procedure performs better than either the fGarch package in R or the garchset() function in Matlab [Ref. 3], because in my experience it is more robust and code-efficient. The pitfall is that SAS has no built-in functionality to download financial market data. Fortunately, the tseries package in R provides a get.hist.quote() function that obtains data from Yahoo or OANDA [Ref. 4]. In the example below, I embedded the corresponding R steps in a SAS macro, getr(), to download and transform the history of S&P 500 adjusted close prices. I then implemented a GARCH(4,1) model in another macro, univol(), to estimate volatilities and forecast them through the end of this year. The results suggest that the combination of SAS, R and Excel is useful for generating and diagnosing the desired GARCH volatility model.

References:
1. Carol Alexander. ‘Market Risk Analysis, Practical Financial Econometrics’. Wiley.
2. Jon Danielsson. ‘Financial Risk Forecasting: The Theory and Practice of Forecasting Market Risk with Implementation in R and Matlab’. Wiley.
3. The AUTOREG procedure. ‘SAS/ETS 9.2 User's Guide’. http://support.sas.com
4. The get.hist.quote() function. http://finzi.psych.upenn.edu/R/library/tseries

/*******************READ ME*********************************************
* - Automate univariate volatility modeling by macros -
*
* SAS VERSION: 9.2.2
* R VERSION: 2.13.0 (library: 'tseries', 'foreign')
* EXCEL VERSION:2007
*
* DATE: 08jun2011
* AUTHOR: hchao8@gmail.com
****************END OF READ ME******************************************/

****************(1) MODULE-BUILDING STEP*******************************;
%macro getr(startday = '01jan2005'd, squote =, filepath =, rpath =);
/****************************************************************
* MACRO: getr()
* GOAL: download adjusted close prices for specified quote
* and convert them into returns
* PARAMETERS: startday = start day of data to be downloaded
* squote = quote symbol to be downloaded
* filepath = file path for data exchange
* rpath = installation path for R
*****************************************************************/
options mprint mlogic;
data _null_;
day = &startday;
call symput('startdate', %str(put(&startday,yymmdd10.)));
call symput('enddate', %str(put(today(),yymmdd10.)));
run;

proc sql;
create table _tmp0 (string char(800));
insert into _tmp0
values("library(tseries,foreign)")
values("p=get.hist.quote(instrument='quote_choose',")
values("start='start_date',end='end_date',quote ='AdjClose',quiet=T)")
values("y=diff(log(p))")
values("y=y-mean(y)")
values("result=as.data.frame(y)")
values("write.csv(result,file='sas_path/find_result.csv')")
;quit;

data _tmp1;
set _tmp0;
string = tranwrd(string, "start_date", "&startdate");
string = tranwrd(string, "end_date", "&enddate");
string = tranwrd(string, "quote_choose", lowcase("&squote"));
string = tranwrd(string, "sas_path", translate("&filepath", "/", "\"));
string = tranwrd(string, "find_result", "&squote");
run;
data _null_;
set _tmp1;
file "&filepath\sas_r.r";
put string;
run;

options xsync xwait;
x "cd &rpath";
x "R.exe CMD BATCH --vanilla --slave &filepath\sas_r.r";
%mend getr;

%macro univol(predday = '31dec2011'd, q = 1, p = 0, squote =, filepath =);
/****************************************************************
* MACRO: univol()
* GOAL: estimate or forecast volatilities by garch(q, p)
* and plot diagnostics
* PARAMETERS: predday = end day for volatility prediction
* q = the subset of ARCH terms to be fitted
* p = the subset of GARCH terms to be fitted
* squote = quote symbol whose CSV was downloaded by getr()
* filepath = file path to exchange data
*****************************************************************/
options mprint mlogic;
data _tmp01;
infile "&filepath\result.csv" delimiter = ',' missover dsd
lrecl=32767 firstobs=2 end = eof;
format date yymmdd10.; informat date yymmdd10.;
format r best32.; informat r best32.;
input date r;
if eof then call symput('maxdaygot', date);
run;

data _null_;
format date yymmdd10.;
date = &maxdaygot;
dayinterval = intck('day', date, &predday);
call symput('dayinterval', dayinterval);
run;

%if %eval(&dayinterval) gt 0 %then %do;
data _tmp02(drop=i);
format date yymmdd10.;
date = &maxdaygot;
do i = 1 to &dayinterval;
r = .;
date + 1;
output;
end;
run;
data _tmp01;
set _tmp01 _tmp02;
run;
%end;

ods html file = "&filepath\output.xls" gpath = "&filepath\" style = harvest;
title; footnote;
ods graphics on;
ods select standardresidualplot fitplot qqplot residualhistogram acfplot;
proc autoreg data=_tmp01 all plots(unpack);
model r = / noint garch=(q=&q, p=&p);
output out=_tmp02 cev=v;
run;
ods graphics off;
data _tmp03;
set _tmp02;
length type $ 8;
if r ne . then do;
type = 'estimate'; output; end;
else do;
type = 'forecast'; output; end;
run;

proc print data = _tmp03 noobs;
run;
proc sgplot data=_tmp01;
series y=r x=date /lineattrs=(color=blue);
refline 0/ axis = y lineattrs = (pattern=shortdash);
run;
proc sgplot data=_tmp03;
series x=date y=v/group=type;
refline &maxdaygot/ axis = x lineattrs = (pattern=shortdash);
run;
ods html close;
%mend univol;

****************(2) TESTING STEP****************************************;
%getr(startday = '01jan2005'd, squote = ^gspc, filepath = c:\tmp,
rpath = c:\Program Files\R\R-2.13.0\bin);
%univol(predday = '31dec2011'd, q = 4, p = 1, squote = ^gspc, filepath = c:\tmp);

****************END OF ALL CODING***************************************;

Saturday, June 4, 2011

The eve of Lehman Brothers' demise


Recently Moody’s warned that it may downgrade the US government’s credit rating if the nation’s debt limit increase is not approved [Ref. 1]. The news came right after Standard & Poor’s revised the outlook on the US’s AAA sovereign rating to negative. These rating actions signal accumulating default risk and may set off some butterfly effect.

Before the start of this Great Recession, and without much notice, Lehman Brothers’ default probability increased drastically according to a classic structural model in the tradition of Merton [Ref. 2]. This change was reflected in neither Standard & Poor’s rating nor the stock price. Here I translated Gunter and Peter’s VBA code for the model [Ref. 3] into SAS and reproduced the trend plot (the closed-form formula is spelled out after this paragraph). The result clearly shows that an implementation of probability and statistics can raise an alert within the narrow 'escape' window, and therefore help avoid or mitigate risks.
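
Reading the formula off the cg_ps() function below (apparently the CreditGrades-style variant of the structural approach, hence the cg_ prefix): with stock price $S$, debt per share $D$, global recovery $\lambda$, barrier volatility $\sigma_B$, equity volatility $\sigma_S$ and horizon $t$,

$$d \;=\; \frac{(S+\lambda D)\,e^{\sigma_B^2}}{\lambda D}, \qquad \alpha \;=\; \sqrt{\Big(\frac{\sigma_S\,S}{S+\lambda D}\Big)^{2} t + \sigma_B^2},$$

$$P_{\mathrm{survival}} \;=\; \Phi\Big(-\frac{\alpha}{2} + \frac{\ln d}{\alpha}\Big) \;-\; d\,\Phi\Big(-\frac{\alpha}{2} - \frac{\ln d}{\alpha}\Big), \qquad \mathrm{PD} \;=\; 1 - P_{\mathrm{survival}}.$$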

References:
1. ‘Moody’s Warns of Downgrade for U.S. Credit’. The New York Times. 02JUN2011
2. Merton, R.C. ‘On the pricing of corporate debt: The risk structure of interest rates’. The Journal of Finance. 29(2): 449-470. 1974
3. Gunter Loeffler and Peter Posch. ‘Credit Risk Modeling using Excel and VBA’. 2nd edition. Wiley.


/*******************READ ME*********************************************
* - The eve of Lehman Brothers' demise -
*
* SAS VERSION: 9.2.2
* DATE: 04jun2011
* AUTHOR: hchao8@gmail.com
****************END OF READ ME******************************************/

proc fcmp outlib = work.cg.func;
function cg_ps(s, sigma_s, d, lambda, sigma_b, t);
d1 = (s + lambda*d) * exp(sigma_b ** 2) / (lambda *d);
alpha = (((sigma_s*s) / (s + lambda * d))**2 * t + sigma_b**2)**0.5;
x = probnorm(-(alpha/2) + (log(d1)/alpha)) -
d1 * probnorm(-(alpha/2) - (log(d1) / alpha));
return(x);
endsub;
quit;

data record;
input @1 date $7. @11 sp 4.2 @18 dps 20.8 @32 Vol30d 4.2 @43 s_p $2.;
cards;
Q4 2003 36.11 69.95612419 25.26 A+
/*Buy Gunter and Peter's book for the full data*/
Q2 2008 36.81 127.8 99.93 A
;;;
run;

options cmplib = work.cg;
data scored;
set record;
global_rcv = 0.5;
vol_barrier = 0.1;
time = 1;
vol30d = vol30d / 100;
pd = 1 - cg_ps(sp, vol30d, dps, global_rcv, vol_barrier, time);
label sp = 'Lehman Brothers'' stock price'
pd = 'Probability of default';
run;

proc sgplot data = scored;
series x = date y = sp;
series x = date y = pd / y2axis;
yaxis values= (0 to 90 by 10) label = 'stock price($)';
y2axis values= (0 to 0.4 by 0.05) grid
label = 'Default rating by Merton''s model';
refline 'Q4 2005' / axis = x;
inset "Time period when S&P rating as A" / position = topright ;
inset "Time period when S&P rating as A+" / position = topleft;
run;

Thursday, June 2, 2011

Using Proc IML for credit risk validation


The validation step is crucial for a scorecard in the credit risk industry. Gunter and Peter mention in their fantastic book [Ref. 1] that the cumulative accuracy profile (CAP) and the receiver operating characteristic (ROC) are two popular methods. The accuracy ratio from the CAP (which I refer to as the Gini coefficient) and the area under the curve (AUC) from the ROC are therefore important metrics for evaluating the discriminatory power of a scorecard, and they can be derived from each other through the linear relationship AR = 2*AUC - 1.

In a recent post on his blog, Rick Wicklin introduced how to implement the trapezoidal rule, i.e. calculate trapezoid areas under a curve, with a Proc IML function [Ref. 2] (the rule is written out after this paragraph). Although the same methodology can be realized with a Data Step array or Proc FCMP, the beauty of Rick’s method is that it avoids explicit loops through IML’s matrix operations and is therefore more efficient and scalable. In the example below, I built Rick’s function into a macro to calculate the AUC and the accuracy ratio for a tiny testing dataset. The TrapIntegral function can be applied just as well to the validation of large-scale credit risk records.
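
For points $(x_i, y_i)$, $i = 1, \dots, N$, sorted by $x$, TrapIntegral() evaluates

$$\int y\,dx \;\approx\; \sum_{i=1}^{N-1} (x_{i+1}-x_i)\,\frac{y_i + y_{i+1}}{2}$$

in a single inner product (dx` * meanY), with no explicit loop.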

References:
1. Gunter Loeffler and Peter Posch. ‘Credit Risk Modeling using Excel and VBA’. 2nd edition. Wiley.
2. Rick Wicklin. ‘The Trapezoidal Rule of Integration’. The Do Loop. 01JUN2011.

/*******************READ ME*********************************************
* - Using Proc IML for credit risk validation -
*
* SAS VERSION: 9.2.2
* EXCEL VERSION: 2007
* DATE: 02jun2011
* AUTHOR: hchao8@gmail.com
*
****************END OF READ ME******************************************/

****************(1) MODULE-BUILDING STEP********************************;
%macro auc(data =, path =, filename =);
/*****************************************************************
* MACRO: auc()
* GOAL: calculate auc and accuracy ratio for default risk
* PARAMETERS: data = dataset to use
* path = output path
* filename = name for validation card
*****************************************************************/
options mprint mlogic;
ods listing close;
proc sql;
select sum(default) into :totaldef
from &data;
quit;

data _tmp01;
set &data nobs = nobs;
xratio = ifn(default = 0, 1, 0)/(nobs - &totaldef);
yratio = default/&totaldef;
run;
proc sort data = _tmp01 out = _tmp02;
by descending rating descending default;
run;
data _tmp03;
set _tmp02;
by descending rating;
retain x y;
if _n_ = 1 then do; x = 0; y = 0; end;
x + xratio;
y + yratio;
if last.rating;
run;
data _tmp04;
if _n_ = 1 then do; x = 0; y = 0; end; output;
set _tmp03(keep = x y);
run;

proc iml;
use _tmp04;
read all var{x y};
start TrapIntegral(x,y);
N = nrow(x);
dx = x[2:N] - x[1:N-1];
meanY = (y[2:N] + y[1:N-1])/2;
return( dx` * meanY );
finish;
area = TrapIntegral(x,y);
acuratio = 2*area - 1;
call symput('area', left(char(area)));
call symput('acuratio', left(char(acuratio)));
quit;

ods html file = "&path&filename..xls" gpath = "&path" style = harvest;
title; footnote;
proc print data = &data label noobs;
run;

proc sgplot data = _tmp04;
series x = x y = y ;
scatter x = x y = y / name = "scatter";
band x =x upper = y lower = 0 / transparency=.5;
xaxis grid;
yaxis grid;
inset "AUC is: %sysfunc(putn(&area, 8.4));
Accuracy Ratio is: %sysfunc(putn(&acuratio, 8.4))"
/ position = bottomright border;
keylegend "scatter";
run;
ods html close;

proc datasets;
delete _:;
quit;
ods listing;
%mend auc;

****************(2) TESTING STEP****************************************;
data test;
input Observation Rating $ Default;
label rating = 'Rating(A is best)'
default = 'Default(1=default)';
cards;
1 A 0
2 A 0
3 A 0
4 B 1
5 B 0
6 B 0
7 C 1
8 C 1
9 C 1
10 C 0
;;;
run;

%auc(data = test, path = h:\, filename = valid);
****************END OF ALL CODING***************************************;