Thursday, May 19, 2011

Create transition matrices by cohort approach and hazard rate approch

------------The cohort approach macro has a bug and I am working on it ------------------------------------
Cohort approach is a widely-used method in creating transition matrices for evaluation of credit risk. In the example below, I made a macro to automate this process. Although the code is for one-year period transition matrices, scorecards for multiple-year period can simply derive from it.
%macro cohort(filepath = );
   options mlogic mprint;
   data _tmp01;
      infile "&filepath\data.txt" delimiter='09'x missover dsd firstobs=2 ;
      informat id best12.; informat date mmddyy10.; informat rawrating $5.;
      input id date rawrating;
   run;
   proc format;
      value $rating
      'NR   ' =   0
      'AAA'   =   1
      'AA+'   =   2
      'AA   ' =   2
      'AA-'   =   2
      'A+   ' =   3
      'A   '  =   3
      'A-   ' =   3
      'BBB+'  =   4
      'BBB'   =   4
      'BBB-'  =   4
      'BB+'   =   5
      'BB   ' =   5
      'BB-'   =   5
      'B+   ' =   6
      'B   '  =   6
      'B-   ' =   6
      'CCC+'  =   7
      'CCC'   =   7
      'CCC-'  =   7
      'CC   ' =   7
      'C   '  =   7
      'D   '  =   8
   ;;;
   run;
   data _tmp02;
      set _tmp01;
      year = year(date);
      rating = put(rawrating, $rating.);
      drop rawrating;
   run;

   proc sort data = _tmp02;
      by id year date;
   run;
   data _tmp03;
      set _tmp02;
      by id year;
      if last.year;
      drop date;
   run;
   proc sql;
      select min(year) into :y_start from _tmp03;
      select max(year)-1 into :y_end from _tmp03;
   quit;

   proc transpose data = _tmp03 out = _tmp04 prefix = year;
      by id;
      var rating;
      id year;
   run;
   data _tmp05;
      set _tmp04;
      %do num1 = &y_start %to &y_end;
         %let num2 = %eval(&num1 + 1);
         if year&num1 not in (., 0, 8) and year&num2 = . then year&num2 = year&num1;
      %end;
   run;

   proc transpose data = _tmp05 out = _tmp06;
      by id;
      var year:;
   run;
   proc sort data = _tmp06;
      by id _name_;
      where rating ne .;
   run;
   data _tmp07;
      set _tmp06;
      year = input(substr(_name_, 5, 4), 4.);
      drop _name_;
   run;

   proc sql;
      create table _tmp08 as
      select a.id, a.year as start_year, a.rating as start_rating,
            b.year as end_year, b.rating as end_rating
      from _tmp07(where=(rating not in (0, 8))) as a, _tmp07 as b
      where a.id = b.id and b.year - a.year = 1
   ;quit;

   proc freq data = _tmp08;
      table start_rating * end_rating / nofreq nocol nopercent;
      ods output crosstabfreqs = _tmp09;
   run;
   data _tmp10;
      set _tmp09;
      length end_rating_char $3;
      end_rating_char = cats(end_rating, '');
      if end_rating = 0 then end_rating_char = 'NR';
      rowpercent = rowpercent / 100;
      where RowPercent is not missing;
   run;

   ods html file = "&filepath\tmatrice.xls" style = minimal;
   title; footnote;
   proc report data = _tmp10 nowd headline split = '|';
      col start_rating end_rating_char,RowPercent;
      define start_rating / group 'Rating at the|start of one year';
      define end_rating_char / across 'Rating at the end of one year';
      define rowpercent / '' format = percent8.2;
   run;
   ods html close;
%mend cohort;
For hazard rate approach, the generator matrix can be obtained by codes like:

data one;
    format date date9.;
    input Id Date : date9. Rating;
    datalines;
    1    30-May-00    7
    1    31-Dec-00    6
    2    21-May-03    1
    3    30-Dec-99    5
    3    30-Oct-00    6
    3    30-Dec-01    5
    4    30-Dec-01    5
    4    30-May-02    6
    5    24-May-00    2
    5    30-May-01    3
    5    30-Oct-01    2
    6    30-Dec-99    4
    6    30-Dec-01    4
    7    30-Dec-02    4
    7    23-Jun-03    5
    7    30-Dec-03    6
    7    21-May-04    5
    8    30-Dec-02    3
    9    21-May-00    2
    9    30-Dec-00    0
    10   30-Dec-04    5
    11  30-Dec-99    5
    11  30-Dec-01    6
    11  21-May-02    7
    11  30-Sep-02    8
    12  30-Dec-00    0
    13  30-Dec-99    4
    13  30-May-03    5
    14  21-Sep-99    5
    14  30-Dec-99    5
    14  30-Dec-01    5
    14  26-May-02    6
    14  21-May-04    5
    ;
run;

proc sort data = one;
   by id date;
run;

proc sql noprint;
   select max(date) into: maxdat
   from one;
quit;
data two(drop=nextid nextdat);
   merge one one(firstobs = 2 rename=(rating=nextrat id=nextid date=nextdat));
   if id ne nextid then nextrat = rating;
   if id = nextid then spell = nextdat - date;
   else spell = &maxdat - date;
   spell = spell / 365;
run;

proc sql;
   create table three as
   select a.*, a.count / b.sumspell as ratio
   from (select rating, nextrat, count(*) as count
           from two group by rating, nextrat) as a,
        (select rating, sum(spell) as sumspell
           from two group by rating) as b
   where a.rating = b.rating
;quit;

proc sql;
   create table four as
   select rating, rating as nextrat, sum(ratio) as sumratio
   from three
   where rating ne nextrat
   group by rating
;quit;

data five;
   merge three four;
   by rating nextrat;
   if rating = nextrat then ratio = -sumratio;
run;

options ls = 256 nocenter missing = 0;
proc report data = five nowd headline;
   title 'Generator maxtrix';
   columns rating nextrat,ratio ;
   define rating / group 'From';
   define nextrat / across 'To';
   define ratio / ' ' format = 8.2;
run;

2 comments:

  1. the cohort approach macro doesn't work. Any suggestions to fix the bug?

    ReplyDelete
  2. Hey, Charlie. thx for your sas code.

    Can you explain why we use max date that calculate from whole data rather than maximum date from each account?

    ReplyDelete