SAS macro %AickinAlpha: Aickin's Alpha agreement coefficient

%macro AickinAlpha(data, var1, var2, wt=1, values=, agree=&var1=&var2, outdata=, printalphanow=1, epsilon=1e-8, maxniter=5000, printfmt=8.6, title=, pseudocount=1, printtable=1, level=0.95);
    %local converged i lastchange lcount level100 m ncells niter nnonemptycells unlistedvalue unlistedvalues uservalue z;
    %local dsagree dscountsTable dscountsX dscountsXComplete dsdatavaluesNotInValues dsfComplete dsout dsuserlast dsuservalues dsvalues dsvaluesX;

    %let dsuserlast = &syslast;

    %let z = probit((1+&level)/2);
    %let level100 = %sysevalf(100*&level);

    %let dsout = %NewDatasetName(tmpout);

    /*
        This macro computes the alpha coefficient described in:

        Maximum Likelihood Estimation of Agreement in the Constant Predictive Probability Model, and Its Relation to Cohen-s Kappa
        Mikel Aickin
        Biometrics 46, 293-302, June 1990.
    */;

    %let dsvalues = %NewDatasetName(values);

    proc sql;
        create table &dsvalues as
        select distinct(x.__myxvar) as __myxvar
        from
            (select distinct(&var1) as __myxvar from &data
            outer union corresponding
            select distinct(&var2) as __myxvar from &data) as x;
    quit;

    %if %length(%superq(values)) %then %do;
        * user called this macro with values defined in values=: why not use them!?!?;
        %let nvalues = %eval(1 + %length(%sysfunc(compbl(&values))) - %length(%sysfunc(compress(&values))));
        %let dsuservalues = %NewDatasetName(uservalues);

        data &dsuservalues;
        %do i = 1 %to &nvalues;
            %let uservalue = %scan(&values, &i, %str( ));
            __myxvar = &uservalue;
            output;
        %end;
        run;

        proc sort data=&dsuservalues; by __myxvar; run;
        proc sort data=&dsvalues; by __myxvar; run;

        %let dsdatavaluesNotInValues = %NewDatasetName(datavaluesNotInValues);

        data &dsdatavaluesNotInValues;
            merge &dsvalues (in=in1) &dsuservalues (in=in2);
            by __myxvar;
            if in1 and not in2;
        run;

        proc sql noprint;
            select N(__myxvar) into: unlistedvalue
            from &dsdatavaluesNotInValues;
        quit;

        %if &unlistedvalue %then %do;
            proc sql noprint;
                select __myxvar into: unlistedvalues separated by ', '
                from &dsdatavaluesNotInValues;
            quit;

            %put ERROR: [macro AickinAlpha] values found in data &data for variable &var1 (or &var2) [=&unlistedvalues] not listed in values given through AickinAlpha values= argument;
            proc datasets nolist; delete &dsdatavaluesNotInValues &dsuservalues; run;
            %goto skip;
        %end;

        proc datasets nolist;
            delete &dsvalues &dsdatavaluesNotInValues;
            change &dsuservalues = &dsvalues;
        run;
    %end;

    %let dsvaluesX = %NewDatasetName(valuesX);

    proc sql;
        create table &dsvaluesX as
        select a.__myxvar as &var1, b.__myxvar as &var2
        from &dsvalues as a, &dsvalues as b
        where not missing(a.__myxvar) and not missing(b.__myxvar);
    quit;

    %let dscountsX = %NewDatasetName(countsX);

    proc sql;
        create table &dscountsX as
        select &var1, &var2, sum(&wt) as __count
        from &data
        where not missing(&var1) and not missing(&var2)
        group &var1, &var2;
    quit;

    %let dscountsXComplete = %NewDatasetName(countsXComplete);

    proc sql;
        create table &dscountsXComplete as
        select coalesce(k.&var1, u.&var1) as &var1, coalesce(k.&var2, u.&var2) as &var2, coalesce(k.__count, 0) as __count
        from &dscountsX as k
        full join &dsvaluesX as u
        on k.&var1 eq u.&var1 and k.&var2 eq u.&var2;
    quit;

    data &dscountsXComplete;
        set &dscountsXComplete;
        __agree = &agree;
    run;

    %if &printtable %then %do;
        %let dsfComplete = %NewDatasetName(fComplete);

        proc sql noprint;
            select ceil(log10(2+max(__count))) into :lcount from &dscountsXComplete;
        quit;

        proc sql;
            create table &dsfComplete as
            select &var1, &var2, __count, __count/sum(__count) as __p, . as __i
            from &dscountsXComplete;
        quit;

        proc report data=&dsfComplete nofs style={rules=none cellspacing=0} nowd headskip headline missing;
            column &var1 &var2,(__i __count __p);
            define &var1 / group;
            define &var2 / group across;
            define __i / display style={foreground=GWH background=GWH cellwidth=1pt font_size=1};
            define __count / analysis sum 'N' f=&lcount..0;
            define __p / analysis sum '%' f=percent8.2 style={font_style=italic foreground=GRB};
        run;

        proc datasets nolist; delete &dsfComplete; run;
    %end;

        * Treat special case where the two variables compared present only one value (how can there be disagreement in that case?!??!);

        proc sql noprint;
            select N(__count), sum(__count>0), sum(__count) into :ncells, :nnonemptycells, :m
            from &dscountsXComplete;
        quit;

        %if &m = 0 %then %do;
            %let converged = 1;
            data &dsout;
                alpha = .;
                sd_alpha = .;
                n = &m;
                niter = .;
                lastchange = .;
                converged = .;
                output;
            run;
            %goto report;
        %end;
        %else %if &ncells = 1 %then %do;
            %let converged = 1;
            data &dsout;
                alpha = 1;
                sd_alpha = .;
                n = &m;
                niter = 0;
                lastchange = .;
                converged = 1;
                output;
            run;
            %goto report;
        %end;

    %let dscountsTable = %NewDatasetName(countsTable);

    proc transpose data=&dscountsXComplete out=&dscountsTable (drop=&var1 _NAME_) prefix=__z;
        var __count;
        by &var1;
        id &var2;
    run;

    %let dsagree = %NewDatasetName(agree);

    proc transpose data=&dscountsXComplete out=&dsagree (drop=&var1 _NAME_) prefix=__d;
        var __agree;
        by &var1;
        id &var2;
    run;

        %if &nnonemptycells = 1 %then %do;
            * return alpha = 0|1 depending on the agreement as defined by the rule in agree=;
            proc iml;
                use &dscountsTable;
                read all into k;

                use &dsagree;
                read all into d;

                ssize = sum(k);
                P0 = sum(d # k) / ssize;
                sd_alpha = .;
                niter = 0;
                change = .;
                converged = 1;

                outres = P0 || sd_alpha || ssize || niter || change || converged;
                   create &dsout from outres [colname={"alpha" "sd_alpha" "n" "niter" "lastchange" "converged"}];
                append from outres;
            quit;
            proc datasets nolist; delete &dscountsTable &dsagree; run;
            %goto report;
        %end;

    proc iml;
        use &dscountsTable;
        read all into k;

        use &dsagree;
        read all into d;

        m = nrow(k);

        ssize = sum(k);
        k = k + &pseudocount/m/m; * Aickin-s suggestion to ensure computational convergence;
        n = ssize + &pseudocount;
        J = J(m,1,1);

        r = J` * k`;
        c = k` * J;

        A = sum(d # k);
        P0 = A / n;

        * Initial values;
        pr = r/n;
        pc = c/n;
        s = pr * d * pc;
        alpha = (P0 - s) / (1 - s);
        niter = 0;

        do until(change < &epsilon | niter = &maxniter);
            oldalpha = alpha;
            niter = niter + 1;

            pr = r / n / (1 - alpha + alpha/s*(d * pc)`);
            pr(|1|) = 1 - sum(pr(|2:m|));

            pc = c / n / (1 - alpha + alpha/s*(pr * d)`);
            pc(|1|) = 1 - sum(pc(|2:m|));

            s = pr * d * pc;
            alpha = (P0 - s) / (1 - s);

            change = abs(alpha-oldalpha);
        end;

        if change < &epsilon then converged = 1;
        else converged = 0;

        * estimate variance of alpha MLE;

        d2Lda2 = - n * (1-s) / (1-alpha) / ((1-alpha)*s + alpha); * scalar;
        T = (1-alpha) / alpha; * scalar;
        V = alpha / s / ((1-alpha)*s + alpha); * scalar;

        pcdiff = pc(|2:m|) - pc(|1|); * m-1 x 1;
        prdiff = pr(|2:m|) - pr(|1|); * m-1 x 1;

        d2Ldadpri        = - A * pcdiff / P0 / P0; * m-1 x 1;
        d2Ldadpcj        = - A * prdiff / P0 / P0; * m-1 x 1;
        d2Ldpridprj    = diag(- r(|2:m|) / (pr(|2:m|) # pr(|2:m|))) - r(|1|) / pr(|1|) / pr(|1|) + A * (2*s*T + 1) * V * V * (pcdiff * pcdiff`); * m-1 x m-1;
        d2Ldpcidpcj    = diag(- c(|2:m|) / (pc(|2:m|) # pc(|2:m|))) - c(|1|) / pc(|1|) / pc(|1|) + A * (2*s*T + 1) * V * V * (prdiff * prdiff`); * m-1 x m-1;
        d2Ldpridpcj    = - A * V * (J(m-1,m-1,1)+I(m-1)) + A * (2*s*T + 1) * V * V * (pcdiff * prdiff`); * m-1 x m-1;

        /*
            The matrix of second derivatives is composed of vectors and matrices
            computed above. The variance of alpha-s estimator is the top-left component of its inverse.
            The matrix is symetric and constructed as follows:

                        alpha                Pr1 Pr2 ... Prq                Pc1 Pc2 ... Pcq

                        ------------        --------------------     -------------------
            alpha     - d2Lda2 -     - t(d2Ldadpri) -     - t(d2Ldadpcj) -
                        ------------     --------------------     -------------------

            Pr1         --------------------     -------------------
            Pr2         - d2Ldpridprj -     - d2Ldpridpcj -
            ...             - -     - -
            Prq         --------------------     -------------------

            Pc1                     -------------------
            Pc2                     - d2Ldpcidpcj -
            ...                     - -
            Pcq                     -------------------
        */;

        var1 = d2Lda2 || d2Ldadpri` || d2Ldadpcj`; * 1 x 2m-1;
        var2 = d2Ldadpri || d2Ldpridprj || d2Ldpridpcj; * m-1 x 2m-1;
        var3 = d2Ldadpcj || d2Ldpridpcj` || d2Ldpcidpcj; * m-1 x 2m-1;
        var = var1` || var2` || var3`; * 2m-1 x 2m-1;
        var = INV(var);
        sd_alpha = sqrt(- var(|1,1|));

        outres = alpha || sd_alpha || ssize || niter || change || converged;
           create &dsout from outres [colname={"alpha" "sd_alpha" "n" "niter" "lastchange" "converged"}];
        append from outres;
    quit;

    proc datasets nolist; delete &dsagree &dscountsTable; run;

    proc sql noprint;
        select niter, lastchange, converged into :niter, :lastchange, :converged
        from &dsout;
    quit;

    %report:

    data &dsout;
        set &dsout;
        lcl = alpha - &z * sd_alpha;
        ucl = alpha + &z * sd_alpha;
        if lcl < 0 and not missing(lcl) then lcl = 0;
        if ucl > 1 then ucl = 1;
    run;

    %if &converged eq 0 %then %do;
        %put ERROR: [macro AickinAlpha] algorithm did not converge (last change = &lastchange > epsilon=&epsilon) after &maxniter iterations;
        %goto skip;
    %end;
    %else %if &printalphanow %then %do;
        proc report data=&dsout nofs style={rules=none cellspacing=0} nowd headskip headline missing;
            column alpha sd_alpha n lcl ucl;

            define alpha / analysis "Aickin's alpha" f=&printfmt;
            define sd_alpha / analysis "(sd)" f=&printfmt;
            define n / display "n";
            define lcl /display "&level100% Lower Conf Limit" f=&printfmt;
            define ucl / display "&level100% Upper Conf Limit" f=&printfmt;

            title1 '---- Agreement -----------------------------------------------';
            title2 &title;
        run;
    %end;

    %skip:
    proc datasets nolist; delete &dscountsX &dscountsXComplete &dsvalues &dsvaluesX; run;

    %if %length(%superq(outdata)) %then %do;
        proc datasets nolist; change &dsout=&outdata; run;
    %end;
    %else %do;
        proc datasets nolist; delete &dsout; run;
    %end;;

    %let syslast = &dsuserlast;
%mend AickinAlpha;

%macro NewDatasetName(datasetname);
    %*Finds the first unused dataset named *datasetname*, adding as many underscores as necessary at its beginning to make it unique!;
    %local newdatasetname;
    %let newdatasetname = %sysfunc(compress(&datasetname));

    %do %while(%sysfunc(exist(&newdatasetname)));
        %let newdatasetname=_&newdatasetname;
    %end;

    &newdatasetname
%mend NewDatasetName;