checker.h

/* This is the header file for the Checker module.  It is also
   included by the Cases module. */

/* external function */

/* Checks for evidence of dependence between the attributes in each area of
   case_law.  Warnings about dependence are written to log_stream.  If
   probabilities_stream is not NULL, a LaTeX matrix of probabilities is
   written to it for each area; inputable_latex is passed on to the routines
   that write the LaTeX header and trailer.

   NOTE(review): this declaration uses the types file,
   case_law_specification and boolean without declaring them; checker.c
   includes "shyster.h" and "cases.h" before this header, so they are
   presumably declared there. */

extern void
Check_for_Attribute_Dependence(
      file probabilities_stream,
      file log_stream,
      case_law_specification case_law,
      boolean inputable_latex);

checker.c

/* This is the implementation file for the Checker module. */

#include <stdio.h>
#include <stdlib.h>
#include "shyster.h"
#include "cases.h"
#include "checker.h"

static void
error_exit(
      file stream,
      const string message)

/* Reports message as an error from the "Checker" module by passing it,
   together with stream, to Write_Error_Message_And_Exit.

   NOTE(review): Write_Error_Message_And_Exit is defined outside this file;
   as its name suggests it is presumably expected not to return (callers in
   this module do not handle a return after a failed malloc) -- confirm. */

{
   Write_Error_Message_And_Exit(stream, "Checker", message);
}

static void
warning(
      file stream,
      const string message)

/* Reports message as a warning from the "Checker" module by passing it,
   together with stream and the Top_Level constant, to
   Write_Warning_Message.

   NOTE(review): Write_Warning_Message and Top_Level are defined outside
   this file; Top_Level presumably selects the indentation or nesting level
   of the warning -- confirm. */

{
   Write_Warning_Message(stream, "Checker", message, Top_Level);
}

static floating_point
factorial(
      cardinal number)

/* Returns number! (i.e. 1 * 2 * ... * number) as a floating-point value.
   The factorial of 0 is 1. */

{
   floating_point product = 1.0;
   cardinal factors_applied;

   /* multiply in the factors from 1 upwards, each new factor on the left,
      which is the order in which the recursive definition
      n! = n * (n - 1)! combines them */

   for (factors_applied = 0; factors_applied < number; factors_applied++)
      product = (floating_point) (factors_applied + 1) * product;

   return product;
}

static boolean
calculate_probabilities(
      attribute *attribute_pointer_X,
      attribute *attribute_pointer_Y,
      boolean *equivalence_function,
      boolean *inverse_function,
      floating_point *probability_that_or_fewer,
      floating_point *probability_that_or_more)

/* Calculates the probabilities for the two attributes pointed to by
   attribute_pointer_X and attribute_pointer_Y.

   The two attributes' lists of values (starting at matrix_head and linked
   by attribute_next) are walked in step.  Only pairs in which neither value
   is UNKNOWN are counted.

   Sets *equivalence_function to TRUE, if every known pair has equal values
   (an equivalence function maps attribute X to attribute Y).  Sets
   *inverse_function to TRUE, if every known pair has different values (an
   inverse function maps attribute X to attribute Y).  Both are left TRUE
   if there are no known pairs.

   Given the observed numbers of YES values for X and for Y among the known
   pairs, sets *probability_that_or_fewer to the probability of the observed
   number of YES/YES pairs or fewer, and *probability_that_or_more to the
   probability of that number or more.

   Returns TRUE on success.  Returns FALSE, if there are no known pairs; in
   that case the two probabilities are not written. */

{
   matrix_element *matrix_pointer_X,
     *matrix_pointer_Y;
   cardinal yes_count_X = 0,
      yes_count_Y = 0,
      yes_yes_count = 0,
      total_count = 0,
      count;
   floating_point multiplier,
      probability;

   /* assume that there is both an equivalence function and an inverse
      function, until a known pair shows otherwise */

   *equivalence_function = TRUE;
   *inverse_function = TRUE;

   /* for every pair of attribute values (stopping as soon as either list
      runs out, because there is then nothing left to pair) ... */

   for (matrix_pointer_X = attribute_pointer_X->matrix_head,
         matrix_pointer_Y = attribute_pointer_Y->matrix_head;
         (matrix_pointer_X != NULL) && (matrix_pointer_Y != NULL);
         matrix_pointer_X = matrix_pointer_X->attribute_next,
         matrix_pointer_Y = matrix_pointer_Y->attribute_next) {

      if ((matrix_pointer_X->attribute_value == UNKNOWN) ||
            (matrix_pointer_Y->attribute_value == UNKNOWN))

         /* at least one of the values is unknown, so ignore this pair */

         continue;

      /* both attribute values for this case are known, so count the YESs */

      if (matrix_pointer_X->attribute_value == YES) {
         yes_count_X++;
         if (matrix_pointer_Y->attribute_value == YES)
            yes_yes_count++;
      }
      if (matrix_pointer_Y->attribute_value == YES)
         yes_count_Y++;
      total_count++;

      if (matrix_pointer_X->attribute_value == matrix_pointer_Y->attribute_value)

         /* the attribute values are the same */

         *inverse_function = FALSE;
      else

         /* the attribute values are different */

         *equivalence_function = FALSE;
   }

   if (total_count == 0)

      /* there are no known pairs */

      return FALSE;

   /* calculate the probability of the smallest possible number of YES/YES
      pairs: that number is 0, unless there are so many YESs that some
      pairs must be YES/YES.

      NOTE(review): factorial() grows very quickly, so for a large
      total_count these intermediate values may exceed the range of
      floating_point -- confirm the expected number of cases. */

   if (yes_count_X + yes_count_Y > total_count) {
      count = yes_count_X + yes_count_Y - total_count;
      probability = (factorial(yes_count_X) * factorial(yes_count_Y)) /
            (factorial(total_count) * factorial(count));
   } else {
      count = 0;
      probability = (factorial(total_count - yes_count_X) *
            factorial(total_count - yes_count_Y)) /
            (factorial(total_count) *
            factorial(total_count - yes_count_X - yes_count_Y));
   }
   *probability_that_or_fewer = probability;

   /* step up to the actual number of YES/YES pairs, accumulating the
      probability of each number on the way.  The probability of count + 1
      pairs is the probability of count pairs multiplied by

         (yes_count_X - count) * (yes_count_Y - count)
         ---------------------------------------------------------------
         (count + 1) * (total_count - yes_count_X - yes_count_Y + count + 1)

      Here count < yes_yes_count, and yes_yes_count cannot exceed either
      yes_count_X or yes_count_Y, so both differences in the numerator are
      positive.  The denominator is evaluated in an order that keeps every
      intermediate value non-negative.  Each term is converted to
      floating_point before multiplying, so that no integer arithmetic can
      wrap around or overflow. */

   while (count < yes_yes_count) {
      multiplier = ((floating_point) (yes_count_X - count) *
            (floating_point) (yes_count_Y - count)) /
            ((floating_point) (count + 1) *
            (floating_point) (total_count + count + 1 -
                  yes_count_X - yes_count_Y));
      probability *= multiplier;
      *probability_that_or_fewer += probability;
      count++;
   }

   /* probability is now that of exactly yes_yes_count pairs, which belongs
      in both tails */

   *probability_that_or_more = 1.0 - (*probability_that_or_fewer - probability);
   return TRUE;
}

static void
write_probabilities_matrix(
      file probabilities_stream,
      file log_stream,
      area *area_pointer)

/* Calculates the probabilities for the area pointed to by area_pointer, and
   writes a matrix of probabilities.

   For every pair of attributes X and Y in the area (Y after X in the
   attribute list), a new probability_element is appended to attribute X's
   list of probabilities and filled in by calculate_probabilities.  A
   warning is written to log_stream for each pair showing functional
   dependence (equivalence or inverse), or whose probability of "that or
   fewer" or of "that or more" is no greater than Threshold.

   If probabilities_stream is not NULL, the matrix is written to it as a
   LaTeX tabular with two rows per attribute X: the first holds each "that
   or fewer" probability, the second each "that or more" probability.
   Nothing beyond the area heading is written, and nothing is calculated,
   if probabilities_stream is not NULL and the area has fewer than two
   attributes. */

{
   attribute *attribute_pointer_X,
     *attribute_pointer_Y;
   probability_element *probability_pointer,
     *tail_pointer;
   boolean equivalence_function,
      inverse_function;
   cardinal count;
   const char *dependence;
   char message[Max_Error_Message_Length];

   if (probabilities_stream != NULL) {

      fprintf(probabilities_stream, "%s{%s area}\n\n",
            Heading, area_pointer->identifier);

      if (area_pointer->number_of_attributes < 2)
         return;

      Indent(probabilities_stream, 1);
      fprintf(probabilities_stream, "\\begin{small}\n");
      Indent(probabilities_stream, 2);
      fprintf(probabilities_stream, "\\def\\arraystretch{0}\n");
      Indent(probabilities_stream, 2);
      fprintf(probabilities_stream,
            "\\begin{tabular}{*{%u}{|c}|@{}p{\\doublerulesep}@{}|c|}"
            "\\cline{1-%u}\n",
            area_pointer->number_of_attributes - 1,
            area_pointer->number_of_attributes - 1);
      Indent(probabilities_stream, 3);

      /* write the column headings (one for each attribute except the
         first) */

      for (count = 2; count <= area_pointer->number_of_attributes; count++)
         fprintf(probabilities_stream, "\\smash{\\raisebox{%s}{$A_{%u}$}}&",
               Raise_Height, count);

      fprintf(probabilities_stream,
            "\\multicolumn{2}{c}{\\raisebox{\\ht\\strutbox}{\\strut}}"
            "\\\\\\cline{1-%u}\n", area_pointer->number_of_attributes - 1);
      Indent(probabilities_stream, 3);
      fprintf(probabilities_stream,
            "\\multicolumn{%u}{c}{\\rule{0mm}{\\doublerulesep}}\\\\"
            "\\cline{1-%u}\\cline{%u-%u}\n",
            area_pointer->number_of_attributes + 1,
            area_pointer->number_of_attributes - 1,
            area_pointer->number_of_attributes + 1,
            area_pointer->number_of_attributes + 1);
   }

   /* for every attribute X (except the last, which has no attribute after
      it to be paired with) ... */

   for (attribute_pointer_X = area_pointer->attribute_head;
         (attribute_pointer_X != NULL) && (attribute_pointer_X->next != NULL);
         attribute_pointer_X = attribute_pointer_X->next) {

      if (probabilities_stream != NULL) {
         Indent(probabilities_stream, 3);
         if (attribute_pointer_X->number > 1)

            /* skip the cells to the left of the diagonal */

            fprintf(probabilities_stream, "\\multicolumn{%u}{c|}{}&",
                  attribute_pointer_X->number - 1);
      }

      /* find the end of attribute X's list of probabilities once, so that
         each new pair can be appended without searching the list again */

      tail_pointer = attribute_pointer_X->probability_head;
      if (tail_pointer != NULL)
         while (tail_pointer->next != NULL)
            tail_pointer = tail_pointer->next;

      /* for every attribute Y (i.e. every attribute after attribute X) ... */

      for (attribute_pointer_Y = attribute_pointer_X->next; attribute_pointer_Y != NULL;
            attribute_pointer_Y = attribute_pointer_Y->next) {

         /* allocate memory for this pair of probabilities, and append it
            to the list */

         probability_pointer = malloc(sizeof *probability_pointer);
         if (probability_pointer == NULL)
            error_exit(log_stream,
                  "malloc failed during probability matrix building");
         probability_pointer->next = NULL;

         if (tail_pointer == NULL)
            attribute_pointer_X->probability_head = probability_pointer;
         else
            tail_pointer->next = probability_pointer;
         tail_pointer = probability_pointer;

         /* calculate the probabilities for attributes X and Y */

         probability_pointer->unknown = !calculate_probabilities(attribute_pointer_X,
               attribute_pointer_Y, &equivalence_function, &inverse_function,
               &probability_pointer->probability_that_or_fewer,
               &probability_pointer->probability_that_or_more);

         /* a pair with no known values shows no dependence; setting the
            flag here means that it is never left uninitialized */

         probability_pointer->functional_dependence =
               !probability_pointer->unknown &&
               (equivalence_function || inverse_function);

         if (probability_pointer->unknown) {

            /* there are no known pairs */

            if (probabilities_stream != NULL)
               fprintf(probabilities_stream, "\\footnotesize?&");

         } else {

            if (probabilities_stream != NULL) {

               /* write the first probability for this cell in the matrix
                  (the probability of the actual number of YES/YES pairs or
                  fewer) */

               Write_Floating_Point(probabilities_stream,
                     probability_pointer->probability_that_or_fewer,
                     probability_pointer->functional_dependence ?
                     Functional_Dependence_Symbol :
                     probability_pointer->probability_that_or_fewer <= Threshold ?
                     Stochastic_Dependence_Symbol : Empty_String);
               fprintf(probabilities_stream, "&");
            }

            /* issue a warning if necessary: functional dependence takes
               precedence over stochastic dependence */

            if (equivalence_function)
               dependence = "functional dependence (equivalence)";
            else if (inverse_function)
               dependence = "functional dependence (inverse)";
            else if (probability_pointer->probability_that_or_fewer <= Threshold ||
                  probability_pointer->probability_that_or_more <= Threshold)
               dependence = "evidence of stochastic dependence";
            else
               dependence = NULL;

            if (dependence != NULL) {

               /* snprintf cannot write past the end of message, however
                  long the area identifier is */

               snprintf(message, sizeof message,
                     "%s between A%u and A%u in %s area",
                     dependence,
                     attribute_pointer_X->number, attribute_pointer_Y->number,
                     area_pointer->identifier);
               warning(log_stream, message);
            }
         }
      }

      if (probabilities_stream != NULL) {

         /* end the first row for attribute X, and start the second */

         fprintf(probabilities_stream, "&\\\\\n");
         Indent(probabilities_stream, 3);
         if (attribute_pointer_X->number > 1)
            fprintf(probabilities_stream, "\\multicolumn{%u}{c|}{}&",
                  attribute_pointer_X->number - 1);

         /* for every pair of probabilities for attribute X ... */

         for (probability_pointer = attribute_pointer_X->probability_head;
               probability_pointer != NULL;
               probability_pointer = probability_pointer->next) {

            /* write the second probability for this cell in the matrix (the
               probability of the actual number of YES/YES pairs or more) */

            if (probability_pointer->unknown)
               fprintf(probabilities_stream, "\\footnotesize?&");
            else {
               Write_Floating_Point(probabilities_stream,
                     probability_pointer->probability_that_or_more,
                     !probability_pointer->functional_dependence &&
                     probability_pointer->probability_that_or_more <= Threshold ?
                     Stochastic_Dependence_Symbol : Empty_String);
               fprintf(probabilities_stream, "&");
            }
         }

         /* write the row heading (at the right-hand end of the row) */

         fprintf(probabilities_stream, "&\\smash{\\raisebox{%s}{$A_{%u}$}}\\\\"
               "\\cline{%u-%u}\\cline{%u-%u}\n", Raise_Height,
               attribute_pointer_X->number, attribute_pointer_X->number,
               area_pointer->number_of_attributes - 1,
               area_pointer->number_of_attributes + 1,
               area_pointer->number_of_attributes + 1);
      }
   }

   if (probabilities_stream != NULL) {
      Indent(probabilities_stream, 2);
      fprintf(probabilities_stream, "\\end{tabular}\n");
      Indent(probabilities_stream, 1);
      fprintf(probabilities_stream, "\\end{small}\n\n");
   }
}

extern void
Check_for_Attribute_Dependence(
      file probabilities_stream,
      file log_stream,
      case_law_specification case_law,
      boolean inputable_latex)

/* Looks for evidence of dependence between the attributes of every area in
   the specification case_law, by calling write_probabilities_matrix on each
   area in turn.  Warnings go to log_stream.

   If probabilities_stream is not NULL, it receives a LaTeX document: a
   comment line and a header, then a matrix of probabilities for each area,
   then a trailer.  inputable_latex is handed to the header and trailer
   routines; if it is TRUE the LaTeX code written is meant to be included in
   another LaTeX document (i.e. it is not stand-alone code). */

{
   area *current_area = case_law.area_head;

   /* start the probabilities file, if there is one */

   if (probabilities_stream != NULL) {
      fprintf(probabilities_stream, "%% Probabilities file\n\n");
      Write_LaTeX_Header(probabilities_stream, inputable_latex);
   }

   /* deal with each area, in list order */

   while (current_area != NULL) {
      write_probabilities_matrix(probabilities_stream, log_stream, current_area);
      current_area = current_area->next;
   }

   /* without a probabilities file, there is nothing to finish off */

   if (probabilities_stream == NULL)
      return;

   Write_LaTeX_Trailer(probabilities_stream, inputable_latex);
}

Other SHYSTER modules: Shyster, Statutes, Cases, Tokenizer, Parser, Dumper, Scales, Adjuster, Consultant, Odometer and Reporter.