/* This is the header file for the Checker module. It is also
included by the Cases module. */
/* external function */
/* Checks for evidence of dependence between the attributes in each area in
the specification case_law. A matrix of probabilities for each area is
written to probabilities_stream if it is not NULL; log_stream is the stream
to which the Checker's warning and error messages are directed. If
inputable_latex is TRUE, the LaTeX written is meant to be included in
another LaTeX document rather than to stand alone. */
extern void
Check_for_Attribute_Dependence(
file probabilities_stream,
file log_stream,
case_law_specification case_law,
boolean inputable_latex);
/* This is the implementation file for the Checker module. */
#include <stdio.h>
#include <stdlib.h>
#include "shyster.h"
#include "cases.h"
#include "checker.h"
static void
error_exit(file stream, const string message)
/* Reports message on stream as an error raised by the "Checker" module, by
   passing it to Write_Error_Message_And_Exit (which, going by its name, is
   not expected to return). */
{
    Write_Error_Message_And_Exit(stream, "Checker", message);
}
static void
warning(file stream, const string message)
/* Reports message on stream as a warning raised by the "Checker" module, by
   passing it to Write_Warning_Message at Top_Level. */
{
    Write_Warning_Message(stream, "Checker", message, Top_Level);
}
static floating_point
factorial(cardinal number)
/* Returns number! as a floating-point value (0! is 1.0). */
{
    floating_point product = 1.0;
    cardinal factor = 0;

    /* multiply upwards from 1 so that the factors are accumulated in the
       same order as the recurrence n! = n * (n - 1)! would apply them */
    while (factor < number) {
        factor++;
        product = (floating_point) factor * product;
    }

    return product;
}
static boolean
calculate_probabilities(
      attribute *attribute_pointer_X,
      attribute *attribute_pointer_Y,
      boolean *equivalence_function,
      boolean *inverse_function,
      floating_point *probability_that_or_fewer,
      floating_point *probability_that_or_more)
/* Calculates the probabilities for the two attributes pointed to by
   attribute_pointer_X and attribute_pointer_Y, and sets
   *probability_that_or_fewer and *probability_that_or_more appropriately:
   the former is the probability of the observed number of YES/YES pairs or
   fewer, the latter the probability of that number or more.

   Only cases in which both attribute values are known are counted.  Sets
   *equivalence_function to TRUE if the two values are the same in every
   such case, and *inverse_function to TRUE if they differ in every such
   case.

   Returns FALSE if there are no known pairs; in that case the two
   probabilities are not written.  Returns TRUE otherwise. */
{
    matrix_element *matrix_pointer_X,
               *matrix_pointer_Y;
    cardinal yes_count_X = 0,
        yes_count_Y = 0,
        yes_yes_count = 0,
        total_count = 0,
        count;
    floating_point multiplier,
        probability;

    /* assume that there is both an equivalence function and an inverse
       function until a pair of values shows otherwise */

    *equivalence_function = TRUE;
    *inverse_function = TRUE;

    /* walk the two lists of attribute values in step, one case at a time,
       stopping as soon as either list runs out */

    for (matrix_pointer_X = attribute_pointer_X->matrix_head,
          matrix_pointer_Y = attribute_pointer_Y->matrix_head;
          (matrix_pointer_X != NULL) && (matrix_pointer_Y != NULL);
          matrix_pointer_X = matrix_pointer_X->attribute_next,
          matrix_pointer_Y = matrix_pointer_Y->attribute_next) {

        if ((matrix_pointer_X->attribute_value == UNKNOWN) ||
              (matrix_pointer_Y->attribute_value == UNKNOWN))
            /* at least one of the values is unknown: ignore this case */
            continue;

        /* both attribute values for this case are known, so count the
           YESs */

        if (matrix_pointer_X->attribute_value == YES) {
            yes_count_X++;
            if (matrix_pointer_Y->attribute_value == YES)
                yes_yes_count++;
        }
        if (matrix_pointer_Y->attribute_value == YES)
            yes_count_Y++;
        total_count++;

        if (matrix_pointer_X->attribute_value == matrix_pointer_Y->attribute_value)
            /* the attribute values are the same */
            *inverse_function = FALSE;
        else
            /* the attribute values are different */
            *equivalence_function = FALSE;
    }

    if (total_count == 0)
        /* there are no known pairs */
        return FALSE;

    /* start from the smallest possible number of YES/YES pairs, which is
       max(0, yes_count_X + yes_count_Y - total_count), and calculate the
       probability of there being exactly that many */

    /* NOTE(review): factorial() grows very quickly; if floating_point is a
       narrow type these quotients presumably overflow once total_count
       reaches a few dozen -- confirm against the expected number of cases */

    if (yes_count_X + yes_count_Y > total_count) {
        count = yes_count_X + yes_count_Y - total_count;
        probability = (factorial(yes_count_X) * factorial(yes_count_Y)) /
              (factorial(total_count) * factorial(count));
    } else {
        count = 0;
        probability = (factorial(total_count - yes_count_X) *
              factorial(total_count - yes_count_Y)) /
              (factorial(total_count) *
              factorial(total_count - yes_count_X - yes_count_Y));
    }

    *probability_that_or_fewer = probability;

    /* step up to yes_yes_count pairs, accumulating the probability of each
       count on the way; the ratio between successive probabilities is
       (X - k)(Y - k) / ((k + 1)(N - X - Y + k + 1)) */

    while (count < yes_yes_count) {

        /* count < yes_yes_count <= yes_count_X, yes_count_Y here, so both
           differences in the numerator are positive; the denominator's
           second factor is ordered so that no intermediate result drops
           below zero.  Each factor is converted before multiplying, so the
           result relies neither on unsigned wraparound nor on the integer
           products fitting in a cardinal. */

        multiplier =
              ((floating_point) (yes_count_X - count) *
              (floating_point) (yes_count_Y - count)) /
              ((floating_point) (count + 1) *
              (floating_point) (total_count + count + 1 -
                    yes_count_X - yes_count_Y));

        probability *= multiplier;
        *probability_that_or_fewer += probability;
        count++;
    }

    /* probability now holds the probability of exactly yes_yes_count pairs,
       so "that or more" is one minus the probability of strictly fewer */

    *probability_that_or_more = 1.0 - (*probability_that_or_fewer - probability);

    return TRUE;
}
static void
write_probabilities_matrix(
file probabilities_stream,
file log_stream,
area *area_pointer)
/* Calculates the probabilities for the area pointed to by area_pointer, and
writes a matrix of probabilities. */
{
attribute *attribute_pointer_X,
*attribute_pointer_Y;
probability_element *probability_pointer;
boolean equivalence_function,
inverse_function;
cardinal count;
char message[Max_Error_Message_Length];
if (probabilities_stream != NULL) {
fprintf(probabilities_stream, "%s{%s area}\n\n",
Heading, area_pointer->identifier);
if (area_pointer->number_of_attributes < 2)
return;
Indent(probabilities_stream, 1);
fprintf(probabilities_stream, "\\begin{small}\n");
Indent(probabilities_stream, 2);
fprintf(probabilities_stream, "\\def\\arraystretch{0}\n");
Indent(probabilities_stream, 2);
fprintf(probabilities_stream,
"\\begin{tabular}{*{%u}{|c}|@{}p{\\doublerulesep}@{}|c|}"
"\\cline{1-%u}\n",
area_pointer->number_of_attributes - 1,
area_pointer->number_of_attributes - 1);
Indent(probabilities_stream, 3);
/* write the column headings */
for (count = 2; count <= area_pointer->number_of_attributes; count++)
fprintf(probabilities_stream, "\\smash{\\raisebox{%s}{$A_{%u}$}}&",
Raise_Height, count);
fprintf(probabilities_stream,
"\\multicolumn{2}{c}{\\raisebox{\\ht\\strutbox}{\\strut}}"
"\\\\\\cline{1-%u}\n", area_pointer->number_of_attributes - 1);
Indent(probabilities_stream, 3);
fprintf(probabilities_stream,
"\\multicolumn{%u}{c}{\\rule{0mm}{\\doublerulesep}}\\\\"
"\\cline{1-%u}\\cline{%u-%u}\n",
area_pointer->number_of_attributes + 1,
area_pointer->number_of_attributes - 1,
area_pointer->number_of_attributes + 1,
area_pointer->number_of_attributes + 1);
}
/* for every attribute X ... */
for (attribute_pointer_X = area_pointer->attribute_head;
(attribute_pointer_X != NULL) && (attribute_pointer_X->next != NULL);
attribute_pointer_X = attribute_pointer_X->next) {
if (probabilities_stream != NULL) {
Indent(probabilities_stream, 3);
if (attribute_pointer_X->number > 1)
fprintf(probabilities_stream, "\\multicolumn{%u}{c|}{}&",
attribute_pointer_X->number - 1);
}
/* for every attribute Y (i.e. every attribute after attribute X) ... */
for (attribute_pointer_Y = attribute_pointer_X->next; attribute_pointer_Y != NULL;
attribute_pointer_Y = attribute_pointer_Y->next) {
if (attribute_pointer_X->probability_head == NULL) {
/* allocate memory for this pair of probabilities (the first in
the list) */
if ((attribute_pointer_X->probability_head =
(probability_element *) malloc(sizeof(probability_element))) ==
NULL)
error_exit(log_stream,
"malloc failed during probability matrix building");
probability_pointer = attribute_pointer_X->probability_head;
} else {
/* go to the end of the list of probabilities */
for (probability_pointer = attribute_pointer_X->probability_head;
probability_pointer->next != NULL;
probability_pointer = probability_pointer->next);
/* allocate memory for this pair of probabilities */
if ((probability_pointer->next =
(probability_element *) malloc(sizeof(probability_element))) ==
NULL)
error_exit(log_stream,
"malloc failed during probability matrix building");
probability_pointer = probability_pointer->next;
}
/* calculate the probabilities for attributes X and Y */
probability_pointer->unknown = !calculate_probabilities(attribute_pointer_X,
attribute_pointer_Y, &equivalence_function, &inverse_function,
&probability_pointer->probability_that_or_fewer,
&probability_pointer->probability_that_or_more);
if (probability_pointer->unknown) {
/* there are no known pairs */
if (probabilities_stream != NULL)
fprintf(probabilities_stream, "\\footnotesize?&");
} else {
probability_pointer->functional_dependence =
equivalence_function || inverse_function;
if (probabilities_stream != NULL) {
/* write the first probability for this cell in the matrix
(the probability of the actual number of YES/YES pairs or
fewer) */
Write_Floating_Point(probabilities_stream,
probability_pointer->probability_that_or_fewer,
probability_pointer->functional_dependence ?
Functional_Dependence_Symbol :
probability_pointer->probability_that_or_fewer <= Threshold ?
Stochastic_Dependence_Symbol : Empty_String);
fprintf(probabilities_stream, "&");
}
/* issue warnings if necessary */
if (equivalence_function) {
sprintf(message,
"functional dependence (equivalence) "
"between A%u and A%u in %s area",
attribute_pointer_X->number, attribute_pointer_Y->number,
area_pointer->identifier);
warning(log_stream, message);
} else if (inverse_function) {
sprintf(message,
"functional dependence (inverse) "
"between A%u and A%u in %s area",
attribute_pointer_X->number, attribute_pointer_Y->number,
area_pointer->identifier);
warning(log_stream, message);
} else if (probability_pointer->probability_that_or_fewer <= Threshold ||
probability_pointer->probability_that_or_more <= Threshold) {
sprintf(message,
"evidence of stochastic dependence "
"between A%u and A%u in %s area",
attribute_pointer_X->number, attribute_pointer_Y->number,
area_pointer->identifier);
warning(log_stream, message);
}
}
probability_pointer->next = NULL;
}
if (probabilities_stream != NULL) {
fprintf(probabilities_stream, "&\\\\\n");
Indent(probabilities_stream, 3);
if (attribute_pointer_X->number > 1)
fprintf(probabilities_stream, "\\multicolumn{%u}{c|}{}&",
attribute_pointer_X->number - 1);
}
/* for every pair of probabilities for attribute X ... */
for (probability_pointer = attribute_pointer_X->probability_head;
probability_pointer != NULL; probability_pointer = probability_pointer->next)
if (probabilities_stream != NULL)
/* write the second probability for this cell in the matrix (the
probability of the actual number of YES/YES pairs or more) */
if (probability_pointer->unknown)
fprintf(probabilities_stream, "\\footnotesize?&");
else {
Write_Floating_Point(probabilities_stream,
probability_pointer->probability_that_or_more,
!probability_pointer->functional_dependence &&
probability_pointer->probability_that_or_more <= Threshold ?
Stochastic_Dependence_Symbol : Empty_String);
fprintf(probabilities_stream, "&");
}
if (probabilities_stream != NULL)
fprintf(probabilities_stream, "&\\smash{\\raisebox{%s}{$A_{%u}$}}\\\\"
"\\cline{%u-%u}\\cline{%u-%u}\n", Raise_Height,
attribute_pointer_X->number, attribute_pointer_X->number,
area_pointer->number_of_attributes - 1,
area_pointer->number_of_attributes + 1,
area_pointer->number_of_attributes + 1);
}
if (probabilities_stream != NULL) {
Indent(probabilities_stream, 2);
fprintf(probabilities_stream, "\\end{tabular}\n");
Indent(probabilities_stream, 1);
fprintf(probabilities_stream, "\\end{small}\n\n");
}
}
extern void
Check_for_Attribute_Dependence(
      file probabilities_stream,
      file log_stream,
      case_law_specification case_law,
      boolean inputable_latex)
/* Looks for evidence of dependence between the attributes of every area in
   the specification case_law.  The probabilities are calculated for each
   area in turn and, if probabilities_stream is not NULL, a matrix of
   probabilities for each area is written to it, between a LaTeX header and
   trailer.  If inputable_latex is TRUE, the LaTeX written is code that can
   be included in another LaTeX document (i.e. not stand-alone code). */
{
    area *current_area = case_law.area_head;

    if (probabilities_stream != NULL) {
        fprintf(probabilities_stream, "%% Probabilities file\n\n");
        Write_LaTeX_Header(probabilities_stream, inputable_latex);
    }

    /* visit the areas in list order */

    while (current_area != NULL) {
        write_probabilities_matrix(probabilities_stream, log_stream,
              current_area);
        current_area = current_area->next;
    }

    if (probabilities_stream == NULL)
        /* nothing was written, so there is nothing to close off */
        return;

    Write_LaTeX_Trailer(probabilities_stream, inputable_latex);
}
/* Other SHYSTER modules: Shyster, Statutes, Cases, Tokenizer, Parser,
   Dumper, Scales, Adjuster, Consultant, Odometer and Reporter. */