\section{Axioms for a Single Monad} \label{sec-single}

\subsection{A Weaker Set of Monad Rules} \label{sec-monrules}

First %, in Table~\ref{tab-eowrules},
we give a set of monad rules (\ref{extru}) to (\ref{extfun})
which is similar to those in
\S \ref{s-monads}, % Table~\ref{tab-eorules},
but replacing (\ref{extass}) by (\ref{extfun}).
Note also that (\ref{extlu}) and (\ref{extfun}) express the fact that
\ext\ is a functor.
Rule (\ref{extfun}) is apparently easier to satisfy than (\ref{extass})
% Table~\ref{tab-eorules}
since we may choose \om\ arbitrarily:
if we add to (\ref{extfun}) the extra requirement of (\ref{omext})
then we get (\ref{extass}).
But in fact (\ref{omext}) can be \emph{proved} from
rules (\ref{extru}) and (\ref{extfun}). %Table~\ref{tab-eorules}.
%
\comment{
\begin{table}%[!hbt]
\caption{Monad rules for \unit, \ext\ and \om} 
\label{tab-eowrules}
\begin{center} \textit{
\begin{tabular}{rcl@{\qquad\qquad}r}
ext f \oo\ unit & = & f & (\extru) \\
ext unit & = & id & (\extlu) \\
ext (g \om\ f) & = & ext g \oo\ ext f & (\extfun)
\end{tabular}
} \end{center} 
\end{table}
}
%
\setcounter{equation}{0}
\renewcommand\theequation{E\arabic{equation}}
% Table~\ref{tab-eowrules} was rules (\ref{extru}) to (\ref{extfun})
\begin{eqnarray}
\ext\ f \oo \unit\ & = & f \label{extru'} \\[-0.5ex]
\ext\ \unit\ & = & \id\ \label{extlu'} \\[-0.5ex]
\ext\ (g \om f) & = & \ext\ g \oo \ext\ f \label{extfun} \\[1ex]
\stepcounter{equation} \stepcounter{equation} % \stepcounter{equation}
g \om f & = & \ext\ g \oo f \label{omext}
\end{eqnarray}

In fact we can prove (\ref{omext}),
identity and associativity in the Kleisli category,
and some other useful results, from rules (\ref{extru}) to (\ref{extfun}).
% the rules of Table~\ref{tab-eowrules}.

\begin{theorem} \label{thm-sres}
Assuming rules (\ref{extru}) to (\ref{extfun})
% and defining \join\ using (\ref{joinext}),
\begin{align}
\tag{\ref{omext}} g \om f & = \ext\ g \oo f \\
\tag{\ref{omoass}} (h \om g) \oo f & = h \om (g \oo f) \\
\tag{EI} \label{extcong} \ext\ f = \ext\ g & \Rightarrow f = g \\
\tag{\ref{krid}} f \om \unit & = f \\[-0.5ex]
\tag{\ref{klid}} \unit \om f & = f \\[-0.5ex]
\tag{\ref{kass}} h \om (g \om f) & = (h \om g) \om f 
\end{align}
\comment{
\begin{quote}
\begin{tabular}{l@{\qquad}r@{\ \ }c@{\ \ }l}
\textbf{(\omext)} & g \om\ f & = & ext g \oo\ f \\
\textbf{(\extcong)} & ext f = ext g & $\Rightarrow$ & f = g \\
\textbf{(\omoass)} & (h \om\ g) \oo\ f & = & h \om\ (g \oo\ f) \\
\textbf{(\klid)} & unit \om\ f & = & f \\
\textbf{(\krid)} & f \om\ unit & = & f \\
\textbf{(\kass)} & h \om\ (g \om\ f) & = & (h \om\ g) \om\ f \\
% \textbf{(\exto)} & ext (g \oo\ f) & = & ext g \oo\ map f
\end{tabular}
\end{quote}
}
\end{theorem}

\begin{proof}
\begin{description}
\mditem[(\ref{omext}):] By (\ref{extfun}), 
$\ext\ (g \om f) \oo \unit = \ext\ g \oo \ext\ f \oo \unit$.
Using (\ref{extru}), this simplifies to $g \om f = \ext\ g \oo f$.

% \begin{center} \textit{
% \begin{tabular}{rcl@{\qquad\qquad}r}
% ext (h \om\ f) \oo\ unit & = & ext h \oo\ ext f \oo\ unit & \extfun \\
% h \om\ f & = & ext h \oo\ f & \extru,\extru
% \end{tabular}
% } \end{center} 

\mditem[(\ref{extcong}):] If $\ext\ f = \ext\ g$ then 
$\ext\ f \oo \unit = \ext\ g \oo \unit$,
that is, by (\ref{extru}), $f = g$.
% Using (\ref{extru}), this simplifies to $f = g$.

% \begin{center} \textit{
% ext f = ext g $\Rightarrow$ ext f \oo\ unit = ext g \oo\ unit 
% $\Rightarrow$ f = g 
% } \end{center} 

\mditem[(\ref{omoass}):] Using (\ref{omext}), both sides are equal to 
$\ext\ h \oo g \oo f$.

\mditem[(\ref{krid},\ref{klid},\ref{kass}):] One proof of these would proceed by 
rewriting these, using (\ref{omext}) and (\ref{extfun}),
to eliminate all uses of \om.
Then they follow from (\ref{extru}), (\ref{extlu})
and the associativity of \oo\ 
(since both sides of (\ref{kass}) become \textit{ext h \oo\ ext g \oo\ f}).
But another proof which better reflects our focus on categories and functors
is to observe that in each case, by (\ref{extcong}), 
it suffices to prove \ext(\lhs) = \ext(\rhs).
In each case, this may be proved using the fact that \ext\ is a functor,
ie, using (\ref{extfun}) and (\ref{extlu}).
That is, we use the identity and associativity rules for \id\ and \oo\ to 
prove the corresponding rules for \unit\ and \om.
\begin{description} 
\mditem[for (\ref{krid}):]
$\ext\ (f \om \unit) = \ext\ f \oo \ext\ \unit = \ext\ f \oo id = \ext\ f $
\mditem[for (\ref{klid}):]
$\ext\ (\unit \om f) = \ext\ \unit \oo \ext\ f = id \oo \ext\ f = \ext\ f $
\mditem[for (\ref{kass}):]
$\ext\ (h \om (g \om f)) = \ext\ h \oo (\ext\ g \oo \ext\ f) = $\\
\mbox{} \hfill 
  $(\ext\ h \oo \ext\ g) \oo \ext\ f = \ext\ ((h \om g) \om f)$ \hfill ~
\qed
\end{description}

\end{description}
\end{proof}

% At this point, note that 
% (\ref{extom}) follows trivially from (\ref{omext}),
% as does (\ref{extjm}) from (\ref{exto}) and (\ref{joinext}).
The following theorem is useful later.

\begin{theorem} \label{thm-ext-join-iff}
% If \join\ is defined using (\ref{joinext}),
In a monad the following are equivalent
\begin{enumerate}
\renewcommand\theenumi{(\roman{enumi})}
\renewcommand\labelenumi\theenumi
\item \label{ej1} \textit{ext g = g \oo\ join}
\item \label{ej2} \textit{g = ext (g \oo\ unit)}
\item \label{ej3} there exists $f$ such that \textit{g = ext f}
\item \label{ej4} for all $h$, \textit{ext (g \oo\ h) = g \oo\ ext h}
\end{enumerate}
\end{theorem}
\begin{proof}
\begin{description}
\comment{ now superfluous 
\item[\ref{ej3} $\Rightarrow$ \ref{ej1}]
Assume \ref{ej3}. Then

\textit{
\begin{tabular}{@{\qquad}rcl@{\qquad\qquad}r}
ext (f \om\ id) & = & ext f \oo\ ext id & (\ref{extfun}) \\
ext (ext f) & = & ext f \oo\ join & (\ref{extom}, \ref{joinext})  \\
ext g & = & g \oo\ join & 
\end{tabular} } 
end of comment }

\mditem[\ref{ej1} $\Rightarrow$ \ref{ej2}:]
Assume \ref{ej1}.  Then 
$\ext\ g \oo \map\ \unit = g \oo \join \oo \map\ \unit$
and so, by (\ref{exto}) and (\ref{MJ6}), $\ext\ (g \oo \unit) = g$.
\comment{
\begin{align*}
\ext\ (g \oo \unit) & = \ext\ g \oo \map\ \unit \tag{\ref{exto}} \\[-0.5ex]
  & = g \oo \join \oo \map\ \unit = g \tag{\ref{MJ6}}
\end{align*}
}

\comment{
\textit{
\begin{tabular}{rcl@{\qquad\qquad}r}
\ext\ g \oo\ \map\ \unit & = & g \oo\ \join\ \oo\ \map\ \unit & \\
\ext\ (g \oo\ \unit) & = & g \oo\ \ext\ \unit = g & 
  (\ref{exto}, \ref{extjm}, \ref{extlu})  
\end{tabular} } 
}

\mditem[\ref{ej2} $\Rightarrow$ \ref{ej3}:] is obvious.

\mditem[\ref{ej3} $\Rightarrow$ \ref{ej4}:]
When \textit{g = ext f}, \ref{ej4} is just (\ref{extass}).

\mditem[\ref{ej4} $\Rightarrow$ \ref{ej1}:]
Set \textit{h = id} and use (\ref{joinext}).

\mditem[\ref{ej4} $\Rightarrow$ \ref{ej2}:]
Set \textit{h = unit} and use (\ref{extlu}).
\qed
\end{description}
\end{proof}

Note also that in a monad, these properties of $g$ are preserved under
composition, that is, if they hold for $g_1$ and $g_2$ then they hold for
$g_1 \oo g_2$.
This can be seen for \ref{ej3} from (\ref{extfun}), or for \ref{ej1}
from (\ref{MJ8}) and (\ref{exto}).

Finally, we include a proof that \kl\ is a functor 
\cite[ChVI, \S 5]{maclane-categories}.
\begin{theorem}
\kl\ is a functor. 
\end{theorem}
\begin{proof}
Clearly \kl\ preserves identity: \textit{kl id = unit \oo\ id = unit}.
For composition:
\begin{align*} 
kl\ g \om kl\ f & = (\unit \oo g) \om (\unit \oo f) \\[-0.5ex]
  & = ((\unit \oo g) \om \unit) \oo f \tag{\ref{omoass}} \\[-0.5ex]
  & = (\unit \oo g) \oo f = kl\ (g \oo f) \tag*{(\ref{krid}) $\square$} 
\end{align*}
\end{proof}


From rules (\ref{extru}) to (\ref{extfun}), and so (\ref{omext}),
% we have the monad rules (\ref{extru}), (\ref{extlu}) and (\ref{extass}),
we have (\ref{extass}),
and so, assuming (\ref{joinext}) and (\ref{mapext}) as definitions, 
we can therefore assume rules (\ref{MJ1}) to (\ref{MJ8})
(Wadler\cite[\S 2.10]{wadler-essence}).
However it is now easy to outline proofs of rules (\ref{MJ1}) to (\ref{MJ8}).
Using (\ref{joinext}) and (\ref{mapext}),
(\ref{MJ8}) is proved using (\ref{extass}) and (\ref{extru}).
Using (\ref{extjm}), (\ref{MJ6}) is just (\ref{extlu}).
Using (\ref{extjm}), then (\ref{mapext}) and (\ref{joinext}) respectively,
(\ref{MJ4}) and (\ref{MJ7}) are special cases
of Theorem~\ref{thm-ext-join-iff}, \ref{ej3} $\Rightarrow$ \ref{ej1}.
Using (\ref{mapext}) and (\ref{joinext}) respectively,
(\ref{MJ3}) and (\ref{MJ5}) are cases of (\ref{extru}).
Finally, as noted earlier, (\ref{mapext}) shows that \textit{map = ext \oo\ kl}.
As \ext\ and \kl\ are functors, so is \map,
which gives us (\ref{MJ1}) and (\ref{MJ2}).

\subsection{Monad Rules Based on the Kleisli Category} \label{sec-mrkleisli}

We have proved the three rules, (\ref{krid}) to (\ref{kass}),
which reflect that the Kleisli category is indeed a category. 
We may ask whether these are sufficient to establish a monad.
If we could assume also (\ref{omext}),
then it would be easy to show rules (\ref{extru}) to (\ref{extfun}).
However, if we are (hoping to) define a monad in terms of these rules,
and in terms of the functions \unit\ and \om, then we can take
(\ref{extom}) as a definition, but that does not give us (\ref{omext}).

If we add a fourth rule, (\ref{omoIass}) 
(see below), governing \om, then that is sufficient.
For, clearly, (\ref{omoIass}) and (\ref{extom}) imply (\ref{omext}).
Note that (\ref{omoIass}) is the special case $g = \id$ of (\ref{omoass})
(shown again below) 
and is therefore true in any monad, see Theorem~\ref{thm-sres}.
Therefore we have the following theorem.
Note that, if we replaced (\ref{omoIass}) by (\ref{omouass}),
we would get, in effect, the 
``clone form'' presentation of an algebraic theory of 
Manes \cite[Chapter~1, Definition~3.2]{manes}.
In fact it is easy to show (\ref{omoass}) directly from either 
(\ref{omoIass}) or (\ref{omouass}) using the associativity of 
$\oo$ or $\om$ respectively.
%
\setcounter{equation}{0}
\renewcommand\theequation{A\arabic{equation}}
\begin{align} 
% \label{tab-assrules} was rules (\ref{krid}) to (\ref{omoIass})
f \om \unit & = f \label{krid} \\[-0.5ex]
\unit \om f & = f \label{klid} \\[-0.5ex]
h \om (g \om f) & = (h \om g) \om f \label{kass} \\[-0.5ex]
(h \om id) \oo f & = h \om f \label{omoIass} \\[1ex]
h \om (\unit \oo f) & = h \oo f 
  \label{omouass} \tag{\ref{omoIass}$'$} \\[1ex]
\ext\ g & = g \om id \label{extom} \\[1ex]
(h \om g) \oo f & = h \om (g \oo f) \label{omoass} 
\end{align}

\begin{theorem}
Rules (\ref{krid}) to (\ref{omoIass}) and definitions
(\ref{extom}), (\ref{joinext}), (\ref{mapext}) give a monad.
\end{theorem}

\comment{
\begin{table}%[!hbt]
\caption{Monad rules for \unit\ and \om} 
\label{tab-assrules}
\begin{center} \textit{
\begin{tabular}{rcl@{\qquad\qquad}r}
unit \om\ f & = & f & (\klid) \\
f \om\ unit & = & f & (\krid) \\
h \om\ (g \om\ f) & = & (h \om\ g) \om\ f & (\kass) \\
(h \om\ g) \oo\ f & = & h \om\ (g \oo\ f) 
 & (\omoIass) 
\end{tabular}
} \end{center} 
\end{table}
}

In Appendix~\ref{app-ex} are some examples showing how easy 
it often is to prove the monad rules when using rules 
(\ref{krid}) to (\ref{omoIass}).
These include two examples of compound monads, formed by 
starting with a monad $M$ and developing a more complex monad from it.

\subsection{A Free Theorem}

In the examples in Appendix~\ref{app-ex} it is trivial to 
prove rules (\ref{krid}) to (\ref{kass});
in these examples the rule (\ref{omoIass}) is the only one of the four whose
proof is not quite immediate 
(that is, although easy, it is tedious, involving several steps).
We may therefore ask whether it is really necessary.
It turns out that in typical cases it is a ``free theorem'' 
(Wadler \cite{wadler-free}).
We consider it in the ``naive set-theoretic'' fashion of 
\cite[\S 2,\S 3]{wadler-free}.
The latter sections of that paper indicate the semantic complexities 
involved in applying such results in a setting such as a typical functional
programming language, where the types cannot be regarded as sets.
Therefore the application of these results to any particular functional
programming language depends on the semantics of that language and its type
system.
First, we state a more general result.
In category theoretic terms, this result it that $\theta$ is a natural
transformation between the functors $(\_ \to \beta)$ and $(\_ \to \gamma)$.

\begin{proposition} \label{thm-param}
If $\theta : \forall \alpha.\ (\alpha \to \beta) \to (\alpha \to \gamma)$
is a function which is polymorphic in $\alpha$,
but where $\beta$ and $\gamma$ are fixed types, then
\begin{enumerate}
\renewcommand\theenumi{(\roman{enumi})}
\renewcommand\labelenumi\theenumi
\item \label{par1} there exists 
$h : \beta \to \gamma$ such that (for all $f : \alpha \to \beta$)
$\theta f = h \circ f$
\item \label{par2} 
for all $f : \alpha \to \alpha'$ and $g : \alpha' \to \beta$,
$\theta\ (g \circ f) = \theta\ g \circ f$
\end{enumerate}
\end{proposition}

The result \ref{par1} may be explained intuitively as follows.
Since $\theta$ is polymorphic in $\alpha$, the coding of the function $\theta$
cannot use any information about the type $\alpha$.
That is, although an argument $a : \alpha$ to the function $\theta f$
may be an integer, tree of strings, or whatever, $\theta$ (on its own)
can only treat $a$ as a ``black box''.
So the only thing that $\theta f$ can do with $a$ (other than ignore it)
is to apply $f$ to it.

Wadler \cite[\S 3]{wadler-free} shows how to obtain results of this nature
about particular polymorphic functions, and we follow his approach.
In particular, we write the proof as though types were sets.

\begin{proof}
Clearly the two parts are equivalent ($h$ being $\theta\ \id$).
We prove \ref{par2}.
Wadler's general result is $\theta \sim \theta$, where
$\sim$ denotes the relation appropriate to the type of the terms related,
which, for $\theta$, is 
$\forall \alpha.\ (\alpha \to \beta) \to (\alpha \to \gamma)$.
This means that for all $\alpha, \alpha'$ and relation
$\sim\ \subseteq \alpha \times \alpha'$,
for all $k : \alpha \to \beta$, $k' : \alpha' \to \beta$, 
$a : \alpha$ and $a' : \alpha'$,
whenever $k \sim k'$ and $a \sim a'$, then $\theta\ k\ a \sim \theta\ k'\ a'$.
Now $k \sim k'$ means whenever $x \sim x'$,
for $x : \alpha$ and $x' : \alpha'$,
then $k\ x \sim k'\ x'$.

We choose the relation $\sim\ \subseteq \alpha \times \alpha'$
to be $a \sim a' \iff a' = f \ a$; the relations on (fixed) types
$\beta$ and $\gamma$ are the identity.
So we can simplify the above.
We get $k \sim k'$ iff, for all $x : \alpha$, $k\ x = k'\ (f\ x)$,
that is, $k = k' \circ f$.
Then, letting $k' = g$ and $k = g \circ f$, we have $k \sim k'$.
So for all $a : \alpha$, we can let $a' = f \ a$, so $a \sim a'$ and
$\theta\ k\ a \sim \theta\ k'\ a'$, that is, 
$\theta\ (g \circ f)\ a = \theta\ g\ (f\ a)$.
Therefore, $\theta\ (g \circ f) = \theta\ g \circ f$.
\qed
\end{proof}

\begin{proposition} \label{thm-par-cor}
Assuming that 
$\mbox{\om} : (\beta \to \gamma M) \to (\alpha \to \beta M) \to 
  (\alpha \to \gamma M)$
is polymorphic in $\alpha$, the rule (\ref{omoass}) holds.
\end{proposition}
\begin{proof}
Given $h : \beta \to \gamma M$, define
$\theta\ g \equiv h \odot g$.
Then Theorem~\ref{thm-param}\ref{par2} gives \\
$h \om (g \oo f) = (h \om g) \oo f$,
as required.
\qed
\end{proof}