\documentclass[%
version=last,%
a5paper,
12pt,%
titlepage=no,%
headings=small,%
bibliography=totoc,%
index=totoc,%
twoside,%
reqno,%
cleardoublepage=empty,%
open=any,%
%parskip=half,%
draft=true,%
%DIV=classic,%
DIV=12,%
%headinclude=true,%
pagesize]
%{scrartcl}
{scrbook}

%\usepackage[notref,notcite]{showkeys}

%\usepackage[headsepline]{scrpage2}
\usepackage{scrpage2}
\pagestyle{scrheadings}
\clearscrheadings
\ofoot{\pagemark}
\ifoot{\headmark}

\usepackage{moredefs,lips}

\usepackage{pstricks,pst-3dplot}

\renewcommand{\captionformat}{\ }

\usepackage{cclicenses}
\usepackage{url}

\usepackage{relsize} % Here \smaller scales by 1/1.2; \relscale{X} scales by X

\newcommand{\starred}{${}*{}$}

\renewenvironment{quote}{\begin{list}{}
{\relscale{.90}\setlength{\leftmargin}{0.05\textwidth}
\setlength{\rightmargin}{\leftmargin}}
\item[]}
{\end{list}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\usepackage[polutonikogreek,english]{babel}

% From the Greek Text Society

%\usepackage{gfsneohellenic}
\usepackage{gfsporson}

\newcommand{\gr}[1]{\foreignlanguage{polutonikogreek}{%
%\relscale{0.8}
\textporson{%
#1}}%
}%  Greek text

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newcommand{\bce}{\textsc{b.c.e.}}
\newcommand{\ce}{\textsc{c.e.}}

\usepackage{amsmath,amssymb,amsthm,amscd}
\allowdisplaybreaks
\usepackage[mathscr]{euscript}
\usepackage{upgreek}
\usepackage{multicol}
\usepackage{stmaryrd}  % \triangle{left,right}eqslant
\usepackage[matrix,arrow]{xy}
\usepackage{hfoldsty}
\usepackage{verbatim}
\usepackage[neverdecrease]{paralist}
\usepackage{graphicx,rotating} % for the German script picture

%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  Theorems
%
%%%%%%%%%%%%%%%%%%%%%%%%

%\swapnumbers

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{porism}{Porism}
\newtheorem{corollary}{Corollary}

\numberwithin{porism}{theorem}
\numberwithin{corollary}{theorem}

\theoremstyle{definition}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{example}[theorem]{Example}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\usepackage{makeidx}
\makeindex

\newcommand{\zfc}{\mathrm{ZFC}}
\newcommand{\zf}{\mathrm{ZF}}
\newcommand{\lto}{\Rightarrow}
\newcommand{\liff}{\Leftrightarrow}
%\renewcommand{\land}{\mathrel{\&}}

\newcommand{\cdotalone}{{}\cdot{}}
\newcommand{\invalone}{{}\inv}
\usepackage{bm}
\newcommand{\tuple}[1]{\bm{#1}}

\newcommand{\included}{\subseteq}      % [the name suggests the meaning here]
\newcommand{\nincluded}{\nsubseteq}      % [the name suggests the meaning here]
\newcommand{\pincluded}{\subset}      % [the name suggests the meaning here]
\renewcommand{\leq}{\leqslant}
\renewcommand{\geq}{\geqslant}
\renewcommand{\nleq}{\nleqslant}
\renewcommand{\emptyset}{\varnothing}
\renewcommand{\setminus}{\smallsetminus}
\let\standardphi\phi
\renewcommand{\phi}{\varphi}
\let\standardepsilon\epsilon
\renewcommand{\epsilon}{\varepsilon}

\newcommand{\stnd}[1]{\mathbb{#1}}
\newcommand{\N}{\stnd{N}}
\newcommand{\Z}{\stnd{Z}}         % integers
\newcommand{\Q}{\stnd{Q}}         % rationals
\newcommand{\Qp}{\Q^+}         % positive rationals
\newcommand{\rc}[1]{#1^{\mathrm{rc}}}         % real closure
\newcommand{\C}{\stnd{C}}         % complex numbers
\newcommand{\R}{\stnd{R}}         % real numbers
\newcommand{\Rp}{\R^+}         % positive real numbers
\newcommand{\diff}[1][\R]{\textrm C_{\infty}(#1)}
\newcommand{\F}{\stnd{F}}         % 
\newcommand{\Ham}{\stnd{H}}         % quaternions 
\newcommand{\Oct}{\stnd{O}}         % octonions
%\newcommand{\primes}{\stnd{P}}      % prime numbers
\newcommand{\on}{\mathbf{ON}}       % ordinals
\newcommand{\cn}{\mathbf{CN}}       % cardinals
\DeclareMathOperator{\id}{id}          % identity-map
%\newcommand{\id}{\operatorname{id}}          % identity-map
\newcommand{\gid}{\mathrm e}  % identity of group
%\newcommand{\bv}{\mathbf e}   % "basis vector"  
\newcommand{\mE}{\mathrm E}   % matrix with these as rows
\newcommand{\inv}{^{-1}}                % mult. inverse
\newcommand{\Id}{\mathrm I}

\newcommand{\It}{\mathbf{I}}

\newcommand{\Mat}[2][n\times n]{\operatorname M_{#1}(#2)}
\newcommand{\MatR}[1][n\times n]{\Mat[#1]{R}}
\newcommand{\MatZ}[1][n\times n]{\Mat[#1]{\Z}}
\newcommand{\GL}[2][n]{\operatorname{GL}_{#1}(#2)}
\newcommand{\GLZ}[1][n]{\GL[#1]{\Z}}
\newcommand{\GLR}[1][n]{\GL[#1]{R}}
\newcommand{\IM}{\mathrm I}
\newcommand{\lin}[1]{\mathscr L(#1)}
\DeclareMathOperator{\diag}{diag}
\newcommand{\trans}[1]{#1^{\mathrm t}}
\DeclareMathOperator{\adjoint}{adj}
\newcommand{\adj}[1]{\adjoint(#1)}
\DeclareMathOperator{\Span}{span}

\newcommand{\Kfg}{\mathrm V_4}     % Klein four group
\newcommand{\quat}{\mathrm Q_8}  % Quaternion group

\newcommand{\str}[1]{\mathfrak{#1}}     % structure
\newcommand{\qsep}{\;}                 % follows a quantified variable
\newcommand{\Forall}[1]{\forall{#1}\qsep }
\newcommand{\Exists}[1]{\exists{#1}\qsep }
\newcommand{\modsim}{/\mathord{\sim}}  % modulo the eq-ren \sim
\newcommand{\eqc}[1]{[#1]}             % equivalence-class

\newcommand{\divides}{\mathrel{|}}
\newcommand{\ndivides}{\mathrel{\nmid}}
\newcommand{\order}[1]{\lvert#1\rvert}
\newcommand{\gpgen}[1]{\langle#1\rangle}% subgroup generated by #1

%\newcommand{\trivgp}{\{\gid\}}  % trivial group
%\newcommand{\trivabgp}{\{0\}}  % trivial abelian group
%\newcommand{\nsubgen}[1]{\langle\langle#1\rangle\rangle}% normal subgroup generated by #1
\newcommand{\unordered}[2]{[#2]^{#1}}  % unordered #1-tuples from #2
\newcommand{\free}[1]{\operatorname{F}(#1)}  % free group on #1
\newcommand{\fggen}{I}  % generating set of a free group
\newcommand{\gprels}{B} % relations
\newcommand{\setactedon}{A}  % set acted on by a group

\newcommand{\setcolon}{\colon}

\newcommand{\subgp}{<}              % subgroup
\newcommand{\nsubgp}{\vartriangleleft}  % normal subgroup
\newcommand{\nnsubgp}{\ntriangleleft}  % not a normal subgroup
\newcommand{\nsupgp}{\vartriangleright}  % normal supergroup
%\newcommand{\nnsupgp}{\ntriangleright}  % normal supergroup
\newcommand{\psubgp}{\lneqq}
\newcommand{\psupgp}{\gneqq}

\newcommand{\Ker}[1]{\ker(#1)}
%\DeclareMathOperator{\im}{im}          % image
\newcommand{\im}[1]{\operatorname{im}(#1)}

\newcommand{\congruence}{\equiv}
\newcommand{\siml}{\congruence_{\ell}^H}
\newcommand{\simr}{\congruence_{\mathrm r}^H}

\newcommand{\weakprod}{\sideset{}{^{\mathrm{w}}}\prod}
\newcommand{\textweakprod}{\prod^{\mathrm w}}
\newcommand{\freeprod}{\sideset{}{^*}\prod}
\newcommand{\textfreeprod}{\prod^*}
\newcommand{\gpres}[2]{\gpgen{#1\mid#2}}% group on #1 with rel'ns #2
\newcommand{\centr}[1]{\operatorname{C}(#1)}  % center
\newcommand{\cseries}[2]{\operatorname{C}_{#1}(#2)} % central series
%\newcommand{\cseriesplain}[1]{\operatorname{C}_{#1}} % central series
\newcommand{\centralizer}[2]{\operatorname{C}_{#2}(#1)} % centralizer
\newcommand{\normalizer}[2]{\operatorname{N}_{#2}(#1)}
\newcommand{\dsubgp}[2]{{#2}^{(#1)}}  % n-th derived subgroup of #2, where n=#1.
\newcommand{\tsubgp}[1]{#1_{\mathrm{t}}} % torsion sub-group


\newcommand{\family}[1]{\mathcal{#1}}  % family (of sets)
\newcommand{\class}[1]{\mathbf{#1}}    % class

\newcommand{\units}[1]{{#1}^{\times}}    % group of units of a ring
\newcommand{\Zmodgp}[1]{\Z_{#1}}
\newcommand{\Zmod}[1]{\Z/(#1)}
\newcommand{\Zmodu}[1]{\units{\Zmod{#1}}}
\DeclareMathOperator{\lcm}{lcm}
\newcommand{\rest}[1]{\restriction{#1}}% restriction of function to #1
\newcommand{\modulo}{\emph{modulo}}

\newcommand{\bracket}{\operatorname b}  % (Lie) bracket

% Concerning permutations:

\newenvironment{cycle}{(}{)}
\newcommand{\cdiv}{\;\,} % space between terms in a cycle
\newcommand{\sgn}{\operatorname{sgn}}
\newcommand{\sq}[2][\sigma]{q_{#1}(#2)}  % used to define sgn

\newcommand{\Sym}[1]{\operatorname{Sym}(#1)}
\newcommand{\Alt}[1]{\operatorname{Alt}(#1)}       % alternating group
%\newcommand{\Dih}[1]{D_{#1}}       % dihedral group
\newcommand{\Dih}[1]{\operatorname{Dih}(#1)}       % dihedral group

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newcommand{\mi}{\mspace{2mu}\mathrm i\mspace{2mu}}
\newcommand{\mj}{\mathrm j\mspace{2mu}}
\newcommand{\mk}{\mathrm k}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%\newcommand{\setimb}[1]{[#1]}   % image of a set, using brackets
\newcommand{\abs}[1]{\left\lvert#1\right\rvert}  % absolute value
\newcommand{\size}[1]{\lvert#1\rvert}  % cardinality

\newcommand{\so}[1]{\operatorname{E}(#1)}
\newcommand{\End}[1]{\operatorname{End}(#1)}
\newcommand{\Aut}[1]{\operatorname{Aut}(#1)}
\newcommand{\Hom}[1]{\operatorname{Hom}(#1)}
\newcommand{\Inn}[1]{\operatorname{Inn}(#1)}
\newcommand{\Der}[1]{\operatorname{Der}(#1)}

\newcommand{\pid}{\textsc{pid}}
\newcommand{\ufd}{\textsc{ufd}}
\newcommand{\ed}{\textsc{ed}}

%\newcommand{\pid}{PID}
%\newcommand{\Pid}{PID}
%\newcommand{\ufd}{UFD}
%\newcommand{\ed}{ED}

\newcommand{\primei}{\mathfrak{p}}      % a prime ideal
\newcommand{\maxi}{\mathfrak{m}}        % a maximal ideal
\newcommand{\supp}[1]{\operatorname{supp}(#1)}
\newcommand{\Supp}[1]{\operatorname{supp}[#1]}
\newcommand{\symdiff}{\mathbin{\vartriangle}}
\newcommand{\spec}[1]{\operatorname{Spec}(#1)}

%\newcommand{\lang}{\mathcal{L}}        % a language or signature

\usepackage{mathrsfs}
\newcommand{\pow}[1]{\mathscr{P}(#1)}  % power set
\let\oldsqrt\sqrt
\renewcommand{\sqrt}[2][1]{\oldsqrt{\vphantom{#1}}{#2}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%\renewcommand{\theequation}{\roman{equation}}
%\renewcommand{\theequation}{\fnsymbol{equation}}

\renewcommand{\thepart}{\Roman{part}}

\begin{document}
\title{Linear Algebra}
\author{David Pierce}
\date{April 15, 2019}
\publishers{Matematik B\"ol\"um\"u\\
Mimar Sinan G\"uzel Sanatlar \"Universitesi\\
\url{mat.msgsu.edu.tr/~dpierce/}\\
\url{polytropy.com}}
%\frontmatter

\maketitle

  \tableofcontents

\addchap{Introduction}

References for these notes include
Hoffman and Kunze \cite{MR0276251},
Ko\c c \cite{CK-II},
  Lang \cite{Lang-LA,Lang-alg}, and Roman \cite{MR2125693},
  but I may not follow them closely.

  Since in set theory the letter $\upomega$ denotes the set
  $\{0,1,2,\dots\}$ of natural numbers,
  I let $\N$ denote the set $\{1,2,3,\dots\}$
  of counting numbers.
  For notational convenience, each $n$ in $\N$
  is the set $\{0,\dots,n-1\}$,
  which has $n$ elements.
  The expressions $i<n$ and $i\in n$ are interchangeable.

  An expression like
  \begin{equation*}
    \bigwedge_{i<n}\phi(i)
  \end{equation*}
  means $\phi(i)$ holds whenever $i<n$; that is,
  \begin{equation*}
    i<n\implies\phi(i).
  \end{equation*}

  The notation $f\colon A\to B$
  is to be read as a sentence, ``$f$ is a function from $A$ to $B$.''

  \chapter{Determinants}

\section{Matrix multiplication}

The structures $\C$, $\R$, $\Q$, $\Z$, and $\Zmod n$,
where $n\in\N$ and
\begin{equation*}
  \N=\{x\in\Z\colon x>0\},
\end{equation*}
are \emph{commutative rings.}

For us,
a \textbf{ring} will be a structure $(R,\cdotalone,1)$,
where
\begin{compactenum}[1)]
\item
  $R$ is an abelian group, written additively,
\item
  $\cdotalone$ is a \textbf{multiplication} on $R$,
  that is, a binary operation on $R$ that
  distributes from each side over addition,
\item
  $\cdotalone$ is associative, and
\item
  $1$ is a two-sided identity with respect to $\cdotalone$.
\end{compactenum}
We usually write $(R,\cdotalone,1)$ as $R$.

A \textbf{unit} of a ring is an \textbf{invertible} element,
that is, an element with a left inverse and a right inverse.
When these one-sided inverses exist, they are equal.
The units of a ring $R$ compose a multiplicative group, denoted by
\begin{equation*}
\units R.  
\end{equation*}
A ring is \textbf{commutative} if its multiplication is commutative.
We gave examples above.
For an example of a group of units, 
we note that, for all $n$ in $\N$,
\begin{equation*}
  \size{\Zmodu n}=\size{\{x\in\Zmod n\colon\gcd(x,n)=1\}}=\upphi(n).
\end{equation*}
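For example,
\begin{equation*}
  \Zmodu{12}=\{1,5,7,11\},
\end{equation*}
a group of order $\upphi(12)=4$ in which every element is its own inverse,
so that $\Zmodu{12}\cong\Kfg$.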
A commutative ring $R$ is a \textbf{field} if $\units R=R\setminus\{0\}$.
If $p$ is prime, then $\Zmod p$ is the field $\F_p$, and
\begin{equation*}
  \units{\F_p}\cong\Zmodgp{p-1},
\end{equation*}
where in general $\Zmodgp n$ is the cyclic group of order $n$,
and $\Zmod n$ means $(\Zmodgp n,\cdotalone,1)$.

In this chapter, we shall work with an arbitrary commutative ring $K$.
The definition of a \textbf{module} over $K$
is the same as the definition of a vector space,
when $K$ is a field.
An abelian group is a module over $\Z$.

If $(m,n)\in\N\times\N$, then $K^{m\times n}$ and $K^n$ are modules over $K$,
and there is a map
\begin{equation*}
  (X,\bm y)\mapsto X\bm y\colon K^{m\times n}\times K^n\to K^m,
\end{equation*}
defined as follows.

If $\Omega$ is a set, we denote by
\begin{equation*}
  K^{\Omega}
\end{equation*}
the $K$-module of functions from $\Omega$ to $K$.
This defines $K^n$ when we understand $n$ as the $n$-element set
$\{0,\dots,n-1\}$.
An arbitrary element of $K^n$ is one of
\begin{align*}
  &(a^0,\dots,a^{n-1}),&&(a^j\colon j\in n),&&\bm a.
\end{align*}
The superscripts are row numbers, when we think of $\bm a$ as the
$n\times1$ matrix
\begin{equation*}
  \begin{pmatrix}
    a^0\\\vdots\\a^{n-1}
  \end{pmatrix}.
\end{equation*}
Many persons understand $K^n$ as $K^{[n]}$,
where $[n]$ is the set
$\{1,\dots,n\}$ with $n$ elements.
What is important is that an element of $K^n$
be a function into $K$ from a linearly ordered set with $n$ elements.

An element $A$ of $K^{m\times n}$
is a matrix of $m$ rows and $n$ columns,
having entries $a^i_j$ from $K$, where $i\in m$ and $j\in n$, so
\begin{equation*}
  A=
  \begin{pmatrix}
    a^0_0&\cdots&a^0_{n-1}\\
    \vdots&\ddots&\vdots\\
    a^{m-1}_0&\cdots&a^{m-1}_{n-1}
  \end{pmatrix}
  =(a^i_j)^{i\in m}_{j\in n}.
\end{equation*}
If one prefers,
one may work instead with elements of $K^{[m]\times[n]}$,
and one may write $a_{ij}$ for $a^i_j$.
If also $\bm b\in K^n$, we define
\begin{equation}\label{eqn:Ab}
  A\bm b=\left(\sum_{j\in n}a^i_jb^j\colon i\in m\right),
\end{equation}
an element of $K^m$.
As in \eqref{eqn:Ab} with $j$,
when an index appears twice, once raised and once lowered,
it is usually being summed over.
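For example, over $\Z$,
\begin{equation*}
  \begin{pmatrix}
    1&2\\
    3&4
  \end{pmatrix}
  \begin{pmatrix}
    5\\6
  \end{pmatrix}
  =
  \begin{pmatrix}
    1\cdot5+2\cdot6\\
    3\cdot5+4\cdot6
  \end{pmatrix}
  =
  \begin{pmatrix}
    17\\39
  \end{pmatrix}.
\end{equation*}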
When $j\in n$, we define
\begin{equation}\label{eqn:e}
  \mathbf e_j=(\updelta^i_j\colon i\in n)
\end{equation}
in the module $K^n$,
where
\begin{equation}\label{eqn:delta}
  \updelta_j^i=
  \begin{cases}
    1,&\text{ if }i=j,\\
    0,&\text{ if }i\neq j.
  \end{cases}
\end{equation}
Then
\begin{equation}\label{eqn:col-j}
  A\mathbf e_j
  =\left(\sum_{k\in n}a^i_k\updelta^k_j\colon i\in m\right)
  =(a^i_j\colon i\in m)=\bm a_j,
\end{equation}
this being column $j$ of $A$.
If $\bm b\in K^n$, then
\begin{equation}\label{eqn:b}
  \bm b=\sum_{j\in n}b^j\mathbf e_j.
\end{equation}
We denote by
\begin{equation*}
  \uptau_A
\end{equation*}
the function $\bm x\mapsto A\bm x$ from $K^n$ to $K^m$.

To say that a function $\phi$ from $K^n$ to $K^m$
is a \textbf{linear transformation}
means that $\phi$ is a homomorphism of modules over $K$,
that is,
\begin{align*}
  \phi(\bm b+\bm c)&=\phi(\bm b)+\phi(\bm c),&
  \phi(d\cdot\bm b)&=d\cdot\phi(\bm b).
\end{align*}
The linear transformations from $K^n$ to $K^m$
compose a module over $K$ denoted by
\begin{equation*}
\lin{K^n,K^m}.  
\end{equation*}

\begin{theorem}\label{thm:isom}
  $X\mapsto\uptau_X\colon K^{m\times n}\cong\lin{K^n,K^m}$.
\end{theorem}

\begin{proof}
  We have to check that
  \begin{compactenum}[(1)]
  \item
    $\uptau_A\in\lin{K^n,K^m}$ for each $A$ in $K^{m\times n}$;
  \item
    $X\mapsto\uptau_X$ is a homomorphism;
  \item
    if $\uptau_A=0$, then $A=0$;
  \item
    every member of $\lin{K^n,K^m}$ is $\uptau_A$
    for some $A$ in $K^{m\times n}$.
  \end{compactenum}
  Each step in the verification of
  the first two points
  uses the definition of a $K$-module
  or a property of $K$ as a commutative ring.
  If $\uptau_A=0$,
  then, for each $j$ in $n$, $\bm0=A\mathbf e_j$,
  which is column $j$ of $A$ by \eqref{eqn:col-j};
  so $A=0$.

  
  Finally, since each $\uptau_A$ is linear,
  from \eqref{eqn:col-j} and \eqref{eqn:b} we have
  \begin{equation*}
    A\bm b=\sum_{j\in n}b^j\bm a_j.
  \end{equation*}
  If $T\in\lin{K^n,K^m}$, then by letting
  \begin{equation*}
    \bm a_j=T\mathbf e_j,
  \end{equation*}
  we obtain a matrix $A$, and then
  \begin{equation*}
    T=\uptau_A.\qedhere
  \end{equation*}
\end{proof}


If still $A\in K^{m\times n}$,
and now also $C\in K^{n\times s}$, then we define
\begin{equation}\label{eqn:mat-prod}
  AC=\left(\sum_{j\in n}a^i_jc^j_k\right)^{i\in m}_{k\in s},
\end{equation}
an element of $K^{m\times s}$.
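For example, over $\Z$,
\begin{align*}
  \begin{pmatrix}
    1&2\\
    3&4
  \end{pmatrix}
  \begin{pmatrix}
    0&1\\
    1&0
  \end{pmatrix}
  &=
  \begin{pmatrix}
    2&1\\
    4&3
  \end{pmatrix},&
  \begin{pmatrix}
    0&1\\
    1&0
  \end{pmatrix}
  \begin{pmatrix}
    1&2\\
    3&4
  \end{pmatrix}
  &=
  \begin{pmatrix}
    3&4\\
    1&2
  \end{pmatrix},
\end{align*}
so matrix multiplication need not be commutative.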
We shall let $M$ denote
the special case $K^{n\times n}$,
which is closed under matrix multiplication.
We have
\begin{equation*}
  \Id A=A=A\Id,
\end{equation*}
where
\begin{equation}\label{eqn:I}
  \Id=(\updelta^i_j)^{i\in n}_{j\in n}.
\end{equation}

\begin{theorem}
  When $A\in K^{m\times n}$ and $C\in K^{n\times s}$,
  then
  \begin{equation*}
    \uptau_{AC}=\uptau_A\circ\uptau_C.
  \end{equation*}
  Thus, for any matrices $A$, $B$, and $C$,
  if either of the products
  $(AB)C$ and $A(BC)$ is defined,
  then both are defined, and they are equal.
  In particular, the structure $(M,\cdot,\Id)$ is a ring,
  and $X\mapsto\uptau_X$ from $M$ to $\lin{K^n,K^n}$
  is an isomorphism of rings.
\end{theorem}

\section{Determinants}

We use the possibility of Gauss--Jordan elimination
to motivate the so-called Leibniz formula
\eqref{eqn:det-def} for the determinant.

\subsection{Desired Properties}

Let $M$ be the ring $K^{n\times n}$.
We want to define a \textbf{determinant} function,
\begin{equation*}
  X\mapsto\det X,
\end{equation*}
from $M$ to $K$ so that
\begin{equation}\label{eqn:det-prop}
  \det X\in\units K\iff X\in\units M.
\end{equation}
If $K$ is the two-element field $\F_2$,
then \eqref{eqn:det-prop} is equivalent to
\begin{equation}\label{eqn:01}
  \det X=
  \begin{cases}
    1,&\text{ if $X\in\units M$,}\\
    0,&\text{ otherwise.}
  \end{cases}
\end{equation}
Moreover, with this definition,
\begin{equation}\label{eqn:det(XY)}
  \det(XY)=\det X\det Y.
\end{equation}
However, over any $K$, we also want
\begin{equation}\label{eqn:poly}
  \det X=f\bigl(x^i_j\colon(i,j)\in n\times n\bigr)
\end{equation}
for some \emph{polynomial} $f$
(namely an element of the free abelian group
generated by products of the variables $x^i_j$).
In general then, \eqref{eqn:01} will fail.
We still want \eqref{eqn:det(XY)} to hold,
and this and \eqref{eqn:det-prop} imply
\begin{equation}\label{eqn:det(I)}
  \det\Id=1.
\end{equation}

\subsection{Additional properties}

In seeking a determinant function satisfying \eqref{eqn:det-prop},
\eqref{eqn:det(XY)}, and \eqref{eqn:poly}, and therefore \eqref{eqn:det(I)},
we consider what we know about $\units M$.
An element $A$ of $M$ is in $\units M$ just in case
$A$ is row-equivalent to $\Id$.\label{row-eq}
This means, for some \emph{elementary} matrices $E_i$,
\begin{equation}\label{eqn:fac}
  A=E_1\cdots E_r\Id.
\end{equation}
Thus, if \eqref{eqn:det(XY)} and \eqref{eqn:det(I)} hold,
then $\det A$ will be determined by the $\det E_i$.

We recall that an \textbf{elementary matrix}
is the result of applying to $\Id$
an \textbf{elementary row operation.}
If $\Phi$ is such, then
\begin{equation*}
  \Phi(\Id)A=\Phi(A).
\end{equation*}
Here $\Phi$ does one of the following:
\begin{compactenum}[1)]
\item
  add to one row another row, scaled by some $a$ in $K$;
\item
  interchange two rows;
\item
  scale a row by an element $s$ of $\units K$.
\end{compactenum}
Let us denote these three kinds of $\Phi$ respectively by
\begin{align*}
  &\Phi_a,&&\Psi,&&\Theta_s.
\end{align*}
We do not specify the row or rows involved.
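For instance, when $n=2$ and each operation involves row $0$
(the first operation involving row $1$ as well), the matrices are
\begin{align*}
  \Phi_a(\Id)&=
  \begin{pmatrix}
    1&a\\
    0&1
  \end{pmatrix},&
  \Psi(\Id)&=
  \begin{pmatrix}
    0&1\\
    1&0
  \end{pmatrix},&
  \Theta_s(\Id)&=
  \begin{pmatrix}
    s&0\\
    0&1
  \end{pmatrix}.
\end{align*}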
We draw the following conclusions about determinants.
\begin{asparaenum}
  \item
    If \eqref{eqn:poly} is to hold, then,
    for some single-variable polynomial $f$,
    \begin{equation*}
      \det\Phi_a(\Id)=f(a).
    \end{equation*}
    If also \eqref{eqn:det(XY)} is to hold,
    then, since
\begin{equation*}
\Phi_a(\Id)\cdot\Phi_b(\Id)=\Phi_{a+b}(\Id),
\end{equation*}
we must have
\begin{equation*}
  f(a)\cdot f(b)=f(a+b).
\end{equation*}
In particular, $f(x)^n=f(nx)$ for all $n$ in $\N$,
so that, comparing degrees, $f$ must be constant;
then, since $f\neq0$ and $f(0)=f(0)^2$, we must have
\begin{equation}\label{eqn:Phi}
  \det\Phi_a(\Id)=1.
\end{equation}
\item
      If, again, \eqref{eqn:det(XY)} is to hold,
    then, since
  \begin{equation*}
      \Psi(\Id)\cdot\Psi(\Id)=\Id,
  \end{equation*}
  we should have $\det\Psi(\Id)=\pm1$; we choose
  \begin{equation}\label{eqn:Psi}
    \det\Psi(\Id)=-1.
  \end{equation}
\item
    If, again, \eqref{eqn:poly} is to hold, then,
    for some single-variable polynomial $g$,
    \begin{equation*}
      \det\Theta_s(\Id)=g(s).
    \end{equation*}
    If also \eqref{eqn:det(XY)} is to hold,
    then, since
  \begin{equation*}
  \Theta_s(\Id)\cdot\Theta_t(\Id)=\Theta_{st}(\Id),
  \end{equation*}
  we must have
  \begin{equation*}
    g(s)\cdot g(t)=g(st).
  \end{equation*}
  In particular, $g(x)^n=g(x^n)$,
  so that $g$ must be a power of $x$,
  and $\det\Theta_s(\Id)$ must be a power of $s$;
  we choose
  \begin{equation}\label{eqn:Theta}
    \det\Theta_s(\Id)=s.
  \end{equation}
\end{asparaenum}

The definitions, or choices,
\eqref{eqn:Phi}, \eqref{eqn:Psi}, and \eqref{eqn:Theta}
will follow if $X\mapsto\det X$ is an \emph{alternating multilinear form.}

We can understand any module $K^{m\times n}$
as $(K^m)^n$ or $(K^n)^m$,
treating an element $A$ as one of
\begin{align*}
  &\bigl((a^i_j\colon i\in m)\colon j\in n\bigr),&
  &\bigl((a^i_j\colon j\in n)\colon i\in m\bigr).
\end{align*}
Given a module $V$ over $K$
and $n$ in $\N$,
we can form the module $V^n$.
    For each $k$ in $n$,
    we let $\uppi_k$ be the function from $V^n$ to $V$ given by
    \begin{equation*}
      \uppi_k(\bm x_j\colon j\in n)=\bm x_k.
    \end{equation*}
Suppose now
\begin{equation*}
  \phi\colon V^n\to K.
\end{equation*}
    Given $k$ in $n$
    and a function $j\mapsto\bm a_j$ from $n\setminus\{k\}$ to $V$,
    we let $\upiota$ be the function from $V$ to $V^n$ given by the rule that,
    for each $j$ in $n$,
    \begin{equation*}
      \uppi_j(\upiota(\bm x))=
      \begin{cases}
        \bm x,&\text{ if }j=k,\\
        \bm a_j,&\text{ if }j\in n\setminus\{k\}.
      \end{cases}
    \end{equation*}
    If the function $\bm x\mapsto\phi(\upiota(\bm x))$
    is always linear,
then $\phi$ itself is a \textbf{multilinear form,}
specifically an \textbf{$n$-linear} form, on $V$.
If, further, whenever $i<j<n$,
\begin{equation*}
  \bm x_i=\bm x_j\implies\phi(\bm x_k\colon k\in n)=0,
\end{equation*}
then $\phi$ is \textbf{alternating} as a multilinear form.
    
We let the group of permutations of a set $\Omega$ be
\begin{equation*}
  \Sym{\Omega}.
\end{equation*}
If $\Omega$ is finite, then $\Sym{\Omega}$ is generated by transpositions.
If $\sigma\in\Sym n$,
we define
\begin{equation}\label{eqn:sgn}
  \sgn(\sigma)=(-1)^{\size{\{(i,j)\in n\times n\colon i<j\And\sigma(i)>\sigma(j)\}}},
\end{equation}
one of the elements of $\units{\Z}$.
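For example, if $\sigma$ is the $3$-cycle
$\begin{cycle}0\cdiv1\cdiv2\end{cycle}$ in $\Sym{3}$,
so that $\sigma(0)=1$, $\sigma(1)=2$, and $\sigma(2)=0$,
then the pairs $(i,j)$ with $i<j$ and $\sigma(i)>\sigma(j)$
are precisely $(0,2)$ and $(1,2)$,
and therefore $\sgn(\sigma)=(-1)^2=1$.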

\begin{theorem}
  For every $n$ in $\N$,
  the function $\xi\mapsto\sgn(\xi)$ on $\Sym n$
  \begin{compactenum}[1)]
  \item
is given by
    \begin{equation}\label{eqn:sgn-frac}
  \sgn(\sigma)=\prod_{i\in j\in n}\frac{\sigma(i)-\sigma(j)}{i-j},
    \end{equation}
    \item
  is a homomorphism onto $\units{\Z}$, and
\item
  takes every transposition to $-1$.
  \end{compactenum}
\end{theorem}

\begin{proof}
  \begin{asparaenum}
  \item
    Since
    \begin{equation*}
      \prod_{i\in j\in n}\frac{\sigma(i)-\sigma(j)}{i-j}
      =\frac{\prod_{i\in j\in n}(\sigma(i)-\sigma(j))}
      {\prod_{i\in j\in n}(i-j)}=\pm1,
    \end{equation*}
    and the factors of the product that are negative
    are precisely those for which $\sigma(i)>\sigma(j)$,
    \eqref{eqn:sgn} follows from \eqref{eqn:sgn-frac}.
  \item
    Note
      \begin{equation*}
        \begin{aligned}[t]
          \sgn(\tau\sigma)
          &=\prod_{i\in j\in n}\frac{\tau\sigma(i)-\tau\sigma(j)}{i-j}\\
          &=\prod_{i\in j\in n}\left(
          \frac{\tau\sigma(i)-\tau\sigma(j)}{\sigma(i)-\sigma(j)}\cdot
          \frac{\sigma(i)-\sigma(j)}{i-j}\right)\\
          &=\prod_{i\in j\in n}
          \frac{\tau(i)-\tau(j)}{i-j}\cdot\sgn(\sigma)
          =\sgn(\tau)\cdot\sgn(\sigma).
        \end{aligned}
      \end{equation*}
  \item
Letting
  \begin{equation*}
    \tau=  \begin{cycle}
    0\cdiv1
  \end{cycle},
  \end{equation*}
  since every transposition is
    $\sigma\inv\cdot\tau\cdot\sigma$
  for some $\sigma$, it is enough to note that
  \begin{equation*}
    \sgn(\tau)=-1,
  \end{equation*}
  since
  \begin{equation*}
    \frac{\tau(i)-\tau(j)}{i-j}
    \begin{cases}
      >0,&\text{ when $(i,j)\neq(0,1)$,}\\
      <0,&\text{ when $(i,j)=(0,1)$.}
    \end{cases}\qedhere
  \end{equation*}  
  \end{asparaenum}
\end{proof}

An element $\sigma$ of $\Sym n$ is \textbf{even}
if $\sgn(\sigma)=1$;
this means $\sigma$ is a product of an even number of transpositions.
The even permutations compose the subgroup of $\Sym n$ denoted by
\begin{equation*}
  \Alt n.
\end{equation*}

\begin{theorem}
  For any module $V$ over $K$,
  for any $n$ in $\N$,
  for any alternating $n$-linear form $\phi$ on $V$,
  for each $\sigma$ in $\Sym n$,
  \begin{equation*}
    \phi(\bm x_{\sigma(j)}\colon j\in n)
    =\sgn(\sigma)\cdot\phi(\bm x_j\colon j\in n).
  \end{equation*}
\end{theorem}

\begin{proof}
  Every permutation of a finite set
  being a product of transpositions,
  we need only prove the claim when $n=2$
  and $\sigma$ is the nontrivial permutation of $2$.
  Assuming
    \begin{equation*}
      \bm x=\bm y\implies\phi(\bm x,\bm y)=0,
    \end{equation*}
    we have $0=\phi(\bm x+\bm y,\bm x+\bm y)$,
    but the latter is
    \begin{equation*}
      \phi(\bm x,\bm x)+\phi(\bm x,\bm y)+\phi(\bm y,\bm x)+\phi(\bm y,\bm y),
    \end{equation*}
which reduces to $\phi(\bm x,\bm y)+\phi(\bm y,\bm x)$.
\end{proof}

In particular, if $\sigma\in\Alt n$, then
  \begin{equation*}
    \phi(\bm x_{\sigma(j)}\colon j\in n)
    =\phi(\bm x_j\colon j\in n).
  \end{equation*}

  \subsection{Existence and uniqueness}

\begin{theorem}\label{thm:unique}
  There is at most one alternating multilinear function
  $X\mapsto\det X$ from $M$ to $K$ that satisfies \eqref{eqn:det(I)},
  and if it does exist, then it
  satisfies \eqref{eqn:det-prop} and \eqref{eqn:det(XY)}.
\end{theorem}

\begin{proof}
  The hypotheses ensure \eqref{eqn:Phi}, \eqref{eqn:Psi},
  and \eqref{eqn:Theta}, as well as \eqref{eqn:det(I)}.
  Then \eqref{eqn:det(XY)} holds when $X$ is elementary,
  and therefore it holds for all $X$,
  and also \eqref{eqn:det-prop} holds by the analysis in \eqref{eqn:fac}
  and since every non-invertible matrix is row-equivalent
  to one with a zero row.
\end{proof}

We now show that there is at least one function $X\mapsto\det X$
as desired.
We define
\begin{equation}\label{eqn:det-def}
  \det X=\sum_{\sigma\in\Sym n}\sgn(\sigma)\prod_{i\in n}x^i_{\sigma(i)}.
\end{equation}
Thus \eqref{eqn:poly} holds.
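For example, when $n=2$,
the sum has one term for the identity, whose sign is $1$,
and one term for the transposition, whose sign is $-1$, so that
\begin{equation*}
  \det X=x^0_0x^1_1-x^0_1x^1_0.
\end{equation*}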

\begin{theorem}
  For all $A$ in $M$,
  \begin{equation*}
    \det(\trans A)=\det A.
  \end{equation*}
\end{theorem}

\begin{proof}
  Since $\sgn(\sigma\inv)=\sgn(\sigma)$, we compute
  \begin{align*}
    \det(\trans A)
    &=\sum_{\sigma\in\Sym n}\sgn(\sigma)\prod_{i\in n}a^{\sigma(i)}_i\\
    &=\sum_{\sigma\in\Sym n}\sgn(\sigma\inv)\prod_{i\in n}a^i_{\sigma\inv(i)},
  \end{align*}
which is $\det A$.
\end{proof}

\begin{theorem}\label{thm:det}
  The function given by \eqref{eqn:det-def} is $n$-linear and alternating,
  and satisfies \eqref{eqn:det(I)}.
\end{theorem}

\begin{proof}
  By \eqref{eqn:I}, since
\begin{equation*}
\prod_{i\in n}\updelta^i_{\sigma(i)}=0\iff\sigma\neq\id_n,
\end{equation*}
\eqref{eqn:det(I)} holds.
For multilinearity,
suppose matrices $A$, $B$, and $C$ agree everywhere but in some row $k$,
and $a^k_j=s\cdot b^k_j+t\cdot c^k_j$ for each $j$ in $n$,
for some $s$ and $t$ in $K$.  Then
  \begin{multline*}
    \det A
    =\sum_{\sigma\in\Sym n}\sgn(\sigma)\prod_{i\in n\setminus\{k\}}a^i_{\sigma(i)}
    \cdot(s\cdot b^k_{\sigma(k)}+t\cdot c^k_{\sigma(k)})\\
    =s\cdot\det B+t\cdot\det C.
  \end{multline*}
  Finally, if $i<j<n$,
  and $\tau$ in $\Sym n$ transposes $i$ and $j$,
  then $\tau\inv=\tau$,
  and $\xi\mapsto\xi\circ\tau$ is a bijection between $\Alt n$
  and $\Sym n\setminus\Alt n$, so
  \begin{equation*}
    \det A=\sum_{\sigma\in\Alt n}\left(\prod_{k\in n}a^k_{\sigma(k)}
    -\prod_{k\in n}a^{\tau(k)}_{\sigma(k)}\right).
  \end{equation*}
  If moreover $a^i_k=a^j_k$ for each $k$ in $n$, then $\det A=0$.
\end{proof}

\section{Inversion}

We know from Theorems \ref{thm:unique} and \ref{thm:det}
that \eqref{eqn:det-prop} holds.
In particular, if $\det A\in\units K$,
then $A\inv$ exists in $M$.
We can compute $A\inv$ by the reduction in \eqref{eqn:fac};
but we now develop another method.

As in \eqref{eqn:sgn},
if $\tau$ is a bijection from a finite ordered set $S$
to a finite ordered set $T$,
we can define
\begin{equation*}
  \sgn(\tau)=(-1)^{\size{\{(i,j)\in S\times S\colon i<j\And\tau(i)>\tau(j)\}}}.
\end{equation*}
There is a unique order-isomorphism $\phi$ from $S$ to $T$,
and then
\begin{gather*}
  \phi\inv\circ\tau\in\Sym S,\\
  \sgn(\tau)=\sgn(\phi\inv\circ\tau).
\end{gather*}
Suppose now $\sigma\in\Sym n$ and $k\in n$.
Letting $S$ be $n\setminus\{k\}$
and $T$ be $n\setminus\{\sigma(k)\}$,
we can define $\tau$ to be the restriction of $\sigma$
to $S$,
so that $\tau$ is a bijection from $S$ to $T$.
Then
\begin{equation*}
  \frac{\sgn(\sigma)}{\sgn(\tau)}
=(-1)^{\size{\{j\in n\setminus\{k\}\colon j>k\iff\sigma(j)<\sigma(k)\}}}.
\end{equation*}

\begin{theorem}\label{thm:s/t}
  In the notation above,
\begin{equation*}
  \frac{\sgn(\sigma)}{\sgn(\tau)}
=(-1)^{k+\sigma(k)}.
\end{equation*}
\end{theorem}

\begin{proof}
  We may assume $k\leq\sigma(k)$.
  There are at least $\sigma(k)-k$ values of $j$ greater than $k$
  for which the condition
  \begin{equation}\label{eqn:j>k}
    j>k\iff\sigma(j)<\sigma(k)
  \end{equation}
  is satisfied.
  For every additional such value, there must be a value less than $k$
  for which \eqref{eqn:j>k} is satisfied;
  so the number of values of $j$ satisfying \eqref{eqn:j>k}
  has the same parity as $\sigma(k)-k$.
  This proves the claim.
\end{proof}

For any $(k,\ell)$ in $n\times n$,
assuming $n>1$, we let $\hat A^k_{\ell}$
be the matrix that we obtain from $A$
by deleting row $k$ and column $\ell$.
Formally,
\begin{equation*}
  \hat A^k_{\ell}=\left(a^{[i,k]}_{[j,\ell]}\right)^{i\in n-1}_{j\in n-1},
\end{equation*}
where
\begin{equation*}
  [i,k]=
  \begin{cases}
    i,&\text{ if }i<k,\\
    i+1,&\text{ if }k\leq i.
  \end{cases}
\end{equation*}

\begin{theorem}\label{thm:cof}
  For any $k$ in $n$,
  \begin{equation*}
    \det X=\sum_{j\in n}(-1)^{k+j}x^k_j\det \hat X^k_j.
  \end{equation*}
\end{theorem}

\begin{proof}
  We group the terms in \eqref{eqn:det-def},
which are indexed by $\sigma$ in $\Sym n$,
according to the value of $\sigma(k)$:
\begin{align*}
  \det X
  &=\sum_{j\in n}\sum_{\substack{\sigma\in\Sym n\\\sigma(k)=j}}
  \sgn(\sigma)\prod_{i\in n}x^i_{\sigma(i)}\\
  &=\sum_{j\in n}x^k_j\sum_{\substack{\sigma\in\Sym n\\\sigma(k)=j}}
  \sgn(\sigma)\prod_{\substack{i\in n\\i\neq k}}x^i_{\sigma(i)}\\
  &=\sum_{j\in n}(-1)^{k+j}x^k_j\det \hat X^k_j
\end{align*}
by Theorem \ref{thm:s/t}.
\end{proof}

We now define the operation $X\mapsto\adj X$ on $M$ by
\begin{equation*}
  \adj A=\left((-1)^{i+j}\det\hat A^j_i\right)^{i\in n}_{j\in n}.
\end{equation*}
This is the \textbf{adjugate} of $A$.
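For example, when $n=2$,
\begin{equation*}
  \adj{
  \begin{pmatrix}
    a&b\\
    c&d
  \end{pmatrix}}
  =
  \begin{pmatrix}
    d&-b\\
    -c&a
  \end{pmatrix},
\end{equation*}
and multiplying the two matrices yields $(ad-bc)\cdot\Id$,
in agreement with the next theorem.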

\begin{theorem}\label{thm:adj}
  For all $A$ in $M$,
  \begin{equation*}
    A\adj A=\det A\cdot\Id.
  \end{equation*}
\end{theorem}

\begin{proof}
  By Theorem \ref{thm:cof},
  if $A\adj A=B$,
  then $b^i_j$ is the determinant of the matrix that we obtain from $A$
  by replacing row $j$ with row $i$.  This determinant is
  \begin{compactitem}
  \item
    $\det A$, if $i=j$;
  \item
    $0$, if $i\neq j$, since $X\mapsto\det X$ is alternating.\qedhere
  \end{compactitem}
\end{proof}

\begin{theorem}
  If $\det A\in\units K$, then
  \begin{equation*}
    A\inv=(\det A)\inv\cdot\adj A.
  \end{equation*}
\end{theorem}

\begin{proof}
  Assuming $\det A\in\units K$,
  if we denote $(\det A)\inv\cdot\adj A$ by $B$,
  then by Theorem \ref{thm:adj},
  \begin{equation*}
    AB=\Id.
  \end{equation*}
  Since $A\inv$ does exist, we have
  \begin{equation*}
    A\inv=A\inv(AB)=(A\inv A)B=\Id B=B.\qedhere
  \end{equation*}
\end{proof}
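For example, in $\Z^{2\times2}$, if
\begin{equation*}
  A=
  \begin{pmatrix}
    2&1\\
    1&1
  \end{pmatrix},
\end{equation*}
then $\det A=1$, which is in $\units{\Z}$, and so
\begin{equation*}
  A\inv=\adj A=
  \begin{pmatrix}
    1&-1\\
    -1&2
  \end{pmatrix}.
\end{equation*}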

\chapter{Polynomials}

\section{Characteristic values}

We henceforth suppose $K$ is a field; still $M$ is $K^{n\times n}$.
For any $A$ in $M$, an element $\lambda$ of $K$
is a \textbf{characteristic value} or \textbf{eigenvalue} of $A$
if, for some nonzero $\bm b$ in $K^n$,
\begin{equation}\label{eqn:lambda}
  A\bm b=\lambda\cdot\bm b.
\end{equation}
In this case, $\bm b$ is a \textbf{characteristic vector}
or \textbf{eigenvector} of $A$ associated with $\lambda$.
Rewriting \eqref{eqn:lambda} as
\begin{equation*}
  (A-\lambda\cdot\Id)\bm b=\bm 0
\end{equation*}
shows that the characteristic values of $A$
are precisely the zeroes of the polynomial
\begin{equation*}
  \det(A-x\cdot\Id),
\end{equation*}
which is called the \textbf{characteristic polynomial} of $A$.
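For example, if $K=\Q$ and
\begin{equation*}
  A=
  \begin{pmatrix}
    0&1\\
    1&0
  \end{pmatrix},
\end{equation*}
then the characteristic polynomial of $A$ is $x^2-1$,
whose zeroes $1$ and $-1$ are the eigenvalues of $A$,
with associated eigenvectors $(1,1)$ and $(1,-1)$ respectively.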

If $\lambda$ is indeed a characteristic value of $A$,
then the null-space of $A-\lambda\cdot\Id$
is the \textbf{characteristic space}
or \textbf{eigenspace} of $A$ associated with $\lambda$.


\begin{theorem}\label{thm:eig-ind}
  Eigenvectors corresponding to distinct eigenvalues of any matrix
  are linearly independent.
\end{theorem}

\begin{proof}
We prove the claim by induction on the number of eigenvectors.
The empty set of eigenvectors is trivially linearly independent.
Suppose $(\bm v_i\colon i<k)$ is linearly independent,
each $\bm v_i$ being an eigenvector of $A$
with associated eigenvalue $\lambda_i$,
the $\lambda_i$ being distinct.
Let $\bm v_k$ be an eigenvector
associated with a new eigenvalue, $\lambda_k$.
If
\begin{equation}\label{eqn:eig}
\sum_{i\leq k}x^i\bm v_i=\bm0,
\end{equation}
then
\begin{align*}
  \bm0=(A-\lambda_k\cdot\Id)\sum_{i\leq k}x^i\bm v_i
&=\sum_{i\leq k}(\lambda_i-\lambda_k)x^i\bm v_i\\
&=\sum_{i<k}(\lambda_i-\lambda_k)x^i\bm v_i,
\end{align*}
so $x^i=0$ when $i<k$,
and then also $x^k=0$ by \eqref{eqn:eig}.
\end{proof}

If $A$ in $M$ has $n$ linearly independent eigenvectors $\bm b_i$,
each associated with an eigenvalue $\lambda_i$
(possibly not distinct),
then the eigenvectors are the columns of an element $B$ of $\units M$, and
\begin{equation*}
  AB=BL,
\end{equation*}
where
\begin{equation*}
  \ell^i_j=
  \begin{cases}
    \lambda_i,&\text{ if }i=j,\\
    0,&\text{ if }i\neq j,
  \end{cases}
\end{equation*}
  or in short
  \begin{equation*}
    L=\diag(\lambda_i\colon i\in n),
  \end{equation*}
  a \textbf{diagonal matrix.}
  Thus
  \begin{equation*}
    B\inv AB=\diag(\lambda_i\colon i\in n),
  \end{equation*}
  and in particular $A$ is \textbf{diagonalizable.}
  
  It will be useful to recall that every matrix $B$ in $\units M$
  is the change-of-basis matrix
  from the basis $(\bm b_j\colon j\in n)$ of $K^n$
  consisting of the columns of $B$
  to the standard basis of $K^n$.
  
  Every matrix of the form $P\inv AP$ for \emph{some} $P$ in $\units M$
  is \textbf{similar} to $A$
  (in group theory one says \emph{conjugate}).
  Similarity of matrices is an equivalence relation,
  as is row-equivalence (mentioned first on page \pageref{row-eq});
  but they are different relations.
  We want to characterize the diagonalizable matrices.

  A matrix $A$ in $M$ is \textbf{triangular} if
\begin{equation}\label{eqn:i<j}
  \bigwedge_{j<i<n}a^i_j=0.
\end{equation}
A matrix similar to a triangular matrix is
\textbf{triangularizable.}

\begin{theorem}\label{thm:tri-char}
  A matrix $A$ in $M$ is triangularizable just in case,
  for some $B$ in $\units M$ with columns $\bm b_j$,
  \begin{equation}\label{eqn:Axe}
    \bigwedge_{j\in n}  A\bm b_j\in\Span\{\bm b_0,\dots,\bm b_j\};
  \end{equation}
  and in this case $B\inv AB$ is triangular.
\end{theorem}

\begin{proof}
  The condition \eqref{eqn:i<j} on $A$ for being triangular means precisely
  \begin{equation}\label{eqn:Ae}
    \bigwedge_{j\in n}  A\mathbf e_j=\sum_{i=0}^ja^i_j\mathbf e_i,
  \end{equation}
  and thus that $\Id$ is a matrix $B$ as in the statement of the theorem.
  If $B\inv AB$ is triangular, then putting this matrix in place of $A$
  in \eqref{eqn:Ae} yields \eqref{eqn:Axe}.
  Conversely, if $B$ is as in the statement,
  then we can write \eqref{eqn:Axe} as
  \begin{equation*}
    \bigwedge_{j\in n}  AB\mathbf e_j
    \in\Span\{B\mathbf e_0,\dots,B\mathbf e_j\},
  \end{equation*}
  and then
  \begin{equation*}
    \bigwedge_{j\in n}  B\inv AB\mathbf e_j
    \in\Span\{\mathbf e_0,\dots,\mathbf e_j\},
  \end{equation*}
  so $B\inv AB$ is triangular.
\end{proof}

  

  \begin{theorem}\label{thm:tri}
    Every matrix in $M$ is triangularizable
    over an algebraically closed field.
  \end{theorem}

  \begin{proof}
    Given $A$ in $M$,
    assuming $K$ is algebraically closed,
    so that the characteristic polynomial of $A$
    has at least one zero,
    and therefore $A$ has at least one eigenvector,
    we extend this to a basis of $K^n$ that
    satisfies \eqref{eqn:Axe}.
    Doing this will be enough,
        by Theorem \ref{thm:tri-char}.
    
    We use induction on $n$.
    The claim is trivial when $n=1$.
    Suppose it holds when $n=m$.
    Now let $n=m+1$ and $A\in M$.
There is a basis $(\bm p_0,\dots,\bm p_m)$ of $K^n$
such that $\bm p_0$ is an eigenvector.
Thus the basis satisfies the first conjunct of \eqref{eqn:Axe}.
We could satisfy the remaining conjuncts,
by the inductive hypothesis,
if we had
\begin{equation*}\label{eqn:Av}
  \bigwedge_{j=1}^mA\bm p_j\in\Span\{\bm p_1,\dots,\bm p_m\}.
\end{equation*}
However, we may not actually have this.
Nonetheless,
there are matrices $B$ and $C$
such that
\begin{align}\label{eqn:tBtC}
  \uptau_B\left(\sum_{i=0}^mx^i\bm p_i\right)&=x^0\bm p_0,&
  \uptau_C\left(\sum_{i=0}^mx^i\bm p_i\right)&=\sum_{i=1}^mx^i\bm p_i.
\end{align}
In words,
\begin{compactitem}
  \item
    $\uptau_C$ is an endomorphism of $\Span\{\bm p_1,\dots,\bm p_m\}$,
    and therefore so is $\uptau_{CA}$;
    \item
$\uptau_B$ is a homomorphism from $K^n$ to $\Span\{\bm p_0\}$,
      and therefore so is $\uptau_{BA}$.
\end{compactitem}
Now $\Span\{\bm p_1,\dots,\bm p_m\}$ has a basis $(\bm v_1,\dots,\bm v_m)$
such that
\begin{equation*}
\bigwedge_{j=1}^mCA\bm v_j\in\Span\{\bm v_1,\dots,\bm v_j\},
\end{equation*}
by inductive hypothesis.
Therefore, letting $\bm v_0=\bm p_0$,
\begin{equation*}
\bigwedge_{j=1}^m(BA+CA)\bm v_j\in\Span\{\bm v_0,\dots,\bm v_j\}.
\end{equation*}
From all of \eqref{eqn:tBtC},
\begin{equation*}
  \uptau_B+\uptau_C=\id_{K^n},
\end{equation*}
and so
\begin{equation*}
\bigwedge_{j=1}^mA\bm v_j\in\Span\{\bm v_0,\dots,\bm v_j\}.
\end{equation*}
Finally, since $\bm v_0$ is an eigenvector of $A$,
\begin{equation*}
\bigwedge_{j=0}^mA\bm v_j\in\Span\{\bm v_0,\dots,\bm v_j\}.
\end{equation*}
Thus we have \eqref{eqn:Axe}.
This completes the induction.
  \end{proof}

  We can write out the foregoing proof
  entirely in terms of matrices as follows.
  We have
  \begin{equation*}
    P\inv AP
    =\left(\begin{array}{c|c}
      \lambda&\bm a\\\hline\bm0&D
    \end{array}\right)
  \end{equation*}
    for some $m\times m$ matrix $D$,
  where $P$ has columns $\bm p_0,\dots,\bm p_m$,
  and $\lambda$ is the eigenvalue associated with $\bm p_0$.
  We choose $B$ and $C$ so that
  \begin{align*}
    P\inv BP
    &=\left(
    \begin{array}{c|c}
      1&\bm0\\\hline\bm0&0
    \end{array}\right),&
    P\inv CP
    &=\left(
    \begin{array}{c|c}
      0&\bm0\\\hline\bm0&\Id
    \end{array}\right).
  \end{align*}
  Then
  \begin{multline*}
    P\inv CAP=P\inv CPP\inv AP
    =\left(
    \begin{array}{c|c}
      0&\bm0\\\hline\bm0&\Id
    \end{array}\right)
\left(
    \begin{array}{c|c}
      \lambda&\bm a\\\hline\bm0&D
    \end{array}\right)\\
    =
\left(
    \begin{array}{c|c}
      0&\bm0\\\hline\bm0&D
    \end{array}\right)
  \end{multline*}
  and
  \begin{equation*}
    P\inv BAP
=\left(\begin{array}{c|c}
      1&\bm0\\\hline\bm0&0
    \end{array}\right)    
\left(\begin{array}{c|c}
      \lambda&\bm a\\\hline\bm0&D
\end{array}\right)
=\left(\begin{array}{c|c}
      \lambda&\bm a\\\hline\bm0&0
\end{array}\right).
  \end{equation*}
  Therefore
  \begin{gather*}
    P\inv BAP+P\inv CAP=P\inv AP,\\
    BA+CA=A.
  \end{gather*}
  By inductive hypothesis, for some $Q$, $Q\inv DQ$
  is a triangular matrix $T$.  Then
  \begin{equation*}
    \left(\begin{array}{c|c}
      1&\bm0\\\hline\bm0&Q
    \end{array}\right)\inv
    P\inv CAP
    \left(\begin{array}{c|c}
      1&\bm0\\\hline\bm0&Q
    \end{array}\right)
    =   \left(\begin{array}{c|c}
      0&\bm0\\\hline\bm0&T
    \end{array}\right),
  \end{equation*}
  while
  \begin{multline*}
    \left(\begin{array}{c|c}
      1&\bm0\\\hline\bm0&Q
    \end{array}\right)\inv
    P\inv BAP
    \left(\begin{array}{c|c}
      1&\bm0\\\hline\bm0&Q
    \end{array}\right)\\
    =
    \left(\begin{array}{c|c}
      1&\bm0\\\hline\bm0&Q\inv
    \end{array}\right)
        \left(\begin{array}{c|c}
      \lambda&\bm aQ\\\hline\bm0&0
        \end{array}\right)
        =        \left(\begin{array}{c|c}
      \lambda&\bm aQ\\\hline\bm0&0
        \end{array}\right),
  \end{multline*}
  and therefore
  \begin{equation*}
    \left(\begin{array}{c|c}
      1&\bm0\\\hline\bm0&Q
    \end{array}\right)\inv
    P\inv AP
    \left(\begin{array}{c|c}
      1&\bm0\\\hline\bm0&Q
    \end{array}\right)
    =   \left(\begin{array}{c|c}
      \lambda&\bm aQ\\\hline\bm0&T
    \end{array}\right),
  \end{equation*}
  a triangular matrix.
  
\section{Polynomial functions of matrices}

Although $K$ is a field,
the ring $M$ is not commutative when $n>1$.
However, it has commutative sub-rings.
Indeed, for every $A$ in $M$,
the smallest sub-ring of $M$ that contains $A$ is commutative.
We may denote this sub-ring by
\begin{equation*}
K[A].
\end{equation*}
This is also a vector space over $K$,
spanned by the powers $\Id$, $A$, $A^2$, $A^3$, \lips, of $A$.
Thus
\begin{equation*}
K[A]=\bigl\{f(A)\colon f\in K[x]\bigr\},
\end{equation*}
where, if
\begin{equation}\label{eqn:f}
  f(x)=\sum_{i=0}^mf_ix^i
\end{equation}
in $K[x]$ (and $x^i$ is now the power $\prod_{k\in i}x$),
we define
\begin{equation*}
f(A)=\sum_{i=0}^mf_iA^i.
\end{equation*}
If $f(A)$ is the zero matrix,
we may say $A$ is a \textbf{zero} of $f$.
However, theorems about zeroes in fields may not apply here.
For example, since $K[A]$ may have zero divisors,
the number of zeroes of $f$ in $M$ may exceed $\deg f$.
Indeed, $A$ itself may be a zero divisor, as for example when
\begin{equation*}%\label{eqn:A0100}
  A=
  \begin{pmatrix}
0&1\\0&0
\end{pmatrix},
\end{equation*}
since then $A^2$ is the zero matrix.
In this case every scalar multiple $b\cdot A$ of $A$ is a zero in $K[A]$
of the polynomial $x^2$.

\section{Cayley--Hamilton Theorem}

Given $A$ in $M$,
we are going to want to know that $A$ is a zero
of some nonzero polynomial over $K$.
Suppose
\begin{equation}\label{eqn:f=det}
  f(x)=\det(x\cdot\Id-A),
\end{equation}
the characteristic polynomial of $A$.
The equation remains correct automatically
when we replace $x$ with an element of $K$
or of any field that includes $K$.
Note however that, for a matrix $B$ in $M$,
while $f(B)\in M$, we have
\begin{equation*}
  \det(B\Id-A)\in K.
\end{equation*}
Since $A\Id-A$ is the zero matrix,
we have $\det(A\Id-A)=0$.
This observation is not enough to ensure that $f(A)$ is the zero matrix.
Nonetheless, we shall show that it is,
in two ways.

\begin{theorem}[Cayley--Hamilton]
Over any field,
every matrix is a zero of its characteristic polynomial.
\end{theorem}

\begin{proof}[First proof.]
  By Theorem \ref{thm:adj}, with $f$ as in \eqref{eqn:f=det} we have
\begin{equation}\label{eqn:XI-A}
f(x)\cdot\Id=(x\cdot\Id-A)\adj{x\cdot\Id-A}.
\end{equation}
Moreover,
\begin{equation*}
f(x)\cdot\Id=  \sum_{j=0}^nx^j\cdot F_j,
\end{equation*}
where, in the notation of \eqref{eqn:f},
\begin{equation*}
  F_j=f_j\cdot\Id.
\end{equation*}
Likewise, for some matrices $B_j$ in $M$,
\begin{equation*}
  \adj{x\cdot\Id-A}
  =\sum_{j=0}^{n-1}x^j\cdot B_j.
\end{equation*}
 Thus \eqref{eqn:XI-A} becomes
\begin{equation}\label{eqn:fB}
  \sum_{j=0}^nx^j\cdot f_j\cdot\Id
  =(x\cdot\Id-A)\sum_{j=0}^{n-1}x^j\cdot B_j.
\end{equation}
This then will be true when $x$ is replaced by an element of $M$
that commutes with $A$.
Since $A$ is such an element,
and the right member of \eqref{eqn:fB} becomes $0$
when $x$ is replaced with $A$,
the same is true for the left member;
but this just means $f(A)=0$.
\end{proof}

\begin{proof}[Second proof.]
  Letting $f$ be the characteristic polynomial of $A$ in $M$
  as in \eqref{eqn:f=det},
we want to show $f(A)=0$.  
Since the determinant function is multiplicative,
for every $P$ in $\units M$,
\begin{align*}
\det(x\cdot\Id-A)
&=\det\bigl(P\inv\cdot(x\cdot\Id-A)\cdot P\bigr)\\
&=\det(x\cdot\Id-P\inv AP).
\end{align*}
By Theorem \ref{thm:tri},
for some matrix $P$, $P\inv AP$ is a triangular matrix.
It does not matter that entries of $P$
may come from the algebraic closure of $K$,
possibly not $K$ itself.
We may assume $A$ is triangular.
The characteristic polynomial of $A$ is now
\begin{equation*}
\prod_{i<n}(x-a^i_i).
\end{equation*}
Since the product is independent of the order of the factors,
so is the product
$\prod_{i<n}(A-a^i_i\cdot\Id)$.
We have to show that this product is $0$.
Column $j$ of the product is
\begin{equation*}
\prod_{i<n}(A-a^i_i\cdot\Id)\mathbf e_j.
\end{equation*}
However, by \eqref{eqn:Ae},
\begin{equation}\label{eqn:A-ajj}
(A-a^j_j\cdot\Id)\mathbf e_j
=A\mathbf e_j-a^j_j\mathbf e_j
=\sum_{i<j}a^i_j\mathbf e_i,
\end{equation}
and in particular
\begin{equation*}
(A-a^j_j\cdot\Id)\mathbf e_j
\in\Span\{\mathbf e_i\colon i<j\}.  
\end{equation*}
By induction then,
\begin{equation*}%\label{eqn:A-aii}
\prod_{i\leq j}(A-a^i_i\cdot\Id)\mathbf e_j=\bm0.
\end{equation*}
Finally
\begin{equation*}
  \prod_{i\in n}(A-a^i_i\cdot\Id)\mathbf e_j=
  \prod_{j<i<n}(A-a^i_i\cdot\Id)\prod_{i\leq j}(A-a^i_i\cdot\Id)\mathbf e_j
  =\bm0.\qedhere  
\end{equation*}
\end{proof}

\section{Minimal polynomial}

\begin{theorem}
  For any $A$ in $M$, the subset
  \begin{equation*}
    \{f\in K[x]\colon f(A)=0\}
  \end{equation*}
  of $K[x]$ is a nonzero ideal.
\end{theorem}

\begin{proof}
  The set is easily seen to be an ideal.
  It is nontrivial for containing the characteristic polynomial of $A$;
  alternatively, since $\dim M=n^2$,
  there must be some coefficients $f_i$, not all $0$, for which
  \begin{equation*}
    f_0\cdot\Id+f_1\cdot A+\dots+f_{n^2}\cdot A^{n^2}=0.\qedhere
  \end{equation*}
\end{proof}

Since $K[x]$ is a principal-ideal domain,
the ideal of the theorem has a monic generator,
called the \textbf{minimal polynomial} of $A$.
This polynomial therefore is a factor of the characteristic polynomial of $A$.
In particular, every zero in $K$ of the minimal polynomial
is a zero of the characteristic polynomial.
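For example, when $n>1$,
the characteristic polynomial of $\Id$ in $M$ is $(x-1)^n$,
while the minimal polynomial of $\Id$ is just $x-1$.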

\begin{theorem}\label{thm:zeroes}
  In a field, every zero of the characteristic polynomial
  of a square matrix over the field
  is a zero of the minimal polynomial.
  Hence every irreducible factor of the characteristic polynomial
  is a factor of the minimal polynomial.
\end{theorem}

\begin{proof}
  A zero of the characteristic polynomial of $A$
  is just an eigenvalue of $A$.
  Let $\lambda$ be an eigenvalue,
  with corresponding eigenvector $\bm b$.
  Thus
  \begin{gather*}
    A\bm b=\lambda\bm b,\\
    A^j\bm b=\lambda^j\bm b,\\
    f(A)\bm b=f(\lambda)\bm b
  \end{gather*}
  for all $f(x)$ in $K[x]$.
  In particular,
  \begin{equation*}
    f(A)=0\implies f(\lambda)=0.
  \end{equation*}
  If $f$ is the minimal polynomial of $A$,
  then $f(A)=0$, so $f(\lambda)=0$.
\end{proof}

\begin{theorem}
  A square matrix is diagonalizable if and only if
  its minimal polynomial is the product of distinct linear factors.
\end{theorem}

\begin{proof}
  Suppose $A$ in $M$ is diagonalizable, so that,
  for some $B$ in $\units M$, for some $\lambda_j$ in $K$,
  \begin{equation*}
    AB=B\diag(\lambda_j\colon j\in n).
  \end{equation*}
  Letting column $j$ of $B$ be $\bm b_j$, we have
  \begin{gather*}
    A\bm b_j=\lambda_j\bm b_j,\\
    (A-\lambda_j\cdot\Id)\bm b_j=\bm0.
  \end{gather*}
  Letting
    $m=\size{\{\lambda_j\colon j\in n\}}$,
  we may assume
  \begin{equation*}
    \{\lambda_j\colon j\in n\}
    =\{\lambda_i\colon i\in m\}.
  \end{equation*}
  For all $j$ in $n$, we have
  \begin{equation*}
    \prod_{i\in m}(A-\lambda_i\cdot\Id)\bm b_j=\bm0.
  \end{equation*}
  The $\bm b_j$ being linearly independent, letting
  \begin{equation}\label{eqn:f(x)}
    f(x)=\prod_{i\in m}(x-\lambda_i),
  \end{equation}
  we conclude $f(A)=0$,
  so the minimal polynomial of $A$ is a factor of $f(x)$.
  (It is the same as $f(x)$,
  since the $\lambda_i$ are eigenvalues of $A$,
  and each of these must be a zero of the minimal polynomial,
  by Theorem \ref{thm:zeroes}.)

  Suppose conversely $f(x)$ as given by \eqref{eqn:f(x)},
  where again the $\lambda_i$ are all distinct,
  is the minimal polynomial of $A$.
  In particular, $f(A)=0$.
  If $j\in m$, we can define $g_j(x)$ in $K[x]$ by
  \begin{equation}\label{eqn:gj}
    (x-\lambda_j)g_j(x)=f(x).
  \end{equation}
  The $\lambda_j$ being distinct,
  the greatest common divisor of the $g_j(x)$ in $K[x]$ is unity.
  Since $K[x]$ is a Euclidean domain,
  by B\'ezout's Lemma there are $q_j(x)$ in $K[x]$ such that
  \begin{equation*}
    \sum_{j\in m}g_j(x)q_j(x)=1.
  \end{equation*}
  Then
  \begin{equation*}
    \sum_{j\in m}g_j(A)q_j(A)=\Id.
  \end{equation*}
  Thus for every $\bm v$ in $K^n$,
  when we define
  \begin{equation}\label{eqn:gq}
    g_j(A)q_j(A)\bm v=\bm w_j,
  \end{equation}
  we have
  \begin{equation*}
    \sum_{j\in m}\bm w_j=\bm v.
  \end{equation*}
  But then since $f(A)=0$,
  from \eqref{eqn:gj} and \eqref{eqn:gq} we have
  \begin{equation*}
    \bm0=f(A)q_j(A)\bm v=(A-\lambda_j\cdot\Id)\bm w_j,
  \end{equation*}
  so that $\bm w_j$ belongs to the eigenspace associated with $\lambda_j$.
  In particular, by Theorem \ref{thm:eig-ind},
  there must be $n$ linearly independent eigenvectors,
  so $A$ is diagonalizable.
\end{proof}
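For example, the matrix
\begin{equation*}
  \begin{pmatrix}
    0&1\\
    0&0
  \end{pmatrix}
\end{equation*}
considered earlier has minimal polynomial $x^2$,
which is not a product of distinct linear factors;
and indeed the matrix is not diagonalizable,
since its only eigenvalue is $0$,
and the associated eigenspace has dimension $1$.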

\chapter{Jordan Normal Form}

The presentation here is based mainly on Lang \cite{Lang-LA}.

\section{Cyclic spaces}

Supposing $\lambda$ is an eigenvalue of the $n\times n$ matrix $A$, we let
\begin{equation}\label{eqn:Bl}
  B_{\lambda}=A-\lambda\cdot\Id.
\end{equation}
If $\bm v_0$ is a corresponding eigenvector, this means
\begin{align*}
  \bm v_0&\neq\bm0,
  &B_{\lambda}\bm v_0&=\bm0.
\end{align*}
If possible now, let $B_{\lambda}\bm v_1=\bm v_0$.  Then
\begin{align*}
  A\bm v_1&=\lambda\bm v_1+\bm v_0,
  &B_{\lambda}{}^2\bm v_1&=\bm 0.
\end{align*}
Suppose, continuing in this way, we have, for some $s$,
whenever $0<k<s$,
\begin{align*}
  A\bm v_k&=\lambda\bm v_k+\bm v_{k-1},
  &B_{\lambda}{}^{k+1}\bm v_k&=\bm 0.
\end{align*}
Then defining $P$ as the $n\times s$ matrix
\begin{equation*}
  \left(
  \begin{array}{c|c|c}
    \bm v_0&\cdots&\bm v_{s-1}
  \end{array}
  \right),
\end{equation*}
we have
\begin{multline}\label{eqn:PA}
AP=\left(
  \begin{array}{c|c|c}
    A\bm v_0&\cdots&A\bm v_{s-1}
  \end{array}
  \right)\\
  =\left(
  \begin{array}{c|c|c|c}
    \lambda\bm v_0&\bm v_0+\lambda\bm v_1&\cdots&\bm v_{s-2}+\lambda\bm v_{s-1}
  \end{array}
  \right)
  =PJ,
\end{multline}
where $J$ is the $s\times s$ matrix
\begin{equation*}
 \begin{pmatrix}
\lambda&      1&     0&  \dots&      0\\
      0&\lambda&     1& \ddots& \vdots\\
      0&      0&\ddots& \ddots&      0\\
 \vdots&       &\ddots&\lambda&		  1\\
      0&    \hdotsfor2&      0&\lambda
\end{pmatrix}.
\end{equation*}

\begin{theorem}\label{thm:P}
  The columns of the matrix $P$ just defined are linearly independent.
\end{theorem}

\begin{proof}
  Writing $\bm v$ for $\bm v_{s-1}$ and $B$ for $B_{\lambda}$, we have
  \begin{equation*}
    P=\left(
  \begin{array}{c|c|c}
    B^{s-1}\bm v&\cdots&B^0\bm v
  \end{array}
  \right).
  \end{equation*}
  We show the columns are linearly independent.
  Suppose for some scalars $c^i$,
  \begin{equation*}
    \sum_{i<s}c^i\cdot B^{s-1-i}\bm v=\bm0.
  \end{equation*}
  Then $f(B)\bm v=\bm0$, where
  \begin{equation*}
    f(x)=\sum_{i<s}c^ix^{s-1-i}.
  \end{equation*}
  However, also $g(B)\bm v=\bm0$, where
  \begin{equation*}
    g(x)=x^s.
  \end{equation*}
  Letting $h$ be the greatest common factor of $f$ and $g$,
  by B\'ezout's Lemma we have
  \begin{equation*}
    h(B)\bm v=\bm0.
  \end{equation*}
  Also, $h(x)=x^r$ for some $r$, where $r\leq s$.
  When $r<s$, we have
  \begin{equation*}
    B^r\bm v=\bm v_{s-1-r},
  \end{equation*}
  which is not $\bm0$.
  Thus $h(x)=x^s$, and therefore $f=0$.
\end{proof}

In the proof, $\Span\{\bm v_k\colon k\in s\}$
is a \textbf{$B$-cyclic} subspace of $K^n$,
because it is,
for some one vector $\bm v$, spanned by the vectors $B^k\bm v$.
The space is then \textbf{$B$-invariant,}
because it is closed under multiplication by $B$.

\section{Direct sums}

Suppose $V$ is a vector space over $K$,
and for some $n$ in $\N$,
for each $i$ in $n$,
$V_i$ is a subspace of $V$.
If the homomorphism
\begin{equation*}
(v_i\colon i<n)\mapsto\sum_{i<n}v_i
\end{equation*}
from $\prod_{i<n}V_i$ to $V$ is surjective,
then $V$ is the \textbf{sum}
of the subspaces $V_i$,
and we may write
\begin{equation*}
V=V_0+\dots+V_{n-1}=\sum_{i<n}V_i.
\end{equation*}
If, further, the homomorphism is injective,
then $V$ is the \textbf{direct sum} of the $V_i$,
and we may write
\begin{equation*}
V=V_0\oplus\dots\oplus V_{n-1}=\bigoplus_{i<n}V_i.
\end{equation*}

Given $B$ in $M$, we shall understand
\begin{equation*}
\ker B=\{\bm x\in K^n\colon B\bm x=\bm0\}.
\end{equation*}


\begin{lemma}
  If $f$ and $g$ in $K[x]$ are co-prime,
  then for all $A$ in $M$,
  \begin{equation*}
    \ker(f(A)g(A))=\ker f(A)\oplus\ker g(A).
  \end{equation*}
\end{lemma}

\begin{proof}
By B\'ezout's Lemma,
for some $q$ and $r$ in $K[x]$,
\begin{gather*}
  qf+rg=1,\\
  q(A)f(A)+r(A)g(A)=\Id.
\end{gather*}
For all $\bm v$ in $K^n$ then,
\begin{equation*}
  q(A)f(A)\bm v+r(A)g(A)\bm v=\bm v.
\end{equation*}
Suppose now
\begin{equation*}
  \bm w\in\ker(f(A)g(A)).
\end{equation*}
Then
\begin{align*}
  r(A)g(A)\bm w&\in\ker f(A),
  &q(A)f(A)\bm w&\in\ker g(A),
\end{align*}
and so
\begin{equation*}
  \bm w\in\ker f(A)+\ker g(A).
\end{equation*}
Conversely, suppose
\begin{align*}
  \bm u&\in\ker f(A),
  &\bm v&\in\ker g(A).
\end{align*}
Then
\begin{multline*}
  \bm u
  =q(A)f(A)\bm u+r(A)g(A)\bm u\\
  =r(A)g(A)\bm u
  =r(A)g(A)(\bm u+\bm v)
\end{multline*}
and likewise
\begin{equation*}
  \bm v=q(A)f(A)(\bm u+\bm v).
\end{equation*}
This shows $(\bm u,\bm v)\mapsto\bm u+\bm v$ is injective.
\end{proof}

\begin{theorem}\label{thm:sum}
  If each of finitely many polynomials $f$ in $K[x]$
  is prime to the product of the others,
  then for all $A$ in $M$,
  \begin{equation*}
  \ker\prod_ff(A)=\bigoplus_f\ker f(A).
  \end{equation*}
\end{theorem}

The theorem follows from the lemma, by induction on the number of polynomials.

\section{Kernels}

Suppose $A$ in $M$ has characteristic polynomial $f$,
and $K$ is algebraically closed.
Then
\begin{equation*}
  f=\prod_{j<m}(x-\lambda_j)^{r_j}
\end{equation*}
for some distinct $\lambda_j$ in $K$ and some $r_j$ in $\N$.
By the Cayley--Hamilton Theorem,
\begin{equation*}
  \ker\bigl(f(A)\bigr)=K^n.
\end{equation*}
Letting
\begin{equation*}
B_j=A-\lambda_j\cdot\Id,
\end{equation*}
we have now,
by Theorem \ref{thm:sum},
\begin{equation}\label{eqn:Kn=sum}
K^n=\bigoplus_{j<m}\ker\left(B_j{}^{r_j}\right).
\end{equation}

\begin{theorem}
For all $B$ in $M$,
for all $s$ in $\N$,
$\ker(B^s)$
is the direct sum of $B$-cyclic subspaces.
\end{theorem}

\begin{proof}
We shall prove the claim for every $B$-invariant subspace of $\ker(B^s)$.
We use induction on the dimension of the subspace.
If the dimension is $0$, the claim is vacuously true.
Suppose $V$ is a $B$-invariant subspace of $\ker(B^s)$
having positive dimension.
Then
\begin{align*}
  V&\nincluded\ker(B^0),
  &\ker(B^0)&\included\dots\included\ker(B^s),
  &V&\included\ker(B^s),
\end{align*}
so for some $r$,
\begin{align*}
  V&\nincluded\ker(B^{r-1}),
  &V&\included\ker(B^r).
\end{align*}
Then
\begin{equation*}
VB\included V\cap\ker(B^{r-1})\pincluded V.
\end{equation*}
This shows
\begin{equation*}
VB\pincluded V.
\end{equation*}
As an inductive hypothesis, we assume
\begin{equation}\label{eqn:oplus}
VB=\bigoplus_{i<m}W_i,
\end{equation}
where each $W_i$ is $B$-cyclic.
Then for some $\bm w_i$ in $VB$,
for some $r_i$ in $\N$,
\begin{align}\label{eqn:basis}
W_i&=\Span\{B^j\bm w_i\colon j<r_i\},&
\bm0&= B^{r_i}\bm w_i.
\end{align}
For some $\bm v_i$ in $V$,
\begin{equation}\label{eqn:wvB}
\bm w_i=B\bm v_i.
\end{equation}
Now let
\begin{equation*}
  V_i=\Span\{B^j\bm v_i\colon j\leq r_i\}.
\end{equation*}
Then $V_i$ is a $B$-cyclic space, since $B^{r_i+1}\bm v_i=\bm0$.
We shall show that the sum of the $V_i$ is direct.
An arbitrary element of $V_i$ is $f_i(B)\bm v_i$ for some $f_i$ in $K[x]$
such that
\begin{equation}\label{eqn:degfi}
\deg f_i\leq r_i.
\end{equation}
Suppose
\begin{equation*}
\bm0=\sum_{i<m}f_i(B)\bm v_i.
\end{equation*}
Then by \eqref{eqn:wvB},
\begin{equation}\label{eqn:Bw}
\bm0=\sum_{i<m}f_i(B)\bm w_i.
\end{equation}
But then by \eqref{eqn:oplus},
\begin{equation*}
\bm0=f_i(B)\bm w_i,
\end{equation*}
so by \eqref{eqn:degfi}, and \eqref{eqn:basis}, and Theorem \ref{thm:P},
\begin{equation*}
f_i=c_ix^{r_i}
\end{equation*}
for some $c_i$ in $K$.
In this case, since
$f_i(B)\bm v_i=c_iB^{r_i}\bm v_i=c_iB^{r_i-1}\bm w_i$
by \eqref{eqn:wvB},
our original supposition becomes
\begin{equation*}
\bm0=\sum_{i<m}c_iB^{r_i-1}\bm w_i,
\end{equation*}
which implies that each $c_i$ is $0$,
since the vectors $B^{r_i-1}\bm w_i$ are nonzero elements
of the distinct summands in \eqref{eqn:oplus}.
Thus $f_i=0$.

Now we can let
\begin{equation*}
V'=\bigoplus_{i<m}V_i.
\end{equation*}
Then $V'\included V$.
By construction, $V_iB=W_i$, so
\begin{equation*}
V'B=\bigoplus_{i<m}W_i=VB.
\end{equation*}
Therefore
\begin{equation*}
V=V'+(V\cap\ker B),
\end{equation*}
since, if $\bm v\in V$, then $B\bm v=B\bm v'$ for some $\bm v'$ in $V'$,
and then $\bm v-\bm v'\in V\cap\ker B$.
Each nonzero element of $V\cap\ker B$
constitutes a basis of a one-dimensional $B$-cyclic space.
Then $V$ is the direct sum of some of these spaces, along with the $V_i$,
as desired.
\end{proof}

In the notation of \eqref{eqn:Kn=sum},
there are $n_j$ in $\N$,
and then there are $\bm v_{jk}$ in $\ker(B_j{}^{r_j})$
and $s_{jk}$ in $\N$ such that
\begin{align*}
  B_j{}^{s_{jk}-1}\bm v_{jk}&\neq\bm0,
  &B_j{}^{s_{jk}}\bm v_{jk}&=\bm0,
\end{align*}
and
\begin{equation*}
  \ker(B_j{}^{r_j})=\bigoplus_{k<n_j}\Span\{B_j{}^i\bm v_{jk}\colon i<s_{jk}\}.
\end{equation*}
Now we may let
\begin{equation*}
  P=\left(
  \begin{array}{c|c|c}
    P_0&\cdots&P_{m-1}
  \end{array}
  \right),
\end{equation*}
where, for each $j$ in $m$,
\begin{equation*}
  P_j=
  \left(
  \begin{array}{c|c|c}
    P_{j0}&\cdots&P_{j,n_j-1}
  \end{array}
  \right),
\end{equation*}
where, for each $k$ in $n_j$,
\begin{equation*}
  P_{jk}=
  \left(
  \begin{array}{c|c|c}
B_j{}^{s_{jk}-1}\bm v_{jk}&\cdots&\bm v_{jk}
  \end{array}
  \right).
\end{equation*}
Then $P\inv AP$ is a \textbf{Jordan normal form} for $A$.
Indeed, by the considerations yielding \eqref{eqn:PA},
\begin{equation*}
P\inv AP=\diag(\Lambda_0,\dots,\Lambda_{m-1}),
\end{equation*}
where, for each $j$ in $m$,
\begin{equation*}
\Lambda_j=\diag(\Lambda_{j0},\dots,\Lambda_{j,n_j-1}),
\end{equation*}
where, for each $k$ in $n_j$,
$\Lambda_{j,k}$ is the $s_{jk}\times s_{jk}$ matrix
\begin{equation*}
 \begin{pmatrix}
\lambda_j&      1&     0&  \dots&      0\\
      0&\lambda_j&     1& \ddots& \vdots\\
      0&      0&\ddots& \ddots&      0\\
 \vdots&       &\ddots&\lambda_j&		  1\\
      0&    \hdotsfor2&      0&\lambda_j
\end{pmatrix}.
\end{equation*}
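For example, let $K=\Q$ and
\begin{equation*}
  A=
  \begin{pmatrix}
    3&1\\
    -1&1
  \end{pmatrix}.
\end{equation*}
The characteristic polynomial of $A$ is $(x-2)^2$,
and $\bm v_0=(1,-1)$ is an eigenvector of $A$ with eigenvalue $2$;
solving $(A-2\cdot\Id)\bm v_1=\bm v_0$ gives $\bm v_1=(1,0)$.
Then
\begin{align*}
  P&=
  \begin{pmatrix}
    1&1\\
    -1&0
  \end{pmatrix},&
  P\inv AP&=
  \begin{pmatrix}
    2&1\\
    0&2
  \end{pmatrix},
\end{align*}
the latter being a Jordan normal form for $A$.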

%\bibliographystyle{plain}
%\bibliography{../../references}

\begin{thebibliography}{1}

\bibitem{MR0276251}
Kenneth Hoffman and Ray Kunze.
\newblock {\em Linear algebra}.
\newblock Second edition. Prentice-Hall, Inc., Englewood Cliffs, N.J., 1971.

\bibitem{CK-II}
Cemal Ko{\c c}.
\newblock Topics in linear algebra.
\newblock Printed by the Department of Mathematics, Middle East Technical
  University, 2010.
\newblock No ISBN.

\bibitem{Lang-LA}
Serge Lang.
\newblock {\em Linear Algebra}.
\newblock Addison-Wesley, Reading MA, second edition, 1970.
\newblock World Student Series edition, second printing, 1972.

\bibitem{Lang-alg}
Serge Lang.
\newblock {\em Algebra}.
\newblock Addison-Wesley, Reading MA, third edition, 1993.
\newblock Reprinted with corrections, 1997.

\bibitem{MR2125693}
Steven Roman.
\newblock {\em Advanced linear algebra}, volume 135 of {\em Graduate Texts in
  Mathematics}.
\newblock Springer, New York, second edition, 2005.

\end{thebibliography}


\end{document}
