\documentclass[a4paper,11pt,twoside,reqno]{amsart}
\usepackage{typearea}
\usepackage{graphicx}
\title{Groups and Rings}
\author{David Pierce}
\date{\today}

\address{Mathematics Dept\\Middle East Tech.\ Univ.\\Ankara 06531, Turkey}

\email{dpierce@metu.edu.tr}

\urladdr{http://metu.edu.tr/~dpierce/}

%\usepackage[notref,notcite]{showkeys}

\usepackage{url}
\usepackage{amsmath,amssymb,amsthm,amscd}
\usepackage[mathscr]{euscript}
\usepackage{upgreek}
\usepackage{multicol}
\usepackage{stmaryrd}  % \triangle{left,right}eqslant
\usepackage[matrix,arrow]{xy}
\usepackage{hfoldsty}
\usepackage{verbatim}
\usepackage{paralist}

%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  Theorems
%
%%%%%%%%%%%%%%%%%%%%%%%%

%\swapnumbers

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem*{porism}{Porism}
\newtheorem*{corollary}{Corollary}

\theoremstyle{definition}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{example}[theorem]{Example}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\makeindex

\usepackage{bm}
\newcommand{\tuple}[1]{\bm{#1}}

\newcommand{\vscr}[1]{#1'}            % successor ordinal (v for von Neumann)
\newcommand{\vnn}{\upomega}        % my usual preference for this
\newcommand{\included}{\subseteq}      % [the name suggests the meaning here]
\newcommand{\pincluded}{\subset}      % [the name suggests the meaning here]
\renewcommand{\leq}{\leqslant}
\renewcommand{\geq}{\geqslant}
\renewcommand{\emptyset}{\varnothing}
\renewcommand{\land}{\mathrel{\&}}
\renewcommand{\setminus}{\smallsetminus}
\renewcommand{\phi}{\varphi}

\newcommand{\stnd}[1]{\mathbb{#1}}
\newcommand{\N}{\stnd{N}}
\newcommand{\Z}{\stnd{Z}}         % integers
\newcommand{\Q}{\stnd{Q}}         % rationals
\newcommand{\C}{\stnd{C}}         % complex numbers
\newcommand{\R}{\stnd{R}}         % real numbers
\newcommand{\F}{\stnd{F}}         % 
\newcommand{\Ham}{\stnd{H}}         % quaternions 
\newcommand{\Oct}{\stnd{O}}         % octonions
\newcommand{\id}{\operatorname{id}}          % identity-map
\newcommand{\gid}{\operatorname{e}}  % identity of group
\newcommand{\inv}{^{-1}}                % mult. inverse

\newcommand{\Mat}[2][n]{\operatorname M_{#1}(#2)}
\newcommand{\MatR}[1][n]{\Mat[#1]{R}}
\newcommand{\MatZ}[1][n]{\Mat[#1]{\Z}}
\newcommand{\GL}[2][n]{\operatorname{GL}_{#1}(#2)}
\newcommand{\GLZ}[1][n]{\GL[#1]{\Z}}
\newcommand{\GLR}[1][n]{\GL[#1]{R}}
\newcommand{\Kfg}{\mathrm V}     % Klein four group
\newcommand{\quat}{\mathrm Q_8}  % Quaternion group

\newcommand{\str}[1]{\mathfrak{#1}}     % structure
\newcommand{\qsep}{\;}                 % follows a quantified variable
\newcommand{\Forall}[1]{\forall{#1}\qsep }
\newcommand{\Exists}[1]{\exists{#1}\qsep }
\newcommand{\modsim}{/\mathord{\sim}}  % modulo the eq-ren \sim
\newcommand{\eqc}[1]{[#1]}             % equivalence-class

\newcommand{\divides}{\mathrel{|}}
\newcommand{\ndivides}{\mathrel{\nmid}}
\newcommand{\order}[1]{\lvert#1\rvert}
\newcommand{\gpgen}[1]{\langle#1\rangle}% subgroup generated by #1
\newcommand{\unordered}[2]{[#2]^{#1}}  % unordered #1-tuples from #2
\newcommand{\free}[1]{\operatorname{F}(#1)}  % free group on #1
\newcommand{\fggen}{I}  % generating set of a free group
\newcommand{\gprels}{B} % relations
\newcommand{\setactedon}{A}  % set acted on by a group

\newcommand{\setcolon}{\colon}

\newcommand{\subgp}{<}              % subgroup
\newcommand{\nsubgp}{\vartriangleleft}  % normal subgroup
\newcommand{\nsupgp}{\vartriangleright}  % normal supergroup
\newcommand{\psubgp}{\lneqq}

\newcommand{\congruence}{\equiv}
\newcommand{\siml}{\congruence_{\ell}^H}
\newcommand{\simr}{\congruence_{\mathrm r}^H}

\newcommand{\weakprod}{\sideset{}{^{\mathrm{w}}}\prod}
\newcommand{\textweakprod}{\prod^{\mathrm w}}
\newcommand{\freeprod}{\sideset{}{^*}\prod}
\newcommand{\textfreeprod}{\prod^*}
\newcommand{\gpres}[2]{\gpgen{#1\mid#2}}% group on #1 with rel'ns #2
\newcommand{\centr}[1]{\operatorname{C}(#1)}  % center
\newcommand{\cseries}[2]{\operatorname{C}_{#1}(#2)} % central series
\newcommand{\cseriesplain}[1]{\operatorname{C}_{#1}} % central series
\newcommand{\centralizer}[2]{\operatorname{C}_{#2}(#1)} % centralizer
\newcommand{\normalizer}[2]{\operatorname{N}_{#2}(#1)}
\newcommand{\dsubgp}[2]{#2^{(#1)}}  % n-th derived subgroup of #2, where n=#1.
\newcommand{\tsubgp}[1]{#1_{\mathrm{t}}} % torsion sub-group

\DeclareMathOperator{\im}{im}          % image

\newcommand{\family}[1]{\mathcal{#1}}  % family (of sets)
\newcommand{\class}[1]{\mathbf{#1}}    % class

\newcommand{\unit}[1]{{#1}^{\times}}    % group of units of a ring
\newcommand{\Zmod}[1]{\Z_{#1}}
\newcommand{\Zmodu}[1]{\unit{\Zmod{#1}}}
\DeclareMathOperator{\lcm}{lcm}
\newcommand{\rest}[1]{\restriction{#1}}% restriction of function to #1
\newcommand{\modulo}{\emph{modulo}}

\newcommand{\bracket}{\operatorname b}  % (Lie) bracket

% Concerning permutations:

\newcommand{\sgn}[1]{\operatorname{sgn}(#1)}
\newcommand{\sq}[1]{q_{\sigma}(#1)}  % used to define sgn

\newcommand{\Sym}[1]{\operatorname{Sym}(#1)}
\newcommand{\Alt}[1]{\operatorname{Alt}(#1)}       % alternating group
\newcommand{\Dih}[1]{D_{#1}}       % dihedral group

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newcommand{\mi}{\mathrm i}
\newcommand{\mj}{\mathrm j}
\newcommand{\mk}{\mathrm k}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newcommand{\setimb}[1]{[#1]}   % image of a set, using brackets
\newcommand{\abs}[1]{\left\lvert#1\right\rvert}  % absolute value
\newcommand{\size}[1]{\lvert#1\rvert}  % cardinality

\newcommand{\so}[1]{\operatorname{E}(#1)}
\newcommand{\End}[1]{\operatorname{End}(#1)}
\newcommand{\Aut}[1]{\operatorname{Aut}(#1)}
\newcommand{\Hom}[1]{\operatorname{Hom}(#1)}
\newcommand{\Inn}[1]{\operatorname{Inn}(#1)}
\newcommand{\Der}[1]{\operatorname{Der}(#1)}

%\newcommand{\pid}{\textsc{pid}}
\newcommand{\pid}{PID}
\newcommand{\ufd}{UFD}
\newcommand{\ed}{ED}

\newcommand{\primei}{\mathfrak{p}}      % a prime ideal
\newcommand{\maxi}{\mathfrak{m}}        % a maximal ideal
\newcommand{\supp}[1]{\operatorname{supp}(#1)}
\newcommand{\Supp}[1]{\operatorname{supp}[#1]}
\newcommand{\symdiff}{\mathbin{\triangle}}

%\newcommand{\lang}{\mathcal{L}}        % a language or signature

\newcommand{\pow}[1]{\mathscr{P}(#1)}  % power set
\let\oldsqrt\sqrt
\renewcommand{\sqrt}[2][1]{\oldsqrt{\vphantom{#1}}{#2}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\renewcommand{\theequation}{\roman{equation}}

\renewcommand{\thepart}{\Roman{part}}

\begin{document}
%\frontmatter
\maketitle

\section*{Preface}

These are lecture notes for the first semester of a
one-year graduate course in algebra.  The main reference for the
course is Hungerford's \emph{Algebra} \cite{MR600654}. 
The present notes are mostly derived from the notes I kept while
teaching Math
503 at METU in the fall of 2008.  However, \S~\ref{sect:N} has a
different source: a course called Non-standard Analysis, which I gave
at the Nesin Mathematics Village, \c Sirince, in the summer of 2009.  I
have built up Part~\ref{part:N} around this section. 

I edit these notes as I teach Math 503 in the fall of 2009.  I do some
reorganizing according to how I should like to do things in the future.

%\newpage

\tableofcontents
\newpage

\part{Foundations}\label{part:N}
%\setcounter{section}{-1}

\section{Functions and relations}\label{sect:f}
%I start with a brief set-theoretic review of \emph{functions.}
If $A$ and $B$ are sets, then their \textbf{cartesian
  product,}\index{cartesian product}
denoted by
\begin{equation*}
  A\times B,
\end{equation*}
is the set $\{(x,y)\colon x\in A\land y\in B\}$.
Here the \textbf{ordered pair}\index{ordered pair} $(x,y)$ is defined so that
\begin{equation*}
(a,b)=(x,y)\iff a=x\land b=y.
\end{equation*}
One definition that accomplishes this is
$(x,y)=\{\{x\},\{x,y\}\}$, but we never actually need the precise
definition. 
An \textbf{ordered triple}\index{ordered triple} $(x,y,z)$ can be defined as $((x,y),z)$, and
so forth.

A \textbf{function}\index{function} or \textbf{map}\index{map} from $B$ to
$A$ is a subset 
$f$ of $B\times A$ such that, for each $b$ in $B$, there is exactly
one $a$ in $A$ such that $(b,a)\in f$.  Then instead of $(b,a)\in f$,
we write 
\begin{equation}\label{eqn:f}
  f(b)=a.
\end{equation}
I assume the reader is familiar with the \emph{kinds} of functions
from $B$ to $A$:
injective, surjective, and so forth.

A \textbf{singulary operation}\index{singulary}\footnote{The word
  \textbf{unary}\index{unary} is  
  more common, but less etymologically correct.} on $A$ is a function
from $A$ to itself; a \textbf{binary}\index{binary} operation on $A$
is a function 
from $A\times A$ to $A$.  A \textbf{binary relation} on $A$ is a
subset of $A\times A$; if $R$ is such, and $(a,b)\in R$, we often
write
\begin{equation*}
  a\mathrel Rb.
\end{equation*}
However, a singulary operation on $A$ is a particular kind of binary
relation on $A$: a kind of relation for which we already have the
special notation in~\eqref{eqn:f}.  I assume the reader is familiar
with other kinds of binary relations, namely orderings. 

\section{An axiomatic development of the natural numbers}\label{sect:N}

The set of natural numbers, commonly denoted by
\begin{equation*}
  \N,
\end{equation*}
can be understood as having
\begin{enumerate}
\item 
a distinguished \textbf{initial element,}\index{initial element} denoted by
\begin{equation*}
  0
\end{equation*}
and called \textbf{zero,}\index{zero} and
\item
a distinguished singulary operation of
\textbf{succession,}\index{succession, successor}
  denoted by 
  \begin{equation*}
  n\mapsto n+1,
  \end{equation*}
  where $n+1$ is called the \textbf{successor} of $n$. 
\end{enumerate}
I propose to refer to the ordered triple $(\N,0,n\mapsto n+1)$ as an
\emph{iterative structure.}

In general, by an \textbf{iterative structure,}\index{iterative} I mean any set that has a
distinguished element and a distinguished singulary operation.  Here
the underlying set is sometimes called the
\textbf{universe}\index{universe} of the 
structure.  If one wants a simple notational distinction between a
structure and its universe, and the universe is $A$, then the
structure might be denoted by $\str A$.  (Here $\str A$ is
  the Fraktur version of $A$.  See Appendix~\ref{app:German}.)

 The
iterative structure $(\N,0,n\mapsto n+1)$ is
distinguished among iterative structures for satisfying the
following axioms.
\begin{enumerate}
\item\label{ax:0}
$0$ is not a successor: $0\neq n+1$.
\item\label{ax:inj}
Succession is injective: if $m+1=n+1$, then $m=n$.
\item\label{ax:ind}
The structure admits \textbf{proof by induction,}\index{induction} in
the sense that, 
of all subsets of the universe, the only
  subset $A$ with the following two closure
  properties is the whole universe:
  \begin{enumerate}
  \item 
$0\in A$;
\item
for all $n$, if $n\in A$, then $n+1\in A$.
  \end{enumerate}
\end{enumerate}

These axioms seem to have been discovered originally by
Dedekind~\cite[II, VI (71), p.~67]{MR0159773}, although they were also
written down by 
Peano~\cite{Peano} and are often known as the \textbf{Peano
  axioms.}\index{Peano} 

\begin{theorem}[Recursion]
For every iterative structure $(A,b,f)$, there is a unique
\textbf{homomorphism}\index{homomorphism} to this structure from
$(\N,0,n\mapsto n+1)$: 
that is, there is a unique function $h$ from $\N$ to 
$A$ such that
\begin{enumerate}
\item 
$h(0)=b$,
\item
$h(n+1)=f(h(n))$ for all $n$ in $\N$.
\end{enumerate}
\end{theorem}

\begin{proof}
We seek $h$ as a particular subset of $\N\times A$.
Let $B$ be the set whose elements are the subsets $C$ of $\N\times
A$ such that, if $(x,y)\in C$, then either 
\begin{enumerate}
\item 
$(x,y)=(0,b)$ or else
\item $C$ has an element
$(u,v)$ such that $(x,y)=(u+1,f(v))$.
\end{enumerate}
Let $R=\bigcup B$; so $R$ is a subset of $\N\times A$.  We may say $R$
is a \emph{relation} from $\N$ to $A$.  If
$(x,y)\in R$, we may write also 
\begin{equation*}
x\mathrel Ry.  
\end{equation*}
Since $\{(0,b)\}\in
B$, we have $0\mathrel Rb$.  If $n\mathrel Ry$, then $(n,y)\in C$ for
some $C$ in $B$, but then $C\cup\{(n+1,f(y))\}\in B$ by definition of $B$, so
$(n+1)\mathrel R f(y)$.  Therefore $R$ is the desired function $h$,
provided it is a \emph{function} from $\N$ to $A$.  Proving this has
two stages.
\begin{asparaenum}[1.]
  \item
For all $n$ in $\N$, there is $y$ in $A$ such that $n\mathrel Ry$.
Indeed, let $D$ be the set of such $n$.  Then we have just seen that
$0\in D$, and if $n\in D$, then $n+1\in D$.  By induction, $D=\N$.
\item
For all $n$ in $\N$, if $n\mathrel Ry$ and $n\mathrel Rz$, then $y=z$.
Indeed, let $E$ be the set of such $n$.  Suppose $0\mathrel R y$.  Then
$(0,y)\in C$ for some $C$ in $B$.  Since $0$ is not a successor, we
must have $y=b$, by definition of $B$.  Therefore $0\in E$.  Suppose
$n\in E$, and $(n+1)\mathrel Ry$.  Then $(n+1,y)\in C$ for some $C$ in
$B$.  Again since $0$ is not a successor, we must have
$(n+1,y)=(m+1,f(v))$ for some $(m,v)$ in $C$.  Since succession is
injective, we must have $m=n$.  Since $n\in E$, we know $v$ is
\emph{unique} such that $n\mathrel Rv$.  Since $y=f(v)$, therefore $y$
is unique such that $(n+1)\mathrel Ry$.  Thus $n+1\in E$.  By
induction, $E=\N$.
\end{asparaenum}

So $R$ is the desired function $h$.
Finally, $h$ is unique by induction.
\end{proof}

\begin{corollary}
For every set $A$ with a distinguished element $b$, and for every function
$F$ from $\N\times A$ to $A$, there is a unique function $H$ from $\N$ to
$A$ such that
\begin{enumerate}
\item 
$H(0)=b$,
\item
$H(n+1)=F(n,H(n))$ for all $n$ in $\N$.
\end{enumerate}
\end{corollary}

\begin{proof}
Let $h$ be the unique homomorphism from $(\N,0,n\mapsto n+1)$ to
$(\N\times A,(0,b),f)$, where $f$ is the operation
$(n,x)\mapsto(n+1,F(n,x))$.  In particular, $h(n)$ is always an
ordered pair.  By induction, the 
first entry of $h(n)$ is always $n$; so there is a function $H$ from
$\N$ to $A$ such that $h(n)=(n,H(n))$.  Then $H$ is as desired.  By
induction, $H$ is unique.
\end{proof}

We can now use recursion to \emph{define} the binary operation
$(x,y)\mapsto x+y$
of \textbf{addition,}\index{addition} along with the binary operation
$(x,y)\mapsto x\cdot y$ or $(x,y)\mapsto xy$
of \textbf{multiplication,}\index{multiplication} on
$\N$.  The definitions are:
\begin{align*}
    n+0&=n,&     n+(m+1)&=(n+m)+1,&
n\cdot0&=0,& n\cdot(m+1)&=n\cdot m+n.
\end{align*}

\begin{lemma}\label{lem:+}
For all $n$ and $m$ in $\N$,
\begin{align*}
  0+n&=n,&(m+1)+n&=(m+n)+1.
\end{align*}
\end{lemma}

\begin{proof}
  Induction.
\end{proof}

\begin{theorem}\label{thm:N-comm}
Addition on $\N$ is
  \begin{enumerate}
  \item 
\textbf{commutative:}\index{commutative} $n+m=m+n$; and
\item
\textbf{associative:}\index{associative} $n+(m+k)=(n+m)+k$.
  \end{enumerate}
\end{theorem}

\begin{proof}
  Induction and the lemma.
\end{proof}

\begin{theorem}\label{thm:cancel}
  Addition on $\N$ allows \textbf{cancellation:}\index{cancellation}
if $n+x=n+y$, then $x=y$.
\end{theorem}

\begin{proof}
  Induction, and injectivity of succession.
\end{proof}

\begin{lemma}\label{lem:.}
For all $n$ and $m$ in $\N$,
\begin{align*}
  0\cdot n&=0,&(m+1)\cdot n&=m\cdot n+n.
\end{align*}
\end{lemma}

\begin{proof}
  Induction.
\end{proof}

\begin{theorem}\label{thm:mult-comm}
Multiplication on $\N$ is
  \begin{enumerate}
  \item 
commutative: $nm=mn$;
\item
\textbf{distributive}\index{distributive} over addition: $n(m+k)=nm+nk$; and
\item
associative: $n(mk)=(nm)k$.
  \end{enumerate}
\end{theorem}

\begin{proof}
  Induction and the lemma.
\end{proof}

Landau \cite{MR12:397m} proves \emph{using induction alone} that $+$
and $\cdot$ exist 
as given by the recursive definitions above.  However,
Theorem~\ref{thm:cancel} needs more than induction.  Also, the
existence of \textbf{exponentiation,}\index{exponentiation} as an
operation $(x,y)\mapsto 
x^y$ such that
\begin{align*}
  n^0&=1,& n^{m+1}&=n^m\cdot n,
\end{align*}
requires more than induction.

The usual ordering $<$ of $\N$ is defined recursively as follows.
First note that $m\leq n$ means simply $m<n$ or $m=n$.  Then the
definition of $<$ is:
\begin{enumerate}
\item 
$m\not<0$;% for \emph{no} $m$ in $\N$;
\item
$m<n+1$ if and only if $m\leq n$.
\end{enumerate}
In particular, $n<n+1$.
Really, it is the sets $\{x\in\N\colon x<n\}$ that are defined by
recursion:
\begin{enumerate}
\item 
$\{x\in\N\colon x<0\}=\emptyset$;
\item
$\{x\in\N\colon x<n+1\}=\{x\in\N\colon x<n\}\cup\{n\}$.
\end{enumerate}
We now have $<$ as a binary relation on $\N$;
we must \emph{prove} that it is an ordering.

\begin{theorem}\label{thm:<trans}
  The relation $<$ is \textbf{transitive}\index{transitive} on $\N$,
  that is, if $k<m$ 
  and $m<n$, then $k<n$.
\end{theorem}

\begin{proof}
  Induction on $n$.
\end{proof}

\begin{lemma}
  $m\neq m+1$.
\end{lemma}

\begin{proof}
  The claim is true when $m=0$, since $0$ is not a successor.  Suppose
  the claim is true when $m=k$, that is, $k\neq k+1$.  Then $k+1\neq
  (k+1)+1$, by injectivity of succession, so the claim is true when
  $m=k+1$.  By induction, the claim is true for all $m$.
\end{proof}

\begin{theorem}\label{thm:<irr}
  The relation $<$ is \textbf{irreflexive}\index{irreflexive} on $\N$:
  $m\not<m$. 
\end{theorem}

\begin{proof}
  The claim is true when $m=0$, since $m\not<0$ by definition.
  Suppose the claim \emph{fails} when $m=k+1$.  This means $k+1<k+1$.
  Therefore $k+1\leq k$ by definition.  By the previous lemma,
  $k+1<k$.  But $k\leq k$, so $k<k+1$ by definition.  So $k<k+1$ and
  $k+1<k$; hence $k<k$ by
  Theorem~\ref{thm:<trans}, that is, the claim fails when $m=k$.  By
  induction, the claim holds for all $m$. 
\end{proof}

\begin{lemma}
%  \mbox{}
  \begin{enumerate}
  \item 
$0\leq m$.
\item
If $k<m$, then $k+1\leq m$.
  \end{enumerate}
\end{lemma}

\begin{proof}
  \begin{asparaenum}
  \item 
Induction.
\item
The claim is vacuously true when $m=0$.  Suppose it is true when
$m=n$.  Say $k<n+1$.  Then $k\leq n$.  If $k=n$, then
$k+1=n+1<(n+1)+1$.  If $k<n$, then $k+1<n+1$ by inductive hypothesis,
so $k+1<(n+1)+1$ by transitivity.  Thus the claim holds when $m=n+1$.
By induction, the claim holds for all $m$.\qedhere
  \end{asparaenum}
\end{proof}

\begin{theorem}\label{thm:<tot}
  The relation $\leq$ is \textbf{total}\index{total} on $\N$: either
  $k\leq m$ or 
  $m\leq k$.
\end{theorem}

\begin{proof}
  Induction and the lemma.
\end{proof}

Because of Theorems~\ref{thm:<trans},~\ref{thm:<irr},
and~\ref{thm:<tot}, the set $\N$ is \textbf{(strictly)
  ordered}\index{order}\index{strict} by $<$. 

\begin{theorem}\label{thm:m+x=n}
  For all $m$ and $n$ in $\N$, we have $m\leq n$ if and only if the
  equation
  \begin{equation}\label{eqn:m+x=n}
    m+x=n
  \end{equation}
is soluble in $\N$.
\end{theorem}

\begin{proof}
  By induction on $k$, if $m+k=n$, then $m\leq n$.  

Conversely, if $m\leq0$, then $m=0$ (why?), so $m+0=0$.
Suppose the equation $m+x=r$ is soluble whenever $m\leq r$, but now $m\leq r+1$.
If $m=r+1$, then $m+0=r+1$.  If $m<r+1$, then $m\leq r$, so the
equation $m+x=r$ has a solution $k$, and therefore $m+(k+1)=r+1$.
Thus the equation $m+x=r+1$ is soluble whenever $m\leq r+1$.
By
induction, for all $n$ in $\N$, if $m\leq n$, then~\eqref{eqn:m+x=n}
is soluble in $\N$. 
\end{proof}

\begin{theorem}\label{thm:N<}
  \begin{enumerate}
  \item\label{part:<+}
If $k<\ell$, then $k+m<\ell+m$.
\item\label{part:cancel}
If $k<\ell$ and $m\neq0$, then $km<\ell m$.
  \end{enumerate}
\end{theorem}

Here part~\ref{part:<+} is a refinement of Theorem~\ref{thm:cancel},
and part~\ref{part:cancel} yields the following analogue of
Theorem~\ref{thm:cancel} for multiplication.

\begin{corollary}
  If $km=\ell m$ and $m\neq0$, then $k=\ell$.
\end{corollary}

\begin{theorem}\label{thm:wo}
  $\N$ is \textbf{well ordered}\index{well ordered} by $<$: every
  nonempty set of natural 
  numbers has a least element.
\end{theorem}

\begin{proof}
  Suppose $A$ is a set of natural numbers with no least element.  Let
  $B$ be the set of natural numbers $n$ such that, if $m\leq n$, then
  $m\notin A$.  Then
  $0\in B$, by the last lemma, since otherwise $0$ would be the least
  element of $A$.  Suppose $m\in B$.  Then $m+1\in B$, since otherwise
  $m+1$ would be the least element of $A$.  By induction, $B=\N$, so
  $A=\emptyset$. 
\end{proof}

\section{A construction of the natural numbers}

The \textbf{Axiom of Infinity}\index{infinity} is that there is a set
that contains 
$\emptyset$ and is closed under the operation
$x\mapsto x'$, where
\begin{equation*}
  x'=x\cup\{x\}.
\end{equation*}
We assume this.
Then
the smallest such set is the intersection of the class of all such
sets.  This intersection is denoted by
\begin{equation*}
  \vnn.
\end{equation*}
Immediately, the iterative structure
 $(\vnn,\emptyset,{}')$ admits induction.

\begin{lemma}\label{lem:mem-inc}
  On $\vnn$, membership implies inclusion.
\end{lemma}

\begin{proof}
  By induction on $n$, we prove that, for all $k$ in $\vnn$, if $k\in
  n$, then $k\included n$.  The claim is vacuously true when
  $n=\emptyset$.  Suppose it is true when $n=m$.  If $k\in m'$, then
  either $k\in m$ or else $k=m$.  In the former case, by inductive
  hypothesis, $k\included m\included m'$; in the latter case,
  $k=m\included m'$.  Thus the claim is true when $n=m'$.  By
  induction, the claim is true for all $n$ in $\vnn$.
\end{proof}

\begin{lemma}\label{lem:p}
  In $\vnn$, if $k\pincluded n$, then $k'\included n$.
\end{lemma}

\begin{proof}
  The claim is vacuously true when $n=\emptyset$.  Suppose it is true
  when $n=m$.  Say $k\pincluded m'$.  If $k\included m$, then either
  $k\pincluded m$, in which case the inductive hypothesis gives us
  $k'\included m\included m'$; or else $k=m$, so that
  $k'=m'$.
If $k\not\included m$, then $m\in k$, so by Lemma~\ref{lem:mem-inc} we
have $m\included k\pincluded
m'=m\cup\{m\}$, and therefore $m=k$, so again $k'=m'$.  Thus the claim
is true when $n=m'$.  Therefore the claim holds for all $n$ in $\vnn$.
\end{proof}

\begin{lemma}\label{lem:<}
  Inclusion is a total ordering of $\vnn$.
\end{lemma}

\begin{proof}
  We have to show on $\vnn$ that, if $k\not\included n$, then
  $n\included k$.  The claim is trivially true when $n=\emptyset$.
  Suppose it is true when $n=m$.  If $k\not\included m'$, then
  $k\not\included m$, so $m\included k$, but $m\neq k$, so
  $m\pincluded k$, and therefore $m'\included k$ by Lemma~\ref{lem:p}.
\end{proof}

\begin{lemma}\label{lem:distinct}
Elements of $\vnn$ are distinct from their successors.  
\end{lemma}

\begin{proof}
We prove that no element of $\vnn$ has an element that is equal to its
successor.  This is trivially true for the empty set.  Suppose it is
true for $m$.  If $k\in m'$, then either $k\in m$, or else $k=m$.  In
the former case, by inductive hypothesis, $k\neq k'$.  In the latter
case, if $k=k'$, then $m=k\cup\{k\}$, and in particular $k\in m$,
contrary to inductive hypothesis.  Therefore no element of $m'$ is
equal to its successor.  This completes the induction.  Since every
element of $\vnn$ is an element of its successor, which is in $\vnn$,
no element of $\vnn$ is equal to its successor.
\end{proof}

\begin{theorem}
The iterative structure $(\vnn,\emptyset,{}')$ satisfies the Peano
Axioms.
\end{theorem}

\begin{proof}
We have observed that
 $(\vnn,\emptyset,{}')$ admits
induction.  Easily too, $\emptyset$ is not a successor.  By
Lemma~\ref{lem:<}, if $m\neq n$, we may assume $m\pincluded n$.  By
Lemmas~\ref{lem:p} and~\ref{lem:distinct}, we then have $m'\included
n\pincluded n'$.  Thus succession is injective.
\end{proof}


The definition of $\N$ as $\vnn$ is due to von Neumann \cite{von-Neumann}.
Henceforth we write $0$ for $\emptyset$, then $1$ for $0'$, and $2$
for $1'$, and  so on.
 Thus
 \begin{gather*}
   0=\emptyset;\qquad
1=\{0\};\qquad
2=\{0,1\};\qquad
3=\{0,1,2\},\quad\dots
 \end{gather*}
If $n\in\vnn$, then
\begin{equation*}
  n=\{0,\dots,n-1\}.
\end{equation*}

\section{Structures}

For us, the point of using the von-Neumann definition is that, under
this definition, a natural number $n$ is a set with $n$ elements.
Since the set of functions from a set $B$ to a set $A$ can be
denoted by
\begin{equation*}
  A^B,
\end{equation*}
we have, in particular, that $A^n$ is the set of functions from
$\{0,\dots,n-1\}$ into $A$.  We can denote such a function by
$(x_0,\dots,x_{n-1})$; that is, 
\begin{equation*}
A^n=\{(x_0,\dots,x_{n-1})\colon x_i\in A\}.
\end{equation*}
Thus, $A^2$ can be identified with $A\times A$, and $A^1$ with $A$
itself.  There is exactly one function from $0$ to $A$, namely $0$; so
\begin{equation*}
  A^0=\{0\}=1.
\end{equation*}
An $n$-ary \textbf{relation}\index{relation} on $A$ is a subset of $A^n$;
an \textbf{$n$-ary}\index{n-ary@$n$-ary}
\textbf{operation}\index{operation} on $A$ is a function from $A^n$ to
$A$.  Relations and operations that are $2$-ary, $1$-ary, or $0$-ary
can be called 
\textbf{binary,}\index{binary} \textbf{singulary,}\index{singulary}
or \textbf{nullary,}\index{nullary} respectively; after the appropriate
identifications, this agrees with the terminology used in
\S~\ref{sect:f}. 
A nullary operation on $A$ can be identified with an element of $A$.  

Generalizing the terminology used at the beginning of \S~\ref{sect:N},
we define a \textbf{structure}\index{structure} as a set
together with some distinguished relations and operations on the set;
as before, the set is the \textbf{universe}\index{universe} of the
structure.  Again, if the 
universe is $A$, then
the whole structure might be denoted by $\str A$; if $B$, then $\str
B$.

The \textbf{signature}\index{signature} of a structure comprises a
symbol for each 
distinguished relation and operation of the structure.  For example,
the signature of an ordered field like $\R$ is $\{<,0,1,+,-,\cdot\}$.
If $s$ is a symbol of the signature of $\str A$, then the
corresponding relation or operation on $A$ can be denoted by $s^{\str A}$.

A \textbf{homomorphism}\index{homomorphism} from a structure $\str A$ to a structure $\str
B$ of the same signature is a function $h$ from $A$ to $B$ that
\emph{preserves} the distinguished relations and operations: this
means
\begin{gather}\notag
  h(f^{\str A}(x_0,\dots,x_{n-1}))=f^{\str
    B}(h(x_0),\dots,h(x_{n-1})),\\\label{eqn:hom}
(x_0,\dots,x_{n-1})\in R^{\str A}\implies(h(x_0),\dots,h(x_{n-1}))\in
  R^{\str B},
\end{gather}
for all $n$-ary operation-symbols $f$ and relation-symbols $R$ of the
signature, for all $n$ in $\vnn$.  A homomorphism is an
\textbf{embedding}\index{embedding} if it is injective and if the converse
of~\eqref{eqn:hom} also holds.  A surjective embedding is an
\textbf{isomorphism.}\index{isomorphism} 
A \textbf{substructure}\index{substructure} of $\str B$ is a structure $\str A$ of the
same signature such that $A\included B$ and the inclusion of $A$ in
$B$ is an embedding of $\str A$ in $\str B$.
 
%\newpage

\part{Groups}

\section{Groups}

Given a set $A$, we may refer to a bijection from $A$ to itself as a
\textbf{symmetry}\index{symmetry} or \textbf{permutation}\index{permutation} of $A$.  Let us denote the set
of these symmetries by
\begin{equation*}
  \Sym A.
\end{equation*}
This set is equipped with:
\begin{enumerate}\setcounter{enumi}{-1}
  \item
the element (or nullary operation\footnote{It is a nullary operation
  on $\Sym A$, but a singulary operation on $A$.}) $\id_{A}$ (the
  \textbf{identity}\index{identity} on $A$); 
\item
the singulary operation $f\mapsto f\inv$ (functional \textbf{inversion}\index{inversion});
\item
the binary operation $(f,g)\mapsto f\circ g$ (functional \textbf{composition}\index{composition}).
\end{enumerate}
The structure $(\Sym A,\id_A,{}\inv,\circ)$ is the \textbf{complete
  group of symmetries}\index{complete group of symmetries} of $A$; a
substructure of this can be called 
simply a 
\textbf{group of symmetries}\index{group of symmetries} of $A$.  
  
  In general, a \textbf{group}\index{group} is a structure that is isomorphic to a
  symmetry group.\footnote{This is not the usual definition, but it is
    equivalent, by Cayley's Theorem below.} That is,
  $(G,\gid,{}\inv,\cdot)$ is a group, provided
  that, for some set $A$, there is an injection $\phi$ from $G$ to $\Sym
  A$ such that 
  \begin{enumerate}
  \item
  $\phi(\gid)=\id_A$,
  \item
  $\phi(x\inv)=\phi(x)\inv$,
  \item
  $\phi(x\cdot y)=\phi(x)\circ\phi(y)$.
  \end{enumerate}
    
    \begin{theorem}\label{thm:sym=>gp}
    In every group, the following equations are identities:
\begin{gather}\label{def-1}
  x\gid=x=\gid{}x,\\\label{def-inv}
xx\inv=\gid{}=x\inv x,\\\label{def-ass}
(xy)z=x(yz).
\end{gather}    
    \end{theorem}
  
  \begin{proof}
  With $\phi$ as above, we have
  $\phi(x\gid{})=\phi(x)\circ\phi(\gid{})=\phi(x)\circ\id_A=\phi(x)$,
  so $x\gid{}=x$ since $\phi$ is injective.  The remaining identities
  are established likewise. 
  \end{proof}
  
Any element $a$ of a group determines a
singulary operation $\lambda_a$ on the group, given by
\begin{equation*}
  \lambda_a(x)=ax.
\end{equation*}

\begin{theorem}\label{thm:Cay}
The function $x\mapsto\lambda_x$ embeds a group in its symmetry group. 
\end{theorem}

\begin{proof}
Let $G$ be a group, and $a\in G$.
We have
\begin{equation*}
\lambda_{a\inv}(\lambda_a(x))=a\inv(ax)=(a\inv a)x=\gid{}x=x
\end{equation*} 
by Theorem~\ref{thm:sym=>gp},
so $\lambda_{a\inv}\circ\lambda_a=\id_G$.  Likewise
\begin{equation*}
\lambda_{a}(\lambda_{a\inv}(x))=a(a\inv x)=(aa\inv)x=\gid{}x=x,
\end{equation*} 
so $\lambda_a\circ\lambda_{a\inv}=\id_G$.  Thus $\lambda_a$ is invertible and therefore belongs to $\Sym G$, and
\begin{equation*}
\lambda_{a}{}\inv=\lambda_{a\inv}.
\end{equation*}
We have also
\begin{equation*}
\lambda_{\gid}(x)={\gid}x=x=\id_G(x),
\end{equation*}
so 
\begin{equation*}
\lambda_{\gid}=\id_G,
\end{equation*} 
and
\begin{equation*}
\lambda_{ab}(x)=(ab)x=a(bx)=\lambda_a(\lambda_b(x))=(\lambda_a\circ\lambda_b)(x),
\end{equation*} 
so
\begin{equation*}
\lambda_{ab}=\lambda_a\circ\lambda_b.
\end{equation*}
Finally, if $\lambda_a=\lambda_b$, then
\begin{gather*}
ax=bx,\\
(ax)x\inv=(bx)x\inv,\\
a(xx\inv)=b(xx\inv),\\
a{\gid}=b{\gid},\\
a=b.\qedhere
\end{gather*}
%So $x\mapsto\lambda_x$ is as claimed.
\end{proof}

The following is known as \textbf{Cayley's Theorem.}\index{Cayley's Theorem}
\index{Cayley's Theorem}
  \index{theorem!Cayley's Th---}

\begin{porism}
The converse of Theorem~\ref{thm:sym=>gp} holds.
\end{porism}

The binary operation of a group is often referred to as
\textbf{multiplication;} singulary, \textbf{inversion;} nullary, the
\textbf{identity}\index{identity} or the \textbf{neutral
  element.}\index{neutral}  The identity is 
sometimes denoted by $1$ rather than $\gid$. 

\section{Simplifications}\label{sect:simp}

A \textbf{monoid}\index{monoid} is a structure $(G,{\gid},\cdot)$
satisfying~\eqref{def-1} and~\eqref{def-ass} above; a
\textbf{semigroup}\index{semigroup} is a structure $(G,\cdot)$
satisfying~\eqref{def-ass}. 
Given a set $A$, let us denote by
\begin{equation*}
\so A
\end{equation*}
the set of functions from $A$ to itself (that is, the set of singulary
operations on $A$).  Then $(\so A,\id_A,\circ)$ is a monoid.  If
$(G,\gid,\cdot)$ is a monoid, then by the proof of
Theorem~\ref{thm:Cay}, $x\mapsto\lambda_x$ is a homomorphism from
$(G,\gid,\cdot)$ to $(\so G,\id_G,\circ)$; however, it might not be an
embedding.

The following will be used in Theorem~\ref{thm:solutions}. 

\begin{theorem}\label{thm:left}
Any structure that satisfies
\begin{gather*}
	{\gid}x=x,\\
	x\inv x=\gid,\\
	x(yz)=(xy)z
\end{gather*}
is a group.
In other words, 
  any semigroup with a left-identity and with left-inverses is a group.  
\end{theorem}

\begin{proof}
Using the given identities, we have
\begin{equation*}
(xx\inv)(xx\inv)=x(x\inv x)x\inv=x{\gid}x\inv=xx\inv,
\end{equation*}
and so
\begin{equation*}
xx\inv={\gid}xx\inv=(xx\inv)\inv(xx\inv)(xx\inv)=(xx\inv)\inv(xx\inv)={\gid}.
\end{equation*}
Hence also
\begin{equation*}
x{\gid}=x(x\inv x)=(xx\inv)x={\gid}x=x.\qedhere
\end{equation*}
\end{proof}

%The lemma has an obvious dual.

A semigroup \textbf{expands}\index{expands} to a group if it can be given an identity and an inversion so as to become a group (while the underlying set remains the same).

\begin{theorem}\label{thm:solutions}
Let $G$ be a nonempty semigroup.  The following are equivalent.
\begin{enumerate}
\item 
$G$ expands to a group.
\item
$G$ expands uniquely to a group.
\item
Each equation $ax=b$ and $ya=b$ with coefficients from $G$ has a
solution in $G$.
\item
Each equation $ax=b$ and $ya=b$ with coefficients from $G$ has a
unique solution in $G$.
\end{enumerate}
\end{theorem}

\begin{proof}
In a group, the equation $b=ax$ implies $a\inv b=a\inv(ax)$, and
  \begin{equation*}
a\inv(ax)
=(a\inv a)x
={\gid}x
=x;
  \end{equation*}
  so the equation has at most one solution.  It has at least one solution, since indeed $a(a\inv b)=(aa\inv)b={\gid}b=b$.
Likewise for the equation $b=ya$. 

Conversely, suppose $G$ is a nonempty semigroup in which all of the
given equations have solutions.  If $c\in G$, let $\gid$ be
a solution to $yc=c$.  If $b\in G$, let $d$ be a
solution to 
$cx=b$.  Then
\begin{equation*}
  {\gid}b={\gid}(cd)=({\gid}c)d=cd=b.
\end{equation*}
Also the equation $yc={\gid}$ has a solution: call it $c\inv$.  Now use
Theorem~\ref{thm:left}.
\end{proof}

By the theorem, we can characterize
groups as those semigroups that satisfy the axiom
\begin{equation*}
  \Forall x\Forall y\Exists z\Exists w(xz=y\land wx=y).
\end{equation*}
More is true:

\begin{theorem}
  A map from one group to another is a homomorphism, provided it is a
  homomorphism of semigroups.  
\end{theorem}

\begin{proof}
In a group, if $a$ is an element, then the identity is the unique
solution of $xa=a$, and $a\inv$ is the unique solution of $yaa=a$.  A
semigroup homomorphism $\phi$, where $\phi(a)=b$, takes solutions of
these equations to solutions of $xb=b$ and $ybb=b$. 
\end{proof}

\section{The integers}

A group or monoid or semigroup is \textbf{abelian}\index{abelian} if it satisfies the identity
\begin{equation*}
  xy=yx.
\end{equation*}
Multiplication on an abelian group is often (though not always) called \textbf{addition}\index{addition} and denoted by $+$; in this case, the identity may be denoted by $0$.

Let
\begin{equation*}
\N^+=\vnn\setminus\{0\}.
\end{equation*}

\begin{theorem}
$(\vnn,1,\cdot)$ and $(\N^+,1,\cdot)$ are abelian monoids.
\end{theorem}

\begin{proof}
The claim follows from the definition of multiplication on $\vnn$ and from
Theorem~\ref{thm:mult-comm}.
\end{proof}

If an abelian semigroup $(G,+)$ also has a total ordering such that
\begin{equation*}
x<y\implies x+z<y+z,
\end{equation*}
then $(G,+,<)$ is an \textbf{ordered abelian semigroup.}\index{ordered
  abelian semigroup}

\begin{theorem}
$(\N^+,+,<)$ is an ordered abelian semigroup satisfying
\begin{equation}\label{eqn:oas}
	x<y\iff \Exists zx+z=y.
\end{equation}
\end{theorem}

\begin{proof}
By Theorems~\ref{thm:N-comm} and~\ref{thm:N<} and the definition of $+$ on $\vnn$, $(\vnn,0,+,<)$ is an ordered abelian monoid.  Also $\N^+$ is closed under addition, since the successors in $\vnn$ are precisely the elements of $\N^+$, and $n+(m+1)=(n+m)+1$.  Finally, \eqref{eqn:oas} is by Theorem~\ref{thm:m+x=n}. 
\end{proof}


\begin{theorem}
Suppose $(S,+,<)$ is an ordered abelian semigroup in which~\eqref{eqn:oas} always holds.  Let $-S$ be a set disjoint from $S$ such that there is a bijection $x\mapsto-x$ from $S$ to $-S$, and let $0\notin S\cup-S$.  Then the set $S\cup\{0\}\cup-S$ can be made uniquely into an ordered abelian group that, considered as an ordered semigroup, has $S$ as a substructure.
\end{theorem}

\begin{proof}
Follow the definition of $\Z$ given in school.
\end{proof}

We now have the ordered abelian group $(\Z,0,-,+,<)$.  We also have:

\begin{theorem}
$(\Z,1,\cdot)$ is an abelian monoid, and on $\Z$, multiplication distributes over addition.
\end{theorem}

\begin{proof}
Again, define multiplication on $\Z$ as in school; then use Theorem~\ref{thm:mult-comm}.
\end{proof}














\section{Repeated multiplication}\label{sect:repeat}

Suppose on a set $A$ there is a binary operation $\cdot$ or
$(x,y)\mapsto xy$.  For each $n$ in $\N^+$, there is a set $P_n$ of
$n$-ary operations on $A$.  The definition is recursive: 
\begin{enumerate}
	\item 
	$P_1=\{\id_A\}$;
	\item
	$P_{n+1}$ consists of the operations
	\begin{equation*}
(x_0,\dots,x_n)\mapsto f(x_0,\dots,x_{k-1})\cdot g(x_k,\dots,x_n),
\end{equation*}
where $f\in P_k$ and $g\in P_{n+1-k}$, where $1\leq k\leq n$.
\end{enumerate}
Each $P_n$ has a particular element $f_n$, where
\begin{enumerate}
	\item 
	$f_1=\id_A$,
	\item
	$f_{n+1}$ is $(x_0,\dots,x_n)\mapsto f_n(x_0,\dots,x_{n-1})\cdot x_n$.
\end{enumerate}
So 
\begin{equation*}
f_n(x_0,\dots,x_{n-1})=(\dotsm(x_0x_1)x_2\dotsm x_{n-1}).  
\end{equation*}
But
$P_5$, for example, also contains $(x,y,z,u,v)\mapsto(x(yz))(uv)$.  In
a semigroup, it is easy to show that this operation is the same as
$f_5$.  In general, we have: 

\begin{theorem}
If $A$ is a semigroup, then $P_n=\{f_n\}$.
\end{theorem}

\begin{proof}
The claim is immediately true when $n=1$.  Suppose it is true when $1\leq n\leq s$.  Each element $g$ of $P_{s+1}$ is therefore
\begin{equation*}
(x_0,\dots,x_s)\mapsto f_n(x_0,\dots,x_{n-1})\cdot f_{s+1-n}(x_n,\dots,x_s)
\end{equation*}
for some $n$, where $1\leq n\leq s$.  If $n=s$, then $g=f_{s+1}$.  If $n<s$, then
\begin{align*}
g(x_0,\dots,x_s)
&=f_n(x_0,\dots,x_{n-1})\cdot(f_{s-n}(x_n,\dots,x_{s-1})\cdot x_s)\\
&=(f_n(x_0,\dots,x_{n-1})\cdot f_{s-n}(x_n,\dots,x_{s-1}))\cdot x_s\\
&=f_s(x_0,\dots,x_{s-1})\cdot x_s\\
&=f_{s+1}(x_0,\dots,x_s),
\end{align*}
so again $g=f_{s+1}$.  By induction, the claim is true for all $n$ in $\N^+$. 
\end{proof}

It follows that, in a semigroup, the product $a_0\dotsm a_{n-1}$ is
unambiguous: it is just $g(a_0,\dots,a_{n-1})$ for any element $g$ of
$P_n$.  We may write also
\begin{equation*}
a_0\dotsm a_{n-1}=\prod_{k=0}^{n-1}a_k=\prod_{k\in n}a_k.
\end{equation*}
In an \emph{abelian} group, the product may be written as a sum:
\begin{equation*}
a_0+\cdots+a_{n-1}=\sum_{k=0}^{n-1}a_k=\sum_{k\in n}a_k.
\end{equation*}
We also use the notation
\begin{align*}
	\prod_{k\in n}a&=a^n,&
	\sum_{k\in n}a&=na.						
\end{align*}

\begin{theorem}
Suppose $(G,\cdot)$ is a semigroup, and $m$ and $n$ range over $\N^+$.
\begin{enumerate}
\item 
On $G$,
\begin{equation*}
x^{m+n}=x^mx^n.
\end{equation*}
That is, if $a\in G$, then
$x\mapsto a^x$ is a homomorphism from $(\N^+,+)$ to $(G,\cdot)$.
\item
On $G$,
\begin{equation*}
x^{mn}=(x^m)^n;
\end{equation*}
that is,
$x\mapsto(y\mapsto y^x)$ is a homomorphism from $(\N^+,1,\cdot)$ to
$(\so G,\id_G,\circ)$. 
\end{enumerate}
\end{theorem}

\begin{proof}
Use induction: $a^{n+1}=a^n\cdot a=a^n\cdot a^1$, and if
$a^{n+m}=a^n\cdot a^m$, then 
\begin{equation*}
a^{n+(m+1)}=a^{(n+m)+1}=a^{n+m}\cdot a=a^na^ma=a^na^{m+1}.
\end{equation*}
Also, $a^{n\cdot 1}=a^n=(a^n)^1$, and if $a^{nm}=(a^n)^m$, then
\begin{equation*}
  a^{n(m+1)}=a^{nm+n}=a^{nm}a^n=(a^n)^ma^n=(a^n)^{m+1}.\qedhere
\end{equation*}
\end{proof}

In a monoid, we define
\begin{equation}\label{eqn:a^0}
a^0=\gid.
\end{equation}
The set $\so G$ in the following was defined in \S~\ref{sect:simp}.

\begin{theorem}
Suppose $(G,\gid,\cdot)$ is a monoid.
\begin{enumerate}
\item 
If $a\in G$, then
$x\mapsto a^x$ is a homomorphism from $(\vnn,0,+)$ to $(G,\gid,\cdot)$.
\item
$x\mapsto(y\mapsto y^x)$ is a homomorphism from $(\vnn,1,\cdot)$ to
$(\so G,\id_G,\circ)$. 
\end{enumerate}
\end{theorem}

In a group, we define
\begin{equation*}
  a^{-n}=(a^n)\inv.
\end{equation*}

\begin{theorem}\label{thm:exp-in-groups}
Suppose $(G,\gid,{}\inv,\cdot)$ is a group.
\begin{enumerate}
\item 
If $a\in G$, then
$x\mapsto a^x$ is a homomorphism from $(\Z,0,+)$ to $(G,\gid,\inv,\cdot)$.
\item
$x\mapsto(y\mapsto y^x)$ is a homomorphism from $(\Z,1,\cdot)$ to
$(\so G,\id_G,\circ)$. 
\end{enumerate}
\end{theorem}

\begin{proof}
  \dots
\end{proof}













\section{Rings}\label{sect:rings}

A homomorphism from a structure to itself is an
\textbf{endomorphism.}\index{endomorphism} 
The set of endomorphisms of an abelian group can be made into an
abelian group in which: 
\begin{enumerate}
\item 
the identity is the constant function $x\mapsto\gid$;
\item
additive inversion converts $f$ to $x\mapsto-f(x)$;
\item
addition converts $(f,g)$ to $x\mapsto f(x)+g(x)$.
\end{enumerate}
If $E$ is an abelian group, let the abelian group of its endomorphisms
be denoted by
\begin{equation*}
  \End E.
\end{equation*}
The set of endomorphisms of $E$ can also be made into
a monoid in which 
the identity is the identity function $x\mapsto x$, and multiplication
is functional composition.
This multiplication
distributes in both senses over addition: 
\begin{align*}
  f(g+h)&=fg+fh,& (f+g)h&=fh+gh.
\end{align*}
We may denote the two combined structures---abelian group and
monoid---by
\begin{equation*}
  (\End E,\circ);
\end{equation*}
this is the \textbf{complete ring of
  endomorphisms}\index{complete ring of endomorphisms} of $E$.  
A substructure of $(\End E,\circ)$ can be called
simply a \textbf{ring of endomorphisms}\index{ring of endomorphisms} of $E$.  

A \textbf{ring}\index{ring} is an abelian group $E$ with a multiplication $\cdot$
such that $(E,\cdot)$
is isomorphic to an endomorphism ring.\footnote{Some writers
  do not 
require a ring as such to have a multiplicative identity.}
  In an arbitrary
ring, the additive identity is usually denoted by~$0$; the
multiplicative, by $1$.

As with a group, so with a ring: an element $a$ determines a singulary
operation $\lambda_a$ on the ring, given by
\begin{equation*}
  \lambda_a(x)=ax.
\end{equation*}

\begin{theorem}\label{thm:x-lambda_x}
  The function $x\mapsto\lambda_x$ embeds a ring in the endomorphism
  ring of its underlying abelian group.
\end{theorem}

\begin{porism}
  A structure is a ring if it has:
\begin{enumerate}
  \item
an addition that makes it an abelian group, and
\item
a multiplication that makes it a monoid,
\end{enumerate}
such that multiplication distributes in both senses over addition.
\end{porism}

If, in a ring, the multiplication commutes---
\begin{equation*}
  xy=yx
\end{equation*}
---then the ring is a \textbf{commutative ring.}\index{commutative ring}

\begin{theorem}\label{thm:Z}
  $\Z$ is a commutative ring.
\end{theorem}

In a ring, an element with both a left and a right inverse can be
called simply \textbf{invertible;}\index{invertible} it is also called
a \textbf{unit.}\index{unit}  

\begin{theorem}\label{thm:units}
In a ring, the units compose a group with respect to
multiplication.  In particular, a unit has a unique
left inverse, which is also a right inverse.
\end{theorem}

The group of units of a ring $R$ is denoted by
\begin{equation*}
  \unit R.
\end{equation*}
For example, $\unit{\Z}=\{1,-1\}$.  Evidently all two-element groups
are isomorphic to this one.

If $R$ is commutative, and
$\unit R=R\setminus\{0\}$, then $R$ is a \textbf{field.}\index{field}  From $\Z$
can be constructed the field $\Q$ of rational numbers; from this can
be constructed the field $\R$ of real numbers and then the field $\C$
of complex numbers.  An example of a ring in which some elements have
right but not left inverses will be given in \S~\ref{sect:prod-sum}.







\section{General linear groups}

Given a commutative ring $R$ and an element $n$ of $\vnn$, we define
\begin{equation*}
\MatR
\end{equation*}
as the set of functions from $n\times n$ into $R$.  A typical such
function can be written as a \textbf{matrix}\index{matrix}
\begin{equation*}
  \begin{pmatrix}
    a^0_0&\cdots&a^0_{n-1}\\
\vdots&\ddots&\vdots\\
a^{n-1}_0&\cdots&a^{n-1}_{n-1}
  \end{pmatrix},
\end{equation*}
or as
\begin{equation*}
(a^i_j)^{i< n}_{j< n},
\end{equation*}
or simply as $(a^i_j)^{i}_{j}$ if the set over which $i$ and $j$ range
is clear.
Addition on $\MatR$ is defined by
\begin{equation*}
  (a^i_j)^{i<n}_{j<n}+(b^i_j)^{i<n}_{j<n}
= (a^i_j+b^i_j)^{i<n}_{j<n}.
\end{equation*}
Multiplication on $\MatR$ is defined by
\begin{equation*}
(a^i_j)^{i<n}_{j<n}(b^j_k)^{j<n}_{k<n}=(\sum_{j\in n}a^i_jb^j_k)^{i<n}_{k<n}.
\end{equation*}
One particular element of $\MatR$ is $(\delta^i_j)^{i<n}_{j<n}$, where
\begin{equation*}
\delta^i_j=\begin{cases}
	1,&\text{ if } i=j,\\
	0,&\text{ otherwise.}
\end{cases}
\end{equation*}

\begin{theorem}\label{thm:M}
If $R$ is a commutative ring, then
  $\MatR$ is a ring with multiplicative identity $(\delta^i_j)^{i<n}_{j<n}$.
\end{theorem}

The group $\unit{\MatR}$ is called the \textbf{general linear
  group}\index{general linear group}
of degree $n$ over $R$; it is also denoted by
\begin{equation*}
\GLR.
\end{equation*}

We shall characterize the elements of this group in
\S~\ref{sect:det}.  Meanwhile, since
\begin{equation*}
  \begin{pmatrix}
    a&b\\c&d
  \end{pmatrix}
  \begin{pmatrix}
    d&-b\\-c&a
  \end{pmatrix}
=\begin{pmatrix}
  ad-bc&0\\0&ad-bc
\end{pmatrix}
=
  \begin{pmatrix}
    d&-b\\-c&a
  \end{pmatrix}
  \begin{pmatrix}
    a&b\\c&d
  \end{pmatrix},
\end{equation*}
we may observe that the element 
$\begin{pmatrix}
 a&b\\c&d 
\end{pmatrix}$ 
of $\MatR$ is invertible if $ad-bc\in\unit R$.










\section{New groups from old}\label{sect:new}

If $G$ and $H$ are two groups, then we can define a
multiplication on $G\times H$ termwise:
\begin{equation*}
(g_0,h_0)(g_1,h_1)=(g_0g_1,h_0h_1)
\end{equation*}
(that is, $(g_0\cdot^Gg_1,h_0\cdot^Hh_1)$).
The result is a group called the \textbf{direct product}\index{direct product} of
  $G$ and $H$ and also denoted by
\begin{equation*}
   G\times H.
\end{equation*}
If $G$ and $H$ are abelian, then their direct product is called a
\textbf{direct sum}\index{direct sum} and is denoted by
\begin{equation*}
G\oplus H.
\end{equation*}

Suppose $\sim$ is an equivalence-relation
on a set $G$, so that it partitions $G$ into equivalence-classes
\begin{equation*}
  \{x\in G\setcolon x\sim a\};
\end{equation*}
such classes can be denoted by $\eqc a$ or $\overline a$.  
The \textbf{quotient}\index{quotient} of $G$ by $\sim$, denoted by 
\begin{equation*}
G\modsim, 
\end{equation*}
is the set of equivalence-classes with respect to $\sim$.
Immediately, if $G$ is a semigroup, and $\sim$ is such that
\begin{equation*}
  a\sim a'\land b\sim b'\implies ab\sim a'b',
\end{equation*}
then $G\modsim$ is a semigroup in which multiplication is given by
\begin{equation*}
  \eqc a\eqc b=\eqc{ab}.
\end{equation*}
In this case, $\sim$ is called a
\textbf{congruence-relation}\index{congruence-relation} with respect to
the multiplication. 

\begin{theorem}
  If $G$ is a group, and $\sim$ is a congruence-relation on $G$, then
  $G\modsim$ is a group.
\end{theorem}

If $n\in\vnn$, recall that two integers $a$ and $b$ are
\textbf{congruent \emph{modulo} $n$} if $n\divides b-a$; in this case
one writes
\begin{equation*}
  a\equiv b\pmod n.
\end{equation*}

\begin{theorem}\label{thm:Z-mod-n}
  If $n\in\N^+$, then congruence \emph{modulo} $n$ is a
  congruence-relation on $\Z$ with respect to addition and
  multiplication, and the quotient is a commutative ring.  If $n$ is
  prime, then this ring is a field.
\end{theorem}

The commutative ring in the theorem can be denoted by
\begin{equation*}
  \Zmod n,
\end{equation*}
though sometimes we may mean to denote the additive group.
Note that $\Zmod0$ is isomorphic to $\Z$.
The direct sum $\Zmod 2\oplus\Zmod 2$ is the \textbf{Klein four
  group,}\index{Klein four group} denoted by 
\begin{equation*}
\Kfg
\end{equation*}
(for `Vierergruppe').  This is the smallest group containing two elements
neither of which is a power of the other.

  A congruence-relation on $\R$ with respect to addition can be
  defined by
  \begin{equation*}
  a\sim b\iff a-b\in\Z.
  \end{equation*}
Then the function
      $a\mapsto \exp(2\pi\mi a)$ is an embedding of $\R\modsim$ in
$\unit{\C}$. 

A \textbf{subgroup}\index{subgroup} of a group is a subset containing the identity that is
closed under multiplication and inversion.  Every group has both
itself and $\{\gid\}$ as subgroups.  Also $G\times\{\gid\}$ and
$\{\gid\}\times H$ are subgroups of $G\times H$, while $G\times G$ has
the subgroup $\{(x,x)\colon x\in G\}$.

\begin{theorem}\label{thm:subgp}
  A subset of a group is a subgroup if and only if it is non-empty and
  closed under the binary operation $(x,y)\mapsto xy\inv$.
\end{theorem}

If $ H$ is a subgroup of $G$, we write\footnote{One might write
  $ H\leqslant G$, if one wants to reserve $H<G$ for the case where
  $H$ is a \emph{proper} subgroup of $G$.} 
\begin{equation*}
H\subgp  G.  
\end{equation*}

\begin{theorem}
  If $\sim$ is a congruence-relation on $G$, then the
  $\sim$-class of $\gid$ is a
subgroup of~$G$.
\end{theorem}

It is important to note that the converse of
the theorem is false in
general: not every subgroup of a group determines a
congruence-relation.  (See Theorem~\ref{thm:n}.)

If $f$ is a homomorphism from $G$ to $H$, then its \textbf{kernel}\index{kernel} is the
set
\begin{equation*}
  \{x\in G\setcolon f(x)=\gid\},
\end{equation*}
denoted by $\ker f$.  The \textbf{image}\index{image} of $f$ is
\begin{equation*}
  \{y\in H\setcolon y=f(x)\text{ for some $x$ in }G\},
\end{equation*}
denoted by $\im f$.

A homomorphism is called:
a \textbf{monomorphism,}\index{monomorphism} if it is injective;
an \textbf{epimorphism,}\index{epimorphism} if it is surjective.

\begin{theorem}\label{thm:ker-im}
  Let $f$ be a homomorphism from $G$ to $H$.
  \begin{enumerate}
    \item
$\ker f\subgp G$.
\item
$f$ is a monomorphism $\iff \ker f=\{\gid\}$.
\item
$\im f\subgp H$.
  \end{enumerate}
\end{theorem}

There is a monomorphism
 from $\R\oplus\R$ into $\Mat[2]{\R}$, namely
 \begin{equation*}
(x,y)\mapsto
\begin{pmatrix}
  x&y\\-y&x
\end{pmatrix}.
 \end{equation*}
One can define $\C$ to be the image of this monomorphism.  One shows
that $\C$ then is a sub-ring of $\Mat{\R}$ and is a field.  The
elements of $\C$ usually denoted by $1$ and $\mi$ are given by
\begin{align*}
1
&=  
\begin{pmatrix}
    1&0\\0&1
  \end{pmatrix},&
  \mi
&=
  \begin{pmatrix}
    0&1\\-1&0
  \end{pmatrix}.
\end{align*}
Then every element of $\C$ is $x+y\mi$ for some unique $x$ and $y$ in
$\R$.  The function $z\mapsto\bar z$ is an automorphism of $\C$, where
\begin{equation*}
  \overline{x+y\mi}=x-y\mi.
\end{equation*}
There is then a monomorphism from $\C\oplus\C$ into $\Mat[2]{\C}$,
namely
 \begin{equation*}
(x,y)\mapsto
\begin{pmatrix}
  x&y\\-\bar y&\bar x
\end{pmatrix};
 \end{equation*}
its image is denoted by
\begin{equation*}
  \Ham
\end{equation*}
in honor of its discoverer Hamilton: it
consists of the \textbf{quaternions.}\index{quaternion}  One shows
that $\Ham$ is a 
sub-ring of $\Mat[2]{\C}$ and that all non-zero elements of $\Ham$ are
invertible, although $\Ham$ is not commutative.
The
element of $\Ham$ usually denoted by $\mj$ is given by
\begin{equation*}
  \mj=
  \begin{pmatrix}
    0&1\\-1&0
  \end{pmatrix}.
\end{equation*}

\begin{theorem}
  An arbitrary intersection of subgroups is a subgroup.
\end{theorem}

Given a subset $A$ of (the universe of) a group $G$, we can `close'
under the three group-operations, obtaining a subgroup, $\gpgen A$.
For a formal definition, we let
\begin{equation*}
  \gpgen A=\bigcap\family S,
\end{equation*}
where $\family S$ is the set of all subgroups of $G$ that
include $A$.  Note that $\gpgen{\emptyset}=\{\gid\}$.

If $G=\gpgen A$, then $G$ is
\textbf{generated}\index{generated} by $A$.  If
$A=\{a_0,\dotsc,a_{n-1}\}$, we may write
\begin{equation*}
  \gpgen{a_0,\dotsc,a_{n-1}}
\end{equation*}
for $\gpgen A$, and say that $G$ has the $n$ \textbf{generators}\index{generators}
$a_0$, \dots, $a_{n-1}$.  In particular, $G$ is \textbf{finitely
  generated}\index{finitely generated} 
in this case.  The subgroup $\gpgen{\mi,\mj}$ of $\Ham$
is the \textbf{quaternion group,}\index{quaternion group} denoted by
\begin{equation*}
\quat;
\end{equation*}
it has eight elements: $\pm1$, $\pm\mi$, $\pm\mj$, and $\pm\mk$, where
$\mk=\mi\mj$.

\section{Cyclic groups}

The \textbf{order}\index{order!--- of a group} of a group is its
size (or cardinality).  The order 
of $G$ is therefore denoted by
\begin{equation*}
  \order G.
\end{equation*}
A group is called \textbf{cyclic}\index{cyclic
  group}\index{group!cyclic ---} if generated by a single element.  If
$a$ is
an element of a group $G$, then $\gpgen a$ is a cyclic subgroup of
$G$, and the \textbf{order}\index{order!--- of an element} of
  $a$, denoted by
\begin{equation*}
  \order a,
\end{equation*}
is just the order of $\gpgen a$.

\begin{theorem}\label{thm:cyc}
  If $a$ is an element of a group $G$, then
  \begin{equation*}
    \gpgen a=\im(n\mapsto a^n).
%\{x\in G\setcolon x=a^n\text{ for some $n$ in }\Z\}.
  \end{equation*}
\end{theorem}

\begin{proof}
Let $f$ be the homomorphism $n\mapsto a^n$ from $\Z$ to $G$.  We have
to show $\gpgen a=\im f$.  Since $\gpgen a$ is a group, we know that
$a^0\in\gpgen a$.  If
$a^n\in\gpgen a$, then $a^{n+1}\in\gpgen a$ and $a^{-n}\in\gpgen a$.  Hence,
by induction, $\im f\included\gpgen a$.  Since $a\in\im f$, we have
$\gpgen a\included\im f$ by definition of $\gpgen a$.
\end{proof}

\begin{theorem}
  If $a$ is a group-element of finite order, then $a^{\order a}=\gid$.
\end{theorem}

\begin{proof}
The subset $\{\gid,a,a^2,\dots,a^{\order a}\}$ of $\gpgen a$ has size at
most $\order a$.
Hence we have $0\leq i<j\leq \order a$ but $a^i=a^j$ for some $i$ and $j$.
Therefore $\gid=a^{j-i}$, and $a^k=a^n$ as long as $k\equiv
n\pmod{j-i}$.  This means $\order a\leq j-i$ and hence $\order a=j-i$.
\end{proof}

\begin{theorem}\label{thm:Z-subg}
  All subgroups of $\Z$ are cyclic.  All nontrivial subgroups of $\Z$
  are isomorphic.
\end{theorem}

\begin{proof}
Say $G\subgp \Z$ and $G\neq\gpgen 0$.  
Let $m$ be the least positive element of $G$.  If $n\in G$, then
$n=km+r$, where $0\leq r<m$; but $r\in G$, so $r=0$.  Thus $\gpgen
m\included G\included\gpgen m$.
  The map $x\mapsto mx$ from $\Z$ to $G$ is an
epimorphism, by Theorem~\ref{thm:cyc}; but its kernel is trivial; so it is an
isomorphism, by Theorem~\ref{thm:ker-im}. 
\end{proof}

\begin{theorem}
  Every cyclic group is isomorphic to some $\Zmod n$.
\end{theorem}

\begin{proof}
  Say $G=\gpgen a$.  By Theorem~\ref{thm:Z-subg}, the epimorphism
  $x\mapsto a^x$ from $\Z$ to $G$ has kernel $\gpgen n$ for some $n$;
  therefore  
  \begin{equation*}
    a^r=a^s\iff a^{r-s}=\gid \iff r-s\in\gpgen n\iff n\divides r-s.
  \end{equation*}
Hence the map $x\mapsto a^x$ is well-defined on $\Zmod n$ and has
trivial kernel.
\end{proof}

\section{Cosets}

Suppose $H\subgp G$.  If $a\in G$, let
\begin{gather*}
  aH=\lambda_a\setimb H,\\
Ha=\rho_a\setimb H.
\end{gather*}
Each of the sets $aH$ is a \textbf{left coset}\index{left!---
  coset}\index{coset} 
of $H$, and the set of these 
is denoted by 
\begin{equation*}
  G/H.
\end{equation*}
Each of the sets $Ha$ is a \textbf{right coset}\index{right!--- coset} of $H$, and the set of these
is denoted by 
\begin{equation*}
  H\backslash G.
\end{equation*}

\begin{theorem}\label{thm:cosets}
The left cosets of $H$ in $G$ are the classes determined by an
equivalence-relation on $G$.  Likewise for the right cosets.  All
cosets of $H$ have the same size; also, $G/H$ and
  $H\backslash G$ have the same size.
\end{theorem}

\begin{proof}
  We have $a\in aH$.  All cosets of $H$ have the same size as $H$, since
  the maps $\lambda_a$ and $\rho_a$ are bijections by Cayley's Theorem.  If $aH\cap bH\neq\emptyset$, then $ah\in bH$ for
  some $h$ in $H$, so $a\in bHH\inv\included bH$, whence $aH\included bH$, so
  $aH=bH$.  Hence the left cosets compose a partition of $G$, and
  therefore determine an equivalence-relation.  Inversion is a
  permutation of $G$ taking $aH$ to $Ha\inv$, so $G/H$ and
  $H\backslash G$ have the same size.
\end{proof}

The size of $G/H$ (or $H\backslash G$) is the \textbf{index}\index{index} of $H$ in
  $G$ and can be denoted by
\begin{equation*}
  [G:H].
\end{equation*}

\begin{theorem}\label{thm:KHG}
  If $K\subgp H\subgp G$, then $[G:K]=[G:H][H:K]$.
\end{theorem}

\begin{proof}
  The partition of $H$ into left cosets of $K$ is transformed, under
  each $X\mapsto\lambda_a[X]$, into a partition of a coset of $H$.
  Indeed, if $bK\cap aH\neq\emptyset$, then as in the proof of
  Theorem~\ref{thm:cosets}, $bK\included aH$.
\end{proof}

\begin{theorem}
  If $H$ and $K$ are finite subgroups of $G$, then
  \begin{equation*}
    \order{HK}=\frac{\order H\order K}{\order{H\cap K}}.
  \end{equation*}
\end{theorem}

\begin{proof}
  Partition $H$ as $a_1(H\cap K)\cup\dotsb\cup a_n(H\cap K)$.  Then
  $\order H=n\order{H\cap K}$.  Also
  \begin{equation*}
    a_1K\cup\dotsb\cup a_nK=HK.
  \end{equation*}
This union is disjoint, since if $x=a_ik_i=a_jk_j$, where $k_i$ and
$k_j$ are in $K$,
then $a_j{}\inv a_i\in H\cap K$, and hence $a_i(H\cap K)=a_j(H\cap
K)$, so that $a_i=a_j$.  Therefore $\order{HK}=n\order K$.
\end{proof}

\begin{theorem}\label{thm:HHK}
  Suppose $H$ and $K$ are subgroups of $G$, and $[G:K]$ is finite.
  Then
  \begin{equation*}
    [H:H\cap K]\leq[G:K],
  \end{equation*}
with equality if and only if $G=HK$.
\end{theorem}

\begin{proof}
  As in the previous proof, the function
  $x(H\cap K)\mapsto xK$ from $H/H\cap K$ to $G/K$ is injective; it is
  surjective if and only if $G=HK$.
\end{proof}

\begin{theorem}
  If $[G:H]$ and $[G:K]$ are finite, then
  \begin{equation*}
    [G:H\cap K]\leq[G:H][G:K],
  \end{equation*}
with equality if and only if $G=HK$.
\end{theorem}

\begin{proof}
  By Theorems~\ref{thm:KHG} and~\ref{thm:HHK}, $[G:H\cap
    K]=[G:H][H:H\cap K]\leq [G:H][G:K]$, again with equality if and
  only if $G=HK$.
\end{proof}

\section{Lagrange's Theorem}


\begin{theorem}[Lagrange]\label{thm:Lagrange}\index{Lagrange's
    Theorem}\index{theorem!Lagrange's Th---}
If $H\subgp G$, then $\order H$ divides $\order G$, provided both are finite.
\end{theorem}

\begin{proof}
Use Theorem~\ref{thm:KHG} when $K=\gpgen{\gid}$.
\end{proof}

\begin{corollary}
  Groups of prime order are cyclic.
\end{corollary}

\begin{proof}
  Say $\order G=p$.  There is $a$ in $G\setminus\gpgen{\gid}$, so $\order
  a>1$; but $\order a\divides p$, so $\order a=p$, that is, $G=\gpgen a$.
\end{proof}

\begin{corollary}
  If $G$ is finite and $a\in G$, then $a^{\order G}=\gid$.
\end{corollary}

\begin{proof}
$a^{\order a}=\gid$ and $\order a$ divides $\order G$.
\end{proof}

An application is the theorems of Fermat and Euler
(Theorems~\ref{thm:Fermat} and~\ref{thm:Euler}).  The first
Sylow Theorem (Theorem~\ref{thm:Sylow-1}) is a partial converse.

\begin{theorem}
  $\Zmodu n=\{[x]\in\Zmod n\setcolon \gcd(x,n)=1\}$. 
\end{theorem}

\begin{proof}
  $\gcd(m,n)=1$ if and only if $am+bn=1$ for some integers $a$ and
  $b$; but this just means $[a][m]=[1]$ for some $a$.
\end{proof}

\begin{theorem}[Fermat]\label{thm:Fermat}
If the prime $p$ is not a factor of $a$, then
\begin{equation*}
  a^{p-1}\equiv 1\pmod p.
\end{equation*}
Hence $a^p\equiv a\pmod p$ for any integer $a$.
\end{theorem}

\begin{proof}
  The order of $\Zmodu p$ is $p-1$, and $[a]\in\Zmodu p$.
  This proves the first claim, and the second if $p\ndivides a$; the
  second is trivial if $p\divides a$.
\end{proof}

If $n\neq0$, let the order of $\Zmodu n$ be denoted by
\begin{equation*}
  \phi(n).
\end{equation*}

\begin{theorem}[Euler]\label{thm:Euler}\index{Euler's Theorem}
  \index{theorem!Euler's Th---} 
  If $\gcd(a,n)=1$, then $a^{\phi(n)}\equiv 1\pmod n$.
\end{theorem}


\section{Normal subgroups}

If $H\subgp G$, then there are equivalences $\siml$ and $\simr$ on $G$
given by 
\begin{equation*}
  x\siml y\iff xH=yH;\qquad\qquad x\simr y\iff Hx=Hy.
\end{equation*}


\begin{theorem}\label{thm:n}
  Suppose $H\subgp G$.  The following are equivalent:
  \begin{enumerate}
    \item
$G/H$ is a group.
\item
$\siml$ and $\simr$ are the same.
\item
$aH=Ha$ for all $a$ in $G$.
\item
$a\inv Ha=H$ for all $a$ in $G$.
  \end{enumerate}
\end{theorem}

\begin{proof}
Suppose $G/H$ is a group, that is,
$\siml$ is a congruence-relation.  This
means
\begin{equation*}
  xH=x'H\land yH=y'H\implies xyH=x'y'H.
\end{equation*}
As a special case, we have that, if $h\in H$, so that $hH=H$, then
$hyH=yH$, so $y\inv hyH=H$.
Thus
\begin{equation*}
  y\inv Hy=H,
\end{equation*}
equivalently, $Hy=yH$.  Therefore
$\siml$ and $\simr$ are the same.

Conversely, suppose these relations are the same.  Then every $x$ has the
same congruence class with respect to either one: $xH=Hx$.  If $xH=x'H$ and
$yH=y'H$, then $xyH=xy'H=xHy'=x'Hy'=x'y'H$.  Thus $G/H$ is a group.
\end{proof}

A subgroup $H$ of $G$ meeting any of these equivalent conditions is
called \textbf{normal,}\index{normal!--- subgroup} and we write
\begin{equation*}
  H\nsubgp G.
\end{equation*}
Of abelian groups, all subgroups are normal.
In general, if $N\nsubgp G$, then the group $G/N$ is the
\textbf{quotient-group}% 
\index{quotient!--- group}\index{group!quotient ---} of
$G$ by $N$.

\begin{theorem}\label{thm:NGHG}
If $N\nsubgp G$ and $H\subgp G$, then $N\cap H\nsubgp H$.
(That is, normality is preserved in subgroups.)
\end{theorem}

\begin{proof}
  The defining property of normal subgroups is universal, that is,
  $N\nsubgp G$ means
$(G,N)\models\Forall x\Forall y(x\in N\to yxy\inv\in N)$.
\end{proof}

\begin{theorem}
If $N\nsubgp G$ and $H\subgp G$, 
then $\gpgen{N\cup H}=NH$.  
\end{theorem}

\begin{proof}
Suppose $n\in N$ and $h\in H$.  
Then $nh=hh\inv nh$.
Since $N\nsubgp{N\cup H}$, we have
$h\inv nh\in N$, so $nh\in HN$. 
Thus $NH\included HN$, so by symmetry $NH=HN$.  Therefore
\begin{equation*}
NH(NH)\inv=NHH\inv N\inv=NHHN\included NHN=NNH\included NH, 
\end{equation*}
that is,
$NH$ is closed under $(x,y)\mapsto xy\inv$.  Since $NH$ also contains
$\gid$, it is a subgroup of $G$ by Theorem~\ref{thm:subgp}. 
\end{proof}

\begin{theorem}\label{thm:isdp}
Suppose $N\nsubgp G$ and $H\subgp G$ and
$N\cap H=\gpgen{\gid}$.  Then the surjection $(x,y)\mapsto xy$ from
$N\times H$ to $NH$ is a bijection.  
\end{theorem}

\begin{proof}
If $g$ and $h$ are in $H$, and $m$ and $n$ are in $N$, and $gm=hn$,
then  
\begin{equation*}
  h\inv g=nm\inv,
\end{equation*}
so each side must be $\gid$, and hence $g=h$ and $m=n$.  
\end{proof}

In the theorem, $NH$ is the \textbf{internal semidirect
  product}\index{internal semidirect product} of
$N$ and $H$.  Note well that the bijection between $N\times H$ and
$NH$ need not be an isomorphism, since we have, in $N\times H$,
\begin{equation*}
  (m,g)(n,h)=(mn,gh),
\end{equation*}
while, in $NH$,
\begin{equation}\label{eqn:sdp}
(mg)(nh)=(mgng\inv)(gh). 
\end{equation}
Theorem~\ref{thm:wdp} below establishes conditions under which the
bijection \emph{is} an isomorphism.  Semidirect products in general
are treated in \S~\ref{sect:semidirect}.

\begin{theorem}
  The normal subgroups of a group are precisely the kernels of
  homomorphisms on the group.
\end{theorem}

\begin{proof}
  If $f$ is a homomorphism from $G$ to $H$, then
  $f(ana\inv)=f(a)f(n)f(a)\inv=\gid$ for all $n$ in $\ker f$, so $a(\ker
  f)a\inv\included \ker f$; thus $\ker f\nsubgp G$.  Conversely,
if $N\nsubgp G$, then the map $x\mapsto xN$ from $G$ to $G/N$ is a
homomorphism with kernel~$N$.
\end{proof}

In the proof, the map $x\mapsto xN$ is the 
\textbf{canonical projection}%
\index{projection}\index{canonical!--- projection} 
or the \textbf{quotient map}\index{quotient map}
of $G$ onto $G/N$; it may be denoted
  by $\pi$.

\begin{theorem}\label{thm:hom-n}
  If $f$ is a homomorphism from $G$ to $H$, and $N$ is a normal
  subgroup of $G$ such that $N\subgp \ker f$, then there is a unique
  homomorphism
  $\tilde f$ from $G/N$ to $H$ such that $f=\tilde f\circ \pi$, that
  is, the following diagram \textbf{commutes}\index{commutes} (all directed paths from
  one node to another represent the same function).
  \begin{equation*}
\xymatrix{
N \ar[r] \ar[dr]_1 & G \ar[r]^{\pi} \ar[d]^f & G/N
\ar[dl]^{\tilde f}\\
&H&
}
  \end{equation*}
\end{theorem}

\begin{proof}
If $\tilde f$ exists, it must satisfy
 $\tilde f(xN)=f(x)$ for all $x$ in $G$.  Such $\tilde f$ does exist,
 since if $xN=yN$, then $xy\inv\in N\subgp \ker f$, so $f(xy\inv)=\gid$ and
 $f(x)=f(y)$. 
\end{proof}

\begin{corollary}[First Isomorphism Theorem]%
\index{isomorphism!I--- Theorems|(}\index{theorem!Isomorphism Th---s|(}  
  $G/\ker f\cong \im f$ for any homomorphism $f$ on~$G$.
\end{corollary}

\begin{proof}
  Let $N=\ker f$; then $\tilde f$ is the
  desired homomorphism.
\end{proof}

\begin{corollary}
  If $f$ is a homomorphism from $G$ to $H$, and $N$ is a normal subgroup
  of $G$, and $M\nsubgp H$, and $f[N]\subgp M$, then there is a
  homomorphism $\tilde f$ from $G/N$ to $H/M$ such that the following
  diagram commutes:
  \begin{equation*}
    \xymatrix{
N\ar[d]\ar[r] & G \ar[r]^{\pi}\ar[d]_f & G/N\ar[d]^{\tilde f}\\
M \ar[r]      & H \ar[r]                     & H/M
}
  \end{equation*}
\end{corollary}

\begin{proof}
  The induced homomorphism from $N$ to $H/M$ is trivial. 
\end{proof}

\begin{theorem}[Second Isomorphism]
  If $H\subgp G$ and $N\nsubgp G$, then 
  \begin{equation*}
%  H/(H\cap N)\cong NH/N.
\frac H{H\cap N}\cong\frac{HN}N.
  \end{equation*}
\end{theorem}

\begin{proof}
The map $h\mapsto hN$ from $H$ to $HN/N$ is surjective with kernel
$H\cap N$.  So the claim follows by the First Isomorphism Theorem (a corollary to Theorem~\ref{thm:hom-n}).
\end{proof}

For example,
in $\Z$, since $\gpgen n\cap\gpgen m=\gpgen{\lcm(n,m)}$ and $\gpgen
n+\gpgen m=\gpgen{\gcd(n,m)}$, we have
\begin{equation*}
  \frac{\gpgen n}{\gpgen{\lcm(n,m)}}
\cong\frac{\gpgen{\gcd(n,m)}}{\gpgen m}.
\end{equation*}

\begin{theorem}[Third Isomorphism]
  If $N$ and $K$ are normal subgroups of $G$ and $N\subgp K$, then
  $K/N\nsubgp G/N$ and
  \begin{equation*}
    \frac{G/N}{K/N}\cong G/K.
  \end{equation*}
\end{theorem}

\begin{proof}
By Theorem~\ref{thm:hom-n}, the map $xN\mapsto xK$ from $G/N$ to $G/K$
  is a well-defined epimorphism.  The kernel contains $xN$ if and only
  if 
  $x\in K$, that is, $xN\in K/N$.  Again the claim now follows
  by the First Isomorphism Theorem (a corollary to Theorem~\ref{thm:hom-n}).
\end{proof}
\index{isomorphism!I--- Theorems|)}\index{theorem!Isomorphism Th---s|)}

Theorem~\ref{thm:hom-n} will also be used to prove von Dyck's Theorem (Theorem~\ref{thm:vD}).

\begin{lemma}
  If $f$ is an epimorphism from $G$ onto $H$, then there is a
  one-to-one correspondence $K\mapsto f[K]$ between subgroups of $G$
  that include 
  $\ker f$ and subgroups of $H$; under this, normal subgroups
  correspond.
  \begin{equation*}
    \xymatrix{
\ker f \ar[r]\ar[d]  &   K  \ar[r]\ar[d] & G \ar@{>>}[d]^f\\
\{e\}\ar[r] & f[K] \ar[r] & H
}
  \end{equation*}
\end{lemma}

\begin{theorem}
  If $N\nsubgp G$, then every subgroup of $G/N$ is $K/N$ for some
  subgroup $K$ of $G$ that includes $N$, and moreover $K/N$ is normal
  in $G/N$
  if and only if $K$ is normal in~$G$.
  \begin{equation*}
    \xymatrix{
N \ar[r]\ar[d]  &   K  \ar[r]\ar[d] & G \ar@{>>}[d]^f\\
\{e\}\ar[r] & K/N \ar[r] & G/N
}
  \end{equation*}
\end{theorem}

\section{Finite groups}

Since every group can be considered as a symmetry group of \emph{itself,}
every \emph{finite} group $G$ can be considered as a symmetry group of a
finite set.  In particular, $G$ can be considered as a subgroup of $\Sym
n$ for some $n$ in $\vnn$.

An element $\sigma$ of $\Sym n$ can be
denoted by
\begin{equation*}
  \begin{pmatrix}
       0  &        1  & \cdots &        n-1\\
\sigma(0) & \sigma(1) & \cdots & \sigma(n-1)
  \end{pmatrix}.
\end{equation*}
In particular, the permutation
\begin{equation*}
  \begin{pmatrix}
    0 & 1 & \cdots & n-2 & n-1\\
    1 & 2 & \cdots & n-1 &   0
  \end{pmatrix}
\end{equation*}
can be called a \emph{cycle.}\index{cycle}  More generally, if $m\leq
n$, then the 
permutation 
\begin{equation*}
  \begin{pmatrix}
    0 & 1 & \cdots & m-2 & m-1 & m & \cdots & n-1\\
    1 & 2 & \cdots & m-1 &   0 & m & \cdots & n-1
  \end{pmatrix}
\end{equation*}
is a cycle too, or more precisely an \emph{$m$-cycle.}  For the
moment, let us call this $\sigma_m$.
In the most general sense, an
\textbf{$m$-cycle,} or a cycle of \textbf{length}\index{length} $m$,
in $\Sym n$ is an element of the form
\begin{equation*}
  \begin{pmatrix}
\tau(0) &\tau(1) &\cdots &\tau(m-2) &\tau(m-1) &\tau(m) &\cdots &\tau(n-1)\\
\tau(1) &\tau(2) &\cdots &\tau(m-1) &  \tau(0) &\tau(m) &\cdots &\tau(n-1)
  \end{pmatrix}
\end{equation*}
where $\tau\in\Sym n$.  Let this $m$-cycle be called $\sigma$.  Then
$\sigma(\tau(k))=\tau(\sigma_m(k))$, so
\begin{equation*}
  \sigma=\tau\sigma_m\tau\inv.
\end{equation*}
In general, the length of a cycle is its order.
The $m$-cycle $\sigma$ above can be written 
more neatly as
\begin{equation*}
  \begin{pmatrix}
    \tau(0) & \tau(1) & \cdots & \tau(m-1)
  \end{pmatrix}.
\end{equation*}
In this notation, the same cycle $\sigma$ can be written in $m$
different ways, as
\begin{equation*}
  \begin{pmatrix}
    \tau(i) & \tau(i+1) & \cdots & \tau(m-1) & \tau(0) & \cdots & \tau(i-1)
  \end{pmatrix}
\end{equation*}
for any $i$ in $m$.  

Two elements $\sigma$ and $\tau$ of $\Sym n$ are \textbf{disjoint}\index{disjoint} if,
for all $x$ in $n$,
\begin{equation*}
  \sigma(x)\neq x\implies\tau(x)=x.
\end{equation*}
In this case, $\sigma\tau=\tau\sigma$.

\begin{theorem}
  Every element of $\Sym n$ is a product of disjoint cycles of length
  at least $2$, uniquely up to order of factors.
\end{theorem}

\begin{proof}
  Let $\sigma\in\Sym n$.  If $k\in n$, let
  \begin{equation*}
    [k]=\{\sigma^{\ell}(k)\colon \ell\in\Z\}.
  \end{equation*}
Then the sets $[k]$ partition $n$: we have
\begin{equation*}
  n=[k_0]\cup\cdots\cup[k_{\ell-1}]
\end{equation*}
for some $\ell$, the union being disjoint.  If $i\in\ell$,
define $\sigma_i$ by
\begin{equation*}
  \sigma_i(x)=
  \begin{cases}
    \sigma(x),&\text{ if }x\in[k_i],\\
x,&\text{ otherwise.}
  \end{cases}
\end{equation*}
If $[k_i]$ has size $\ell_i$, then
$\sigma_i$ is the $\ell_i$-cycle 
$
\begin{pmatrix}
  k_i & \sigma(k_i) & \cdots & \sigma^{\ell_i-1}(k_i)
\end{pmatrix}
$.
Finally, $\sigma$ is the product (that is, the composition) of all of
the $\sigma_i$ such that $\ell_i>1$.
\end{proof}

\begin{theorem}
  The order of a finite permutation is the least common multiple of
  the orders of its disjoint cyclic factors.
\end{theorem}

A $2$-cycle is also called a \textbf{transposition.}\index{transposition}

\begin{corollary}
  Every finite permutation is a product of transpositions.
\end{corollary}

\begin{proof}
$\begin{pmatrix}
    0 & 1 & \cdots & m-1
  \end{pmatrix}
=
\begin{pmatrix}
  0 & m-1
\end{pmatrix}
\dotsm
\begin{pmatrix}
  0 & 2
\end{pmatrix}
\begin{pmatrix}
  0 & 1
\end{pmatrix}$.
\end{proof}

Let the set of $2$-element subsets of $n$ be denoted by
\begin{equation*}
  \unordered{2}{n}.
\end{equation*}
If $\sigma\in\Sym n$, and $\{i,j\}\in\unordered 2n$, then we can
define 
\begin{equation*}
  \sigma(\{i,j\})=\{\sigma(i),\sigma(j)\}.
\end{equation*}
Thus we have a homomorphism from $\Sym n$ to $\Sym{\unordered 2n}$.
Understanding $n$ as the subset $\{0,\dots,n-1\}$ of $\Q$, we have, for
each $\sigma$ in $\Sym n$, a
function $X\mapsto\sq X$ from $\unordered 2n$ to $\unit{\Q}$ given by
\begin{equation*}
  \sq{\{i,j\}}=\frac{\sigma(i)-\sigma(j)}{i-j}.
\end{equation*}
Then we can define the
function $\sigma\mapsto\sgn{\sigma}$ from $\Sym n$
into $\unit{\Q}$ by
\begin{equation*}
  \sgn{\sigma}=
\prod_{X\in \unordered{2}{n}}
\sq X.
\end{equation*}

\begin{theorem}
  The function $\sigma\mapsto\sgn{\sigma}$ is a homomorphism
from  $\Sym n$ onto the subgroup $\gpgen{-1}$ of $\unit{\Q}$; it takes
every transposition to $-1$. 
\end{theorem}

\begin{proof}
If $\sigma=
\begin{pmatrix}
  k&\ell
\end{pmatrix}$, then
\begin{equation*}
  \sgn{\sigma}=\sq{\{k,\ell\}}\prod_{i\in
    n\setminus\{k,\ell\}}(\sq{\{i,\ell\}}\sq{\{k,i\}}) 
=\frac{\ell-k}{k-\ell}\cdot\prod_{i\in
  n\setminus\{k,\ell\}}\Bigl(\frac{i-k}{i-\ell}\cdot\frac{\ell-i}{k-i}\Bigr)
  =-1.  
\end{equation*}
If $\sigma$ and $\tau$ are arbitrary elements of $\Sym n$, then
\begin{align*}
  \sgn{\sigma\tau}
&=\prod_{\{i,j\}\in
  \unordered{2}{n}}\frac{\sigma(\tau(i))-\sigma(\tau(j))}{i-j}\\
&=\prod_{\{i,j\}\in
  \unordered{2}{n}}\left(\frac{\sigma(\tau(i))-\sigma(\tau(j))}
{\tau(i)-\tau(j)}\cdot
\frac{\tau(i)-\tau(j)}{i-j}\right)\\
&=\prod_{X\in
  \unordered{2}{n}}\sq{\tau(X)}\cdot\sgn{\tau}\\
&=\sgn{\sigma}\sgn{\tau}
\end{align*}
since $\tau$ permutes $\unordered 2n$.
\end{proof}

The value $\sgn{\sigma}$ can now be called the \textbf{signum}\index{signum} of
$\sigma$; it is $1$ if and only if $\sigma$ is the product of an even
number of transpositions.  Such a product is itself called
\textbf{even;}\index{even} the other permutations, with signum $-1$, are called
\textbf{odd.}\index{odd} 

The \textbf{alternating
  group}\index{alternating}\index{group!alternating ---} of degree 
$n$ is the kernel of 
$\sigma\mapsto\sgn \sigma$ on
$\Sym n$ and is denoted by
\begin{equation*}
  \Alt n.
\end{equation*}
Hence $\Alt n\nsubgp\Sym n$ and $[\Sym n:\Alt n]=2$.

A group is \textbf{simple}\index{simple group}%
\index{group!simple ---} if it has no proper nontrivial normal
subgroups.  
For example,
  $\Zmod n$ is simple just in case $\abs n$ is prime.  Hence the
  only simple abelian groups are the $\Zmod p$, where $p$ is prime.

\begin{lemma}
  $\Alt n$ is generated by the $3$-cycles in $\Sym n$.
\end{lemma}

\begin{proof}
The group $\Alt n$ is generated by the products
$\begin{pmatrix}
  a&b
\end{pmatrix}
\begin{pmatrix}
  a&c
\end{pmatrix}$ and
$\begin{pmatrix}
  a&b
\end{pmatrix}
\begin{pmatrix}
  c&d
\end{pmatrix}$, where $a$, $b$, $c$, and $d$ are distinct elements of
$n$.  But 
\begin{gather*}
  \begin{pmatrix}
    a & b
  \end{pmatrix}
  \begin{pmatrix}
    a & c
  \end{pmatrix}=
\begin{pmatrix}
    a & c & b
  \end{pmatrix},\\
\begin{pmatrix}
 a & b   
  \end{pmatrix}
  \begin{pmatrix}
    c & d
  \end{pmatrix}
=
  \begin{pmatrix}
    b & c & a
  \end{pmatrix}
  \begin{pmatrix}
    c & d & b
  \end{pmatrix}.
\end{gather*}
Hence all $3$-cycles belong to $\Alt n$, and this group is generated
by these cycles.
\end{proof}

\begin{lemma}
  $\Alt n$ is generated by the $3$-cycles 
$\begin{pmatrix}
    0 & 1 & k
  \end{pmatrix}$,
where $1<k<n$.
\end{lemma}

\begin{proof}
If $a$, $b$, and $c$ are distinct elements of $n\setminus\{0,1\}$, then
\begin{align*}
  \begin{pmatrix}
    0 & a & b
  \end{pmatrix}
&= \begin{pmatrix}
    0 & 1 & b
  \end{pmatrix}
  \begin{pmatrix}
    a & 1 & 0
  \end{pmatrix}
= \begin{pmatrix}
    0 & 1 & b
  \end{pmatrix}
  \begin{pmatrix}
    0 & 1 & a
  \end{pmatrix}\inv,\\
\begin{pmatrix}
1 & a & b
\end{pmatrix}
&= \begin{pmatrix}
    1 & 0 & b
  \end{pmatrix}
  \begin{pmatrix}
    a & 0 & 1
  \end{pmatrix}
= \begin{pmatrix}
    0 & 1 & b
  \end{pmatrix}\inv
  \begin{pmatrix}
    0 & 1 & a
  \end{pmatrix},\\
\begin{pmatrix}
a & b & c  
\end{pmatrix}
&=
\begin{pmatrix}
  c&1&0
\end{pmatrix}
\begin{pmatrix}
  0 & a & b
\end{pmatrix}
\begin{pmatrix}
  0 & 1 & c
\end{pmatrix}.\qedhere
\end{align*}
\end{proof}

\begin{lemma}
Any normal subgroup of $\Alt n$ containing a $3$-cycle \emph{is} $\Alt
  n$. 
\end{lemma}

\begin{proof}
We show that every $3$-cycle is conjugate in $\Alt n$ to a cycle
$\begin{pmatrix}
  0&1&k
\end{pmatrix}$.  It is enough to note that
  $
  \begin{pmatrix}
    a & b & d
  \end{pmatrix}=
\underbrace{\begin{pmatrix}
    a & b
  \end{pmatrix}
  \begin{pmatrix}
    c & d
  \end{pmatrix}}
  \begin{pmatrix}
    c & b & a
  \end{pmatrix}
\underbrace{
  \begin{pmatrix}
    c & d
  \end{pmatrix}
  \begin{pmatrix}
    a & b
  \end{pmatrix}}$.
\end{proof}

\begin{lemma}
  If $n>4$, then a normal subgroup of $\Alt n$ contains a $3$-cycle,
  provided it has 
  a nontrivial element whose factorization into disjoint cycles
  contains one of the following:
  \begin{enumerate}
    \item
a cycle of length at least $4$;
\item
two cycles of length $3$;
\item
transpositions, only one $3$-cycle, and no other cycles; or
\item
only transpositions.
  \end{enumerate}
\end{lemma}

\begin{proof}
\begin{asparaenum}
\item
  If $k\geq 4$, and $\sigma$ is disjoint from 
$\begin{pmatrix}
      0 & 1 & \dots & k-1
    \end{pmatrix}$, then
  \begin{equation*}
  \begin{pmatrix}
      0 & 1 & 2
    \end{pmatrix}
    \begin{pmatrix}
      0 & 1 & \dots & k-1
    \end{pmatrix}\sigma
    \begin{pmatrix}
      2 & 1 & 0
    \end{pmatrix}
\sigma\inv
    \begin{pmatrix}
k-1&\dots & 1 & 0
    \end{pmatrix}=
    \begin{pmatrix}
      0 & 1 & 3
    \end{pmatrix}.
  \end{equation*}
\item
If $\sigma$ is disjoint from $\begin{pmatrix}
    0 & 1 & 2
  \end{pmatrix}
  \begin{pmatrix}
    3 & 4 & 5
  \end{pmatrix}$, then we reduce to the previous case:
  \begin{equation*}
  \begin{pmatrix}
    0 & 1 & 3
  \end{pmatrix}
\underbrace{
  \begin{pmatrix}
    0 & 1 & 2
  \end{pmatrix}
  \begin{pmatrix}
    3 & 4 & 5
  \end{pmatrix}}\sigma
  \begin{pmatrix}
    3 & 1 & 0
  \end{pmatrix}
\sigma\inv
\underbrace{
  \begin{pmatrix}
    5 & 4 & 3
  \end{pmatrix}
  \begin{pmatrix}
    2 & 1 & 0
  \end{pmatrix}}=
  \begin{pmatrix}
    0 & 1 & 4 & 2 & 3
  \end{pmatrix}.
  \end{equation*}
 \item
If $\sigma$ is disjoint from 
$\begin{pmatrix}
  0 & 1 & 2
\end{pmatrix}$ and is the product of transpositions, then
\begin{equation*}
\left[\begin{pmatrix}
  0 & 1 & 2
\end{pmatrix}\sigma\right]^2=
\begin{pmatrix}
2 & 1 & 0
\end{pmatrix}.
\end{equation*}
\item
If $\sigma$ is a product of transpositions disjoint from 
$\begin{pmatrix}
  0&1
\end{pmatrix}$ and
$\begin{pmatrix}
 2&3 
\end{pmatrix}$, then
\begin{gather*}
  \begin{pmatrix}
  0 & 1 & 2
\end{pmatrix}
\underbrace{
\begin{pmatrix}
  0 & 1
\end{pmatrix}
\begin{pmatrix}
  2 & 3
\end{pmatrix}
\sigma}
\begin{pmatrix}
  2 & 1 & 0
\end{pmatrix}
\underbrace{
\sigma
\begin{pmatrix}
  3 & 2
\end{pmatrix}
\begin{pmatrix}
  1 & 0
\end{pmatrix}}
=
\begin{pmatrix}
  0 & 2
\end{pmatrix}
\begin{pmatrix}
  1 & 3
\end{pmatrix},\\
\begin{pmatrix}
  0 & 2 & 4
\end{pmatrix}
\underbrace{
\begin{pmatrix}
  0 & 2
\end{pmatrix}
\begin{pmatrix}
  1 & 3
\end{pmatrix}}
\begin{pmatrix}
  4 & 2 & 0
\end{pmatrix}
\underbrace{
\begin{pmatrix}
  3 & 1
\end{pmatrix}
\begin{pmatrix}
  2 & 0
\end{pmatrix}}
=
\begin{pmatrix}
  0 & 4 & 2
\end{pmatrix}.\qedhere
\end{gather*}
\end{asparaenum}
\end{proof}

% END OF DAY 6 (October 13, 2008)

\begin{theorem}
  $\Alt n$ is simple if and only if $n\neq 4$.
\end{theorem}

\begin{proof}
  $\Alt 1$ and $\Alt 2$ are trivial, and $\Alt 3\cong\Zmod 3$.
The case when $n>4$ is handled by the previous lemmas.  Finally, every
element of $\Alt 4$ (in fact, of $\Sym 4$) can be considered as a
permutation of the set
\begin{equation*}
  \Bigl\{
\bigl\{\{0,1\},\{2,3\}\bigr\},
\bigl\{\{0,2\},\{1,3\}\bigr\},
\bigl\{\{0,3\},\{1,2\}\bigr\}\Bigr\}.
\end{equation*}
Thus we get an epimorphism from $\Alt 4$ to $\Sym 3$ whose kernel is
therefore a proper nontrivial normal subgroup.
\end{proof}

The normal subgroup of $\Alt 4$ found in the proof is
  \begin{equation*}
    \gpgen{
      \begin{pmatrix}
	0 & 1
      \end{pmatrix}
      \begin{pmatrix}
	2 & 3
      \end{pmatrix}, 
      \begin{pmatrix}
	0 & 2
      \end{pmatrix}
      \begin{pmatrix}
	1 & 3
      \end{pmatrix}, 
      \begin{pmatrix}
	0 & 3
      \end{pmatrix}
      \begin{pmatrix}
	1 & 2
      \end{pmatrix}}.
  \end{equation*}
We can obtain it by considering $\Alt 4$ as the group of
rotational symmetries of the regular tetrahedron.  The vertices of
this tetrahedron can be taken as $4$ of the $8$ vertices of a cube:
say, the vertices with coordinates $(1,1,1)$, $(1,-1,-1)$, $(-1,1,-1)$,
and $(-1,-1,1)$.  Then a symmetry of the tetrahedron determines a
permutation of the $3$ coordinate axes, hence an element of $\Sym 3$.

\section{Determinants}\label{sect:det}

Let $R$ be a commutative ring.
We define the
function $X\mapsto\det(X)$ from $\MatR$ to $R$ by
\begin{equation*}
  \det((a^i_j)^{i<n}_{j<n})=\sum_{\sigma\in\Sym
    n}\sgn{\sigma}\prod_{i<n}a^i_{\sigma(i)}.
\end{equation*}

\begin{theorem}
  The function $X\mapsto\det(X)$ is a multiplicative homomorphism,
  that is,
  \begin{equation*}
    \det(XY)=\det(X)\det(Y).
  \end{equation*}
\end{theorem}

\begin{proof}
  We shall use the identity
  \begin{equation*}
    \prod_{i<k}\sum_{j<n}f(i,j)=\sum_{\phi\colon k\to n}\prod_{i<k}f(i,\phi(i)).
  \end{equation*}
Let $A=(a^i_j)^{i<n}_{j<n}$ and $B=(b^i_j)^{i<n}_{j<n}$.  Then
\begin{align*}
  \det(AB)
&=\det((\sum_{j<n}a^i_jb^j_k)^{i<n}_{k<n})\\
&=\sum_{\sigma\in\Sym n}\sgn{\sigma}\prod_{i<n}\sum_{j<n}a^i_jb^j_{\sigma(i)}\\
&=\sum_{\sigma\in\Sym n}\sgn{\sigma}\sum_{\phi\colon n\to
    n}\prod_{i<n}(a^i_{\phi(i)}b^{\phi(i)}_{\sigma(i)})\\
&=\sum_{\phi\colon n\to
    n} \sum_{\sigma\in\Sym
    n}\sgn{\sigma}\prod_{i<n}(a^i_{\phi(i)}b^{\phi(i)}_{\sigma(i)}). 
\end{align*}
We shall eliminate from the sum those terms involving any $\phi$ that is
not injective.
Suppose $k<\ell<n$, but
$\phi(k)=\phi(\ell)$.  The function 
$\sigma\mapsto\sigma\circ
\begin{pmatrix}
  k&\ell
\end{pmatrix}$ is a bijection between $\Alt n$ and $\Sym
n\setminus\Alt n$.
  Also, if $\tau=\sigma\circ
\begin{pmatrix}
  k&\ell
\end{pmatrix}$, then
\begin{equation*}
  a^k_{\phi(k)}b^{\phi(k)}_{\sigma(k)}
  a^{\ell}_{\phi(\ell)}b^{\phi(\ell)}_{\sigma(\ell)}
=  a^k_{\phi(k)}b^{\phi(\ell)}_{\tau(\ell)}
  a^{\ell}_{\phi(\ell)}b^{\phi(k)}_{\tau(k)}
=  a^k_{\phi(k)}b^{\phi(k)}_{\tau(k)}
  a^{\ell}_{\phi(\ell)}b^{\phi(\ell)}_{\tau(\ell)}.
\end{equation*}
Hence
\begin{equation*}
\sgn{\sigma}\prod_{i<n}(a^i_{\phi(i)}b^{\phi(i)}_{\sigma(i)})
+
\sgn{\tau}\prod_{i<n}(a^i_{\phi(i)}b^{\phi(i)}_{\tau(i)})=0.  
\end{equation*}
Now we have
\begin{align*}
  \det(AB)
&=\sum_{\tau\in\Sym n} \sum_{\sigma\in\Sym
    n}\sgn{\sigma}\prod_{i<n}(a^i_{\tau(i)}b^{\tau(i)}_{\sigma(i)})\\
&=\sum_{\tau\in\Sym n} \sum_{\sigma\in\Sym
    n}\sgn{\sigma}\prod_{i<n}(a^i_{\tau(i)}b^{i}_{\tau\inv\sigma(i)})\\
&=\sum_{\tau\in\Sym n} \sum_{\sigma\in\Sym
    n}\sgn{\tau}\sgn{\tau\inv\sigma}
  \prod_{i<n}(a^i_{\tau(i)}b^{i}_{\tau\inv\sigma(i)})\\  
&=\sum_{\tau\in\Sym n} \sum_{\sigma\in\Sym
    n}\sgn{\tau}\sgn{\sigma}
  \prod_{i<n}(a^i_{\tau(i)}b^{i}_{\sigma(i)})\\  
&=\sum_{\tau\in\Sym n}\sgn{\tau} \prod_{i<n}a^i_{\tau(i)}
\sum_{\sigma\in\Sym
    n}\sgn{\sigma}
  \prod_{i<n}b^{i}_{\sigma(i)}=\det(A)\det(B).\qedhere  
\end{align*}
\end{proof}

\begin{corollary}
  An element $A$ of $\MatR$ has an inverse only if $\det(A)\in\unit R$.
\end{corollary}

\begin{theorem}
  An element $A$ of $\MatR$ has an inverse if $\det(A)\in\unit R$.
\end{theorem}

\begin{proof}
Let $A=(a^i_j)^{i<n}_{j<n}$.  If $i<n$, then
\begin{align*}
  \det(A)
&=\sum_{\sigma\in\Sym
    n}\sgn{\sigma}\prod_{\ell<n}a^{\ell}_{\sigma(\ell)}\\
&=\sum_{\sigma\in\Sym
    n}\sgn{\sigma}a^i_{\sigma(i)}\prod_{\ell\in
    n\setminus\{i\}}a^{\ell}_{\sigma(\ell)}\\
&=\sum_{j<n}a^i_j
\sum_{\substack{\sigma\in\Sym n\\\sigma(i)=j}}\sgn{\sigma}\prod_{\ell\in
    n\setminus\{i\}}a^{\ell}_{\sigma(\ell)}\\
&=\sum_{j<n}a^i_jb^j_i,
\end{align*}
where
\begin{equation*}
  b^j_k=
\sum_{\substack{\sigma\in\Sym n\\\sigma(k)=j}}\sgn{\sigma}\prod_{\ell\in
    n\setminus\{k\}}a^{\ell}_{\sigma(\ell)}.
\end{equation*}
However, if $i\neq k$, then
\begin{align*}
  \sum_{j<n}a^i_jb^j_k
&=\sum_{j<n}a^i_j\sum_{\substack{\sigma\in\Sym
      n\\\sigma(k)=j}}\sgn{\sigma}\prod_{\ell\in 
    n\setminus\{k\}}a^{\ell}_{\sigma(\ell)}\\
&=\sum_{\sigma\in\Sym n}\sgn{\sigma}a^i_{\sigma(k)}\prod_{\ell\in 
    n\setminus\{k\}}a^{\ell}_{\sigma(\ell)}\\
&=\sum_{\sigma\in\Sym n}\sgn{\sigma}a^i_{\sigma(k)}a^i_{\sigma(i)}\prod_{\ell\in 
    n\setminus\{i,k\}}a^{\ell}_{\sigma(\ell)}=0,
\end{align*}
since the map $\sigma\mapsto\sigma\circ
\begin{pmatrix}
  i&k
\end{pmatrix}$ is a bijection between $\Alt n$ and $\Sym
n\setminus\Alt n$.
Thus
\begin{equation*}
  A(b^j_k)^{j<n}_{k<n}=(\det(A)\delta^i_k)^{i<n}_{k<n}.
\end{equation*}
Finally,
\begin{align*}
  \sum_{j<n}b^i_ja^j_k
&=\sum_{j<n}\sum_{\substack{\sigma\in\Sym
      n\\\sigma(j)=i}}\sgn{\sigma}\prod_{\ell\in
    n\setminus\{j\}}a^{\ell}_{\sigma(\ell)}a^j_k\\
&=\sum_{\sigma\in\Sym n}\sgn{\sigma}\prod_{\ell\in
    n\setminus\{\sigma\inv(i)\}}a^{\ell}_{\sigma(\ell)}a^{\sigma\inv(i)}_k\\
&=\sum_{\sigma\in\Sym n}\sgn{\sigma}\prod_{\ell\in
    n\setminus\{i\}}a^{\sigma\inv(\ell)}_{\ell}a^{\sigma\inv(i)}_k,
\end{align*}
which is $\det(A)$ if $i=k$, but is otherwise $0$, so
\begin{equation*}
  (b^i_j)^{i<n}_{j<n}A=(\det(A)\delta^i_k)^{i<n}_{k<n}.
\end{equation*}
In particular, if $\det(A)$ is invertible, then so is $A$, and
\begin{equation*}
  A\inv=(\det(A)\inv b^j_k)^{j<n}_{k<n}. \qedhere
\end{equation*}
\end{proof}


\section{Dihedral groups}

We can consider the elements of $n$ as vertices of a regular $n$-gon.
The group of symmetries of this object is called a \textbf{dihedral
  group}\index{dihedral group} and is denoted by
\begin{equation*}
\Dih n.
\end{equation*}
Formally, this is the subgroup $\gpgen{\sigma_n,\beta}$ of $\Sym n$,
where as in the last section $\sigma_n$ is the $n$-cycle
$\begin{pmatrix}
      0&1&\dots&n-1
    \end{pmatrix}$, while
\begin{equation*}
\beta=
\begin{cases}
      \begin{pmatrix}
      1&n-1
    \end{pmatrix}
    \begin{pmatrix}
      2&n-2
    \end{pmatrix}\dotsm
    \begin{pmatrix}
      m-1&m+1
    \end{pmatrix},& \text{if } n=2m,\\
    \begin{pmatrix}
      1&n-1
    \end{pmatrix}
    \begin{pmatrix}
      2&n-2
    \end{pmatrix}\dotsm
    \begin{pmatrix}
      m&m+1
    \end{pmatrix},&\text{ if }n=2m+1.
\end{cases}
\end{equation*}
Note that both $\beta$ and $\sigma_n\beta$ here have order $2$.

\begin{theorem}\label{thm:Dn}
If $n>2$, and $G=\gpgen{a,b}$, where $\order a=n$ and $\order
b=2=\order{ab}$, then $G\cong\Dih n$.
\end{theorem}

\begin{proof}
Assume $n\geq 2$.
Since $abab=\gid$ and $b\inv=b$, we have
\begin{align*}
  ba&=a\inv b,&
ba\inv&=ab.
\end{align*}
Therefore $ba^k=a^{-k}b$ for all integers
  $k$.  This shows 
\begin{equation*}
G=\{a^ib^j\setcolon(i,j)\in n\times 2\}.
\end{equation*}
It remains to show $\order G=2n$.
  Suppose 
  \begin{equation*}
  a^ib^j=a^kb^{\ell},
  \end{equation*}
  where $(i,j)$ and $(k,\ell)$ are in $n\times 2$.  Then 
  \begin{equation*}
  a^{i-k}=b^{\ell-j}.
  \end{equation*}
If $b^{\ell-j}=\gid$, then $\ell=j$ and $i=k$.  The alternative is that
 $b^{\ell-j}=b$.  In this case,
\begin{equation*}
n\divides2(i-k).  
\end{equation*}
If $n\divides i-k$, then $i=k$ and hence $j=\ell$.  The only other
possibility is that $n=2m$ for some $m$, and $i-k=\pm m$, so that $a^m=b$.  
But then $aa^maa^m=a^2$, while $abab=\gid$, so $n=2$.
\end{proof}














\section{Products and sums}\label{sect:prod-sum}

\begin{theorem}\label{thm:prod}
  Let $G_0$, $G_1$ and $H$ be groups.  For each $i$ in $2$, let
  $\pi_i$ be the homomorphism $(x_0,x_1)\mapsto x_i$ from $G_0\times
  G_1$ to $G_i$, and let $f_i$ be a homomorphism from $H$ to $G_i$.
Then there is a homomorphism
\begin{equation*}
  x\mapsto(f_0(x),f_1(x))
\end{equation*}
 from $H$ to $G_0\times G_1$, and this 
 is the unique homomorphism $f$ from $H$ to $G_0\times G_1$ such
  that, for each $i$ in~$2$,
  \begin{equation*}
    \pi_if=f_i
  \end{equation*}
---that is, the following diagram commutes:
\begin{equation*}
  \xymatrix{
G_0 & \ar[l]_-{\pi_0} G_0\times G_1 \ar[r]^-{\pi_1} & G_1\\
& \ar[ul]^{f_0} \ar[u]_f H \ar[ur]_{f_1} &
}
\end{equation*}
\end{theorem}

\begin{proof}
If $u\in G_0\times G_1$, then $u=(\pi_0(u),\pi_1(u))$.  Hence,
  if $f\colon H\to G_0\times G_1$, then $f(x)=(\pi_0f(x),\pi_1f(x))$.
  In particular then, $f$ is as desired if and only if
  $f(x)=(f_0(x),f_1(x))$. 
\end{proof}

We can generalize this theorem by considering an indexed family
$(G_i\colon i\in I)$ of groups.  The \textbf{direct product}\index{direct product}
of this family is denoted by
\begin{equation*}
  \prod_{i\in I}G_i.
\end{equation*}
This is, first of all, the set
whose elements are $(x_i\colon i\in I)$ (that is, functions $i\mapsto x_i$ on~$I$) such
that $x_i\in G_i$ for each $i$ in $I$.  An operation of multiplication
on this set is given by
\begin{equation*}
  (x_i\colon i\in I)(y_i\colon i\in I)=(x_iy_i\colon i\in I).
\end{equation*}
Under this multiplication, $\prod_{i\in I}G_i$ becomes a group.
If $i\in I$, we define a homomorphism $\pi_i$ from $\prod_{i\in I}G_i$
to $G_i$ by
\begin{equation*}
  \pi_i(x_j\colon j\in I)=x_i.
\end{equation*}
In case $I=n$, we may write $\prod_{i\in I}G_i$ also as
\begin{equation*}
  G_0\times\cdots\times G_{n-1},
\end{equation*}
and a typical element of this as
\begin{equation*}
  (x_0,\dots,x_{n-1}).
\end{equation*}
As a porism to the previous theorem, we have:

\begin{porism}
  Suppose $(G_i\colon i\in I)$ is an indexed family of groups, and $H$
  is a group, and for each $i$ in $I$ there is a homomorphism $f_i$ from $H$
  to $G_i$.  Then there is a homomorphism
  \begin{equation*}
    x\mapsto(f_i(x)\colon i\in I)
  \end{equation*}
 from $H$ to
  $\prod_{i\in I}G_i$, and this
is the unique homomorphism $f$ from $H$ to
  $\prod_{i\in I}G_i$ such that, for each $i$ in~$I$,
  \begin{equation*}
    \pi_if=f_i.
  \end{equation*}
\end{porism}

The direct product of a family of abelian groups is an abelian group.
When we restrict attention to abelian groups, then we can
reverse the arrows in Theorem~\ref{thm:prod}: 

\begin{theorem}\label{thm:oplus}
  Let $G_0$, $G_1$ and $H$ be abelian groups.  Let $\iota_0$ be the
  homomorphism $x\mapsto(x,0)$ from $G_0$ to $G_0\oplus G_1$, and let
  $\iota_1$ be $x\mapsto(0,x)$ from $G_1$ to $G_0\oplus G_1$.
For each $i$ in $2$, let $f_i$ be a homomorphism from $G_i$ to $H$.
Then there is a homomorphism
\begin{equation*}
  (x_0,x_1)\mapsto f_0(x_0)+f_1(x_1)
\end{equation*}
 from $G_0\oplus G_1$ to $H$, and this is
 the unique homomorphism $f$ from $G_0\oplus G_1$ to $H$ such
  that, for each $i$ in~$2$,
  \begin{equation*}
    f\iota_i=f_i
  \end{equation*}
---that is, the following diagram commutes:
\begin{equation*}
  \xymatrix{
G_0 \ar[r]^-{\iota_0} \ar[dr]_{f_0} & G_0\oplus G_1 \ar[d]^f &
\ar[l]_-{\iota_1} \ar[dl]^{f_1} G_1\\ 
& H &
}
\end{equation*}
\end{theorem}

\begin{proof}
Every element $(x_0,x_1)$ of $G_0\oplus G_1$ is
$\iota_0(x_0)+\iota_1(x_1)$, so that, if $f$ is a homomorphism on $G_0\oplus G_1$, then
\begin{equation}\label{eqn:f+}
  f(x_0,x_1)=f\iota_0(x_0)+f\iota_1(x_1).
\end{equation}
Hence $f$ is as desired if and only if
$f(x_0,x_1)=f_0(x_0)+f_1(x_1)$.  The function so defined is indeed a
homomorphism, since 
\begin{multline*}
  f((x_0,x_1)+(u_0,u_1))
=f(x_0+u_0,x_1+u_1)
=f_0(x_0+u_0)+f_1(x_1+u_1)\\
\begin{aligned}
&=f_0(x_0)+f_0(u_0)+f_1(x_1)+f_1(u_1)\\
&=f_0(x_0)+f_1(x_1)+f_0(u_0)+f_1(u_1)
=f(x_0,x_1)+f(u_0,u_1),
\end{aligned}
\end{multline*}
because $H$ is abelian.
\end{proof}

In the proof, the definition of $f$ in~\eqref{eqn:f+} relies on the
\emph{finiteness} of the family $(G_i\colon i\in 2)$; more precisely,
it relies on the finiteness of $\{i\in 2\colon x_i\neq\gid\}$.  Of an
arbitrary indexed family $(G_i\colon i\in I)$ of groups, we define the
\textbf{\emph{weak} direct product}\index{weak direct product} to be
the subgroup, denoted by  
\begin{equation*}
\weakprod_{i\in I}G_i,
\end{equation*}
of $\prod_{i\in I}G_i$ comprising those elements $(x_i\colon i\in I)$ such that $\{i\in I\colon x_i\neq\gid\}$ is finite.  
We define a homomorphism $\iota_i$ from each $G_i$ to
$\textweakprod_{j\in I}G_j$ by
\begin{equation*}
\iota_i(x)=(x_j\colon j\in I), 
\end{equation*}
where
\begin{equation*}
  x_j=
  \begin{cases}
    x,&\text{ if }j=i;\\
\gid,&\text{ otherwise.}
  \end{cases}
\end{equation*}
If $I$ is finite, then the weak direct product is the same as the (full)
direct product.

Proving that $f$ as in~\eqref{eqn:f+} is a \emph{homomorphism} uses that $H$ is abelian.
The weak direct product of a family $(G_i\colon i\in I)$ of abelian groups is called the \textbf{direct sum}\index{direct sum} and is denoted by 
\begin{equation*}
  \sum_{i\in I}G_i.
\end{equation*}
In case $I=n$, we may write $\sum_{i\in I}G_i$ also as
\begin{equation*}
  G_0\oplus\cdots\oplus G_{n-1}.
\end{equation*}
As a porism to the previous theorem, we have:

\begin{porism}
  Suppose $(G_i\colon i\in I)$ is an indexed family of abelian groups, and $H$
  is an abelian group, and for each $i$ in $I$ there is a homomorphism $f_i$ from
  $G_i$ to $H$.  Then there is a homomorphism
  \begin{equation*}
    x\mapsto\sum_{i\in I}f_i(x_i)
  \end{equation*}
  from
  $\sum_{i\in I}G_i$ to $H$, and this is the unique homomorphism $f$ from
  $\sum_{i\in I}G_i$ to $H$
  such that, for each $i$ in~$I$, 
  \begin{equation*}
    f\iota_i=f_i.
  \end{equation*}
\end{porism}

Now we can provide an example promised in \S~\ref{sect:rings}.  Let
$E$ be the abelian group $\sum_{n\in\vnn}\Z$. 
Suppose $f$ is a singulary operation on $\vnn$.  An element
$f^*$ of $\End E$ is induced, given by
\begin{equation*}
  f^*(x_n\colon n\in\vnn)=(x_{f(n)}\colon n\in\vnn).
\end{equation*}
Then $f^*\iota_{f(n)}=\iota_n$.
Let
$f$ be the operation $x\mapsto x+1$ on $\vnn$, and let $g$ be the
operation given by
\begin{equation*}
  g(x)=
  \begin{cases}
    y,&\text{ if }f(y)=x,\\
    0,&\text{ if }x=0.
  \end{cases}
\end{equation*}
Then $gf(x)=x$, so $f^*g^*=(gf)^*$, the identity in $\End E$; but
$g^*f^*$ is not the identity, since it is $(fg)^*$, and
$fg(0)=1=fg(1)$. 

We have two kinds
of products so far, related as follows.

\begin{theorem}
Let $(G_i\colon i\in I)$ be an indexed family of groups.  Then
\begin{align*}
   \iota_j[G_j]&\nsubgp\weakprod_{i\in I}G_i,&  
\weakprod_{i\in I}G_i&\nsubgp\prod_{i\in I}G_i,&
   \iota_j[G_j]&\nsubgp\prod_{i\in I}G_i.\qedhere
\end{align*}
\end{theorem}

Theorem~\ref{thm:oplus} and its porism can be generalized to some
cases of arbitrary groups:

\begin{theorem}
  Suppose $(G_i\colon i\in I)$ is an indexed family of groups, and $H$
  is a group, and for each $i$ in $I$ there is a homomorphism $f_i$ from
  $G_i$ to $H$.  Suppose further that, for all $i$ and $j$ in $I$,
  \begin{equation*}
    f_i(x)f_j(y)=f_j(y)f_i(x).
  \end{equation*}
Then there is a homomorphism
\begin{equation*}
  x\mapsto\prod_{i\in I}f_i(x_i)
\end{equation*}
from $\textweakprod_{i\in I}G_i$ to $H$, and this is the unique
homomorphism $f$
from $\textweakprod_{i\in I}G_i$ to $H$
such that, for each $i$ in~$I$, 
  \begin{equation*}
    f\iota_i=f_i.
  \end{equation*}
\end{theorem}

As a special case of this theorem, we have the next theorem below, by
means of the following:

\begin{lemma}
  If $M$ and $N$ are normal subgroups of $G$, and 
  \begin{equation*}
  M\cap N=\gpgen{\gid},
  \end{equation*}
  then each element $m$ of $M$ commutes with each element $n$ of $N$,
  that is,
  \begin{equation*}
  mn=nm.
  \end{equation*}
\end{lemma}

\begin{proof}
We can analyze $mnm\inv n\inv$ both as the element
$(mnm\inv)n\inv$ of $N$ and as the element $m(nm\inv n\inv)$ in $M$; so
  the element is $\gid$, and therefore
$mn=(m\inv n\inv)\inv=nm$.
\end{proof}

\begin{theorem}\label{thm:wdp}
  If $(N_i\colon i\in I)$ is an indexed family of normal subgroups of
  a group, and for each $j$ in $I$,
\begin{equation}
  N_j\cap\Bigl\langle\bigcup_{i\in
  I\setminus\{j\}}N_i\Bigr\rangle=\gpgen{\gid},
\end{equation}
then 
\begin{equation*}
\Bigl\langle\bigcup_{i\in I}N_i\Bigr\rangle
\cong\weakprod_{i\in I}N_i.
\end{equation*}
\end{theorem}

\begin{proof}
Say the $N_i$ are normal subgroups of $G$.
Since $N_i\cap N_j=\gpgen{\gid}$ whenever $i\neq j$,
the last theorem and the lemma guarantee that there is a homomorphism
$h$ from $\textweakprod_{i\in I}N_i$ 
  into $G$ such that, for each $i$ in $I$, the composition
  $h\iota_i$ is just the inclusion of $N_i$ in $G$.  Then the range of $h$ is
$\Bigl\langle\bigcup_{i\in I}N_i\Bigr\rangle$.
To show that $h$ is injective, note that, if $n\in\textweakprod_{i\in
  I}N_i$ and $h(n)=\gid$, then, for each $j$ in $I$, we have
\begin{equation*}
  n_j{}\inv=\prod_{i\in I\setminus\{j\}}n_i.
\end{equation*}
The left member is in $N_j$, the right in
$\Bigl\langle\bigcup_{i\in
  I\setminus\{j\}}N_i\Bigr\rangle$, so each side is $\gid$; in
particular, $n_j=\gid$.  Therefore $n=\gid$.
\end{proof}

In the conclusion of the theorem, $G$ is the
\textbf{\emph{internal} weak direct product}\index{internal weak
  direct product} of the~$N_i$. 






















\section{Free groups}

The direct sum $\sum_{i\in I}\Z$ has elements $\gid^i$, namely $\iota_i(1)$ or $(\delta_j^i\colon j\in I)$, where
\begin{equation*}
\delta_j^i=
\begin{cases}
1,&\text{ if }j=i,\\
0,&\text{ otherwise.}
\end{cases}
\end{equation*}
An arbitrary element of $\sum_{i\in I}\Z$ is a \textbf{`formal
  sum,'}\index{formal sum}
\begin{equation*}
\sum_{i\in I}x_i\gid^i.
\end{equation*}

\begin{theorem}\label{thm:free-ab}
  Suppose $G$ is an abelian group, $I$ is a set, and $f$ is a
  function from $I$ to $G$.  Then there is 
  a homomorphism
  \begin{equation*}
  \sum_{i\in I}x_i\gid^i\mapsto\sum_{i\in I}x_if(i)
  \end{equation*}
 from $\sum_{i\in I}\Z$ to $G$, and this is the unique homomorphism $\tilde f$ from $\sum_{i\in I}\Z$ to $G$ such that, for each $i$ in~$I$,
  \begin{equation*}
    \tilde f(\gid^i)=f(i)
  \end{equation*}
---that is, the following diagram commutes, where $\iota$ is the map $i
\mapsto\gid^i$:  
\begin{equation*}
  \xymatrix{
I \ar[r]^-{\iota} \ar[d]_f & \ar[dl]^{\tilde f} \displaystyle\sum_{i\in I}\Z \\
G &
}
\end{equation*}
\end{theorem}

The direct sum $\sum_{i\in I}\Z$ in the theorem is the \textbf{free
  abelian group}\index{free abelian group} on $I$ with respect to the map
$i\mapsto\gid^i$.  
There is also a \textbf{free group}\index{free group} on $I$, which we may denote by
\begin{equation*}
  \free I.
\end{equation*}
This is the group of \emph{reduced words} on $I$.  A \textbf{word}\index{word} on $I$ is a 
finite nonempty string $t_0t_1\cdots t_n$,
where
each entry $t_k$ is either $\gid$, or else $a$ or $a\inv$ for some $a$
in $I$.  A word is \textbf{reduced}\index{reduced} if
$a$ and $a\inv$ are never adjacent in it, and
$\gid$ is never adjacent to any other entry (so $\gid$ can appear only
  in the string $\gid$).
We make $\free I$
  into a group when the multiplication is defined as juxtaposition followed by \textbf{reduction,}\index{reduction} namely, replacement of each occurrence of $aa\inv$ or $a\inv a$ with $\gid$, and replacement of each occurrence of $x\gid$ or $\gid x$ with $x$.  Thus,
  when an element $a$ of $I$ is written as $a^{+1}$, we have
  \begin{equation*}
    (a_{m}^{\epsilon(m)}\cdots a_{0}^{\epsilon(0)})
(b_{0}^{\zeta(0)}\cdots b_{n}^{\zeta(n)})=
a_{m}^{\epsilon(m)}\cdots a_{j}^{\epsilon(j)}b_j^{\zeta(j)}\cdots
b_{n}^{\zeta(n)},
  \end{equation*}
where $j$ is maximal such that, if $i<j$, then
 $a_i^{\epsilon(i)}= b_i^{-\zeta(i)}$.  We consider $I$ as a
subset of $\free I$.  An element of the latter other than $\gid$ can be written also as
\begin{equation*}
a_0{}^{n(0)}\cdots a_m{}^{n(m)},
\end{equation*}
where $a_i$ and $a_{i+1}$ are always distinct elements of $I$, and each $n(i)$ is in $\Z\setminus\{0\}$.

\begin{theorem}\label{thm:free-gp}
  Suppose $G$ is a group, $I$ is a set, and $f$ is a
  function from $I$ to $G$.  Then there is 
  a homomorphism 
  \begin{equation*}
  a_0^{\epsilon(0)}\cdots
  a_n^{\epsilon(n)}\mapsto f(a_0)^{\epsilon(0)}\cdots
  f(a_n)^{\epsilon(n)} 
\end{equation*}
  from $\free I$ to $G$, and this is
  the unique homomorphism
  $\tilde f$ from $\free I$ to $G$ such that
  \begin{equation*}
    \tilde f\restriction I=f
  \end{equation*}
---that is, the following diagram commutes, where $\iota$ is the
inclusion of $I$ in $\free I$:  
\begin{equation*}
  \xymatrix{
I \ar[r]^-{\iota} \ar[d]_f & \ar[dl]^{\tilde f} \free I \\
G &
}
\end{equation*}
\end{theorem}

The \textbf{free product}\index{free product} of a family
$(G_i\colon i\in I)$ of groups is the group, denoted by
  \begin{equation*}
    \freeprod_{i\in I}G_i,
  \end{equation*}
  comprising the string $\gid$ together with strings   
   $t_0\cdots t_m$, where each entry $t_i$ is an ordered pair
  $(g,n(i))$ such that $n(i)\in I$ and $g\in
  G_{n(i)}\setminus\{\gid\}$, and $n(i)\neq n(i+1)$.  This complicated
  definition allows for the possibility that $G_i$ might be the same
  as $G_j$ for some distinct $i$ and $j$; the groups $G_i$ and $G_j$
  must be considered as distinct in the formation of the free
  product. 
Multiplication on $\textfreeprod_{i\in I}G_i$, as on $\free I$, is
juxtaposition followed by reduction, so that if $(g,i)$ is followed
directly by $(h,i)$, then they are replaced with $(gh,i)$, and all
instances of $(\gid,i)$ are deleted, or replaced with $\gid$ if there
is no other entry. 
Each $G_j$ embeds in $\textfreeprod_{i\in I}G_i$ under $\iota_j$,
namely $x\mapsto(x,j)$.  We now have 
the following analogue of the porism to Theorem~\ref{thm:oplus}.

\begin{theorem}\label{thm:free-prod}
  Let $(G_i\colon i\in I)$ be an indexed family of groups, and let $H$
  be a group.  Suppose for each $i$ in $I$ there is a homomorphism
  $f_i$ from $G_i$ to $H$.  Then there is a homomorphism
    \begin{equation*}
    (g_0,n(0))\cdots(g_m,n(m))\mapsto f_{n(0)}(g_0)\cdots f_{n(m)}(g_m)
  \end{equation*}
from $\textfreeprod_{i\in I}G_i$ to $H$; this is the unique
homomorphism $f$ from $\textfreeprod_{i\in I}G_i$ to $H$ such that, for each $i$ in $I$,
\begin{equation*}
f\iota_i=f_i
\end{equation*}
---that is, the following diagram commutes:
\begin{equation*}
  \xymatrix{
G_j \ar[dr]_{f_j} \ar[r]^-{\iota_j} & \displaystyle\freeprod_{i\in I}G_i \ar[d]^f\\
& H
}
\end{equation*}
\end{theorem}

\section{Categories}








For any two groups $G$ and $H$ there is a set
\begin{equation*}
  \Hom{G,H}
\end{equation*}
comprising the homomorphisms from $G$ to $H$.  There is a map
\begin{equation*}
  (g,f)\mapsto g\circ f
\end{equation*}
from $\Hom{H,K}\times\Hom{G,H}$ to $\Hom{G,K}$,
and there is $\id_H$ in $\Hom {H,H}$, such that
\begin{equation*}
  \id_H\circ f=f,\quad g\circ\id_H=g,\quad k\circ(g\circ f)=(k\circ
  g)\circ f
\end{equation*}
whenever $f\in\Hom{G,H}$, $g\in\Hom{H,K}$, and $k\in\Hom{K,L}$.
Understood in this way, groups with their 
homomorphisms compose a prototypical example of a \emph{category.}

A \textbf{directed graph}\index{directed graph} is a certain kind of quadruple 
\begin{equation*}
  (\class C_0,\class C_1,t,h),
\end{equation*}
where $\class C_0$ and $\class C_1$ are classes, and $t$
and $h$ are 
functions from $\class C_1$ to $\class C_0$.  We may refer to each
element of $\class C_0$ as a \textbf{node,}\index{node} and to each element of
$\class C_1$ as an \textbf{arrow.}\index{arrow}  If $a$ is an arrow, then $t(a)$ is its
\textbf{tail,}\index{tail} and $h(a)$ is its \textbf{head,}\index{head} and $a$ is an
arrow \textbf{from}
$t(a)$ \textbf{to} $h(a)$.  If $f$ is an arrow from $A$ to $B$, we
may express this by writing 
\begin{equation*}
f\colon A\longrightarrow B
\end{equation*}
or
\begin{equation*}
A\overset{f}{\longrightarrow}B.  
\end{equation*}
We require the arrows from $A$ to $B$ to compose a \emph{set} (as
opposed to a proper class, like the class of all sets that do not
contain themselves).  We can define
\begin{equation*}
  \class C_2=\{(f,g)\in\class C_1{}^2\setcolon t(f)=h(g)\};
\end{equation*}
this is the class of paths of length $2$.
More generally,
\begin{equation*}
\class C_{n+1}=\Bigl\{(f_0,\dots,f_n)\in\class
C_1{}^{n+1}\setcolon \bigwedge_{i<n}t(f_i)=h(f_{i+1})\Bigr\}. 
\end{equation*}
The graph above is a \textbf{category}\index{category}
if there are
\begin{enumerate}
  \item
a function
$A\mapsto\id_A$ from $\class C_0$ to $\class C_1$, and
\item
a function $(f,g)\mapsto f\circ g$ from $\class C_2$ to $\class C_1$,
\end{enumerate}
such that
\begin{gather}\notag
t(\id_A)=A=h(\id_A),\qquad
t(f\circ g)=t(g),\qquad
h(f\circ g)=h(f),\\\label{eqn:cat}
  f\circ\id_{t(f)}=f,\qquad \id_{h(g)}\circ g=g,\qquad
h\circ(g\circ f)=(h\circ g)\circ f
\end{gather}
whenever these are defined.  In particular then, the category is a
sextuple
\begin{equation}\label{cat}
  (\class C_0,\class C_1,t,h,\id,\circ).
\end{equation}
Conditions~\eqref{eqn:cat} can be
diagrammed as follows.
\begin{align*}
  &\xymatrix{
A \ar[r]^f \ar[d]_f & B \ar[d]^g\\
B \ar[ur]|{\id_B} \ar[r]_g & C}\\
&\xymatrix{
B \ar[d]_g & A \ar[l]_f \ar[dl]|{g\circ f} \ar[d]%|{h\circ g\circ f} 
\ar[r]^f & B \ar[dl]|{h\circ g} \ar[d]^g\\
C \ar[r]_h & D & C \ar[l]^h}
\end{align*}
These are \textbf{commutative diagrams}\index{commutative diagram}%
\index{diagram!commutative ---}
in the sense that any two paths from one vertex to another represent
the same arrow.\footnote{One can define commutative diagrams
    formally.  A \textbf{diagram}\index{diagram} is a
homo\-morph\-ism from a directed graph to a category.  One then thinks of the
diagram as the graph with its nodes and arrows labelled with their
images in the category.  The diagram is
\textbf{commutative} if every 
path in the graph with the same tail and head is sent to the same
arrow in the category.}
The
arrows of a category are also called \textbf{morphisms.}\index{morphism}  
The class of morphisms from $A$ to $B$ can be denoted by
\begin{equation*}
  \Hom{A,B}.
\end{equation*}
The morphism
$f\circ g$ is the \textbf{composite}\index{composite} of $f$ and $g$.

A
category is \textbf{concrete}\index{concrete} if each of its objects has an underlying
set and the morphisms are functions in the way suggested by the notation.
For example,
  the class of sets, with the class of functions, is a concrete
  category; likewise the class of groups, with homomorphisms, and the
  class of topological spaces, with continuous functions. 
However, not all categories are concrete.  For example,
if $G$ is a group, then its elements can be considered as objects of
  a category in which $\Hom{a,b}=\{ba\inv\}$, $\id_a=1$, and $c\circ
  d=cd$.


In a category, a morphism $f$ is an \textbf{isomorphism}\index{isomorphism} if
\begin{equation*}
  g\circ f=\id_{t(f)}\quad\text{ and }\quad f\circ g=\id_{h(f)}
\end{equation*}
for some morphism $g$; then $g$ is an \textbf{inverse}\index{inverse} of $f$.

\begin{theorem}
  In a category, inverses are unique.
\end{theorem}

\begin{proof}
  If $g$ and $h$ are inverses of $f$, then
  $g=g\circ\id_{h(f)}=g\circ(f\circ h)=(g\circ f)\circ h=
  \id_{t(f)}\circ h=h$.
\end{proof}

If it exists, then the inverse of $f$ is $f\inv$.  It is immediate
then that $(f\inv)\inv=f$.


Suppose we have an arbitrary category as in~\eqref{cat}
and an element $(A_i\colon i\in I)$ or
$A$ of $\class C_0{}^I$ for some index-set $I$.  If it exists, the
\textbf{product}\index{product} of $A$ in the category is an element
\begin{equation*}
  \Bigl(\prod A,i\mapsto\pi_i\Bigr)
\end{equation*}
of $\class C_0\times\class C_1{}^I$, where
\begin{equation*}
  \pi_i\colon\prod A\to A_i
\end{equation*}
for each $i$ in $I$, such that,
whenever $(B,i\mapsto f_i)\in\class C_0\times\class C_1{}^I$, where
$f_i\colon B\to A_i$ for each $i$ in $I$, then there is a \emph{unique}
morphism $f$ from $B$ to $\prod A$ such that
\begin{equation*}
  \pi_i\circ f=f_i
\end{equation*}
for each $i$ in $I$.  Again this condition is expressed by a
commutative diagram.
\begin{equation*}
  \xymatrix%@!
{
&\prod A \ar[d]^{\pi_j}\\
B\ar[ur]^f \ar[r]_{f_j} &A_j}
\end{equation*}
The morphisms $\pi_i$ are the \textbf{canonical
  projections.}\index{projection}\index{canonical!--- projection}

\begin{theorem}
  Any two products of the same family of objects in the same category
  are isomorphic.\hfill\qedsymbol 
\end{theorem}

The porism to Theorem~\ref{thm:prod} is that direct products are
products in the category of groups \emph{and} in the category of
abelian groups.

Every category has a \textbf{dual,}%
\index{category!dual ---}%
\index{dual category}
in which the arrows are reversed.  To be precise, the dual of
$(\class C_0,\class C_1,t,h,\id,\circ)$ is
$(\class C_0,\class C_1,h,t,\id,\circ')$, where $f\circ'g=g\circ f$.
A \textbf{co-product}\index{product!co---} or \textbf{sum}\index{sum} in a
category is a product in the 
dual.  The co-product of $A$ may be denoted by
\begin{equation*}
  \Bigl(\coprod A,i\mapsto\iota_i\Bigr)\quad \text{ or }\quad\Bigl(\sum
  A,i\mapsto\iota_i\Bigr); 
\end{equation*}
the morphisms $\iota_i$ are the \textbf{canonical
  injections.}%
\index{injection}%
\index{canonical!--- injection}
The relevant commutative diagram is the following.
\begin{equation*}
  \xymatrix{
A_j\ar[r]^{f_j} \ar[d]_{\iota_j} & H\\
\coprod A \ar[ur]_f&}
\end{equation*}
Thus the coproduct of an indexed family of objects should be the
`simplest' object that contains all
of the `information' contained in each of the original objects.

The porism to Theorem~\ref{thm:oplus} is that direct sums are
coproducts in the category of abelian groups.
Theorem~\ref{thm:free-prod} is that free products are coproducts in
the category of groups.

Suppose $F$ is an object in a concrete category and $\fggen$ is a set.
Then $F$ is called 
\textbf{free}%
\index{free!--- group}%
\index{group!free ---} on $\fggen$
with respect to a function $\iota$ from $\fggen$ to $F$ if for any
function $f$ from 
$\fggen$ to an object $B$, there is a unique morphism
$\tilde f$ from $F$ to $B$ such that 
\begin{equation*}
  \tilde f\circ\iota=f.
\end{equation*}
That is, the
following diagram 
commutes (where the nodes
and arrows, except $\tilde f$, are from the category of sets):
\begin{equation*}
\xymatrix{
\fggen \ar[r]^{\iota} \ar[dr]_f & F \ar[d]^{\tilde f} \\
&B
}
\end{equation*}

Theorem~\ref{thm:free-ab} shows that free objects exist in the
category of abelian groups; Theorem~\ref{thm:free-gp}, in the category
of groups. 

\section{Presentation of groups}

\begin{theorem}
  Every group is isomorphic to a quotient of a free group.
\end{theorem}

\begin{proof}
Since every group $G$ is an image of the free group $\free G$, the claim follows by the First Isomorphism Theorem (a corollary to Theorem~\ref{thm:hom-n}).
\end{proof}

Suppose $G$ is a group, $A$ is a set, $f\colon A\to G$, and $G=\gpgen{f(a)\colon a\in A}$.  Suppose further
$B\included\free A$, and $N$ is the intersection of the set of normal subgroups of $\free A$ that include $B$.
The quotient $F/N$, where $F$ is $\free A$, denoted by
\begin{equation*}
  \gpres AB,
\end{equation*}
is referred to as the group with \textbf{generators}\index{generator} $A$ and
\textbf{relations}\index{relation} $B$, even though, strictly, $F/N$
here is generated, not by (the elements of) $A$, but by the cosets
$aN$, where $a\in A$.  If there is an isomorphism from $\gpres AB$ to
$G$ taking each of these cosets $aN$ to $f(a)$, then $\gpres AB$ is
called a \textbf{presentation}\index{presentation} of $G$.   

In this definition, rather than assuming $A\included G$, we use the
map $f$ so as to allow the possibility that $f$ is not injective.
Also, if $A=\{a_0,\dots,a_n\}$, and $B=\{w_0,\dots,w_m\}$, then
$\gpres AB$ can be written as $\gpres{a_0,\dots,a_n}{w_0,\dots,w_m}$. 

For example, $\free A$ can be presented as $\gpres A{\emptyset}$, and
in particular $\Z$ can be presented as $\gpres a{\emptyset}$, but also
as $\gpres{a,b}{ab\inv}$. 
The group $\Zmod n$ has the presentation $\gpres a{a^n}$.

\begin{theorem}[von Dyck\footnote{Walther von Dyck (1856--1934) gave an
      early (1882--3) definition of abstract groups \cite[ch.~49,
  p.~1141]{MR0472307}.}]\label{thm:vD}
   Suppose $G$ is a group, $A$ is a set, and $f\colon A\to G$, and let
   $\tilde f$ be the induced homomorphism from $\free A$ to $G$.
   Suppose further $B\included\free A$ and $\gpres AB=F/N$.  If
   $\tilde f(w)=\gid$ for each $w$ in $B$, then there is a
   well-defined homomorphism $g$ from $\gpres A B$ to $G$ such that
   $g(aN)=f(a)$ for each $a$ in $A$.  If $G=\gpgen{f(a)\colon a\in
     A}$, then $g$ is an epimorphism. 
  \begin{equation*}
  \xymatrix{
  A \ar[r]^f \ar[d] & G \\
  \free A \ar[ur]|{\tilde f} \ar[r]_{\pi} & \gpres AB \ar[u]_g
%    \xymatrix{
%F \ar[r]^f \ar[d]_h & G\\
%\gpres{\fggen}{\gprels} \ar[ur]_g &
}
  \end{equation*}
\end{theorem}

\begin{proof}
By definition of $N$, it is included in the kernel of $\tilde f$, so
$g$ is well-defined by Theorem~\ref{thm:hom-n}. 
\end{proof}

\begin{theorem}
If $n>2$, then $\Dih n$ has the presentation $\gpres{a,b}{a^n,b^2,abab}$.
\end{theorem}

\begin{proof}
Let $G=\gpres{a,b}{a^n,b^2,abab}$.  Then the order of (the image of)
$a$ in $G$ divides $n$, and the order of $b$ divides $2$. 
But by von Dyck's Theorem and Theorem~\ref{thm:Dn}, $G$ maps onto
$\Dih n$, and hence $n$ divides the order of $a$ in $G$, and $2$
divides the order of $b$.  Therefore 
$\Dih n\cong G$.
\end{proof}

\begin{theorem}
The group $\gpres{\mi,\mj}{\mi^4,\mi^2\mj^2,\mi\mj\mi^3\mj}$ has order
$8$, and its elements are (the images of) $\pm 1$, $\pm\mi$, $\pm\mj$,
$\pm\mk$, where $1=\gid$ and $\mk=\mi\mj$ and $-x=\mi^2x$. 
\end{theorem}

\begin{proof}
Let the group be called $G$.  In $G$, we have $\mj^2=\mi^{-2}=\mi^2$,
so $\mj^4=1$.  Hence also $\mk=\mi\mj=\mj^3\mi$, so $\mi^3\mj=\mj\mi$.
This shows that every element of $G$ can be written as $\mi^n\mj^m$,
where $n\in4$ and $m\in 2$; hence it is one of the given elements. 
\end{proof}






\section[Fin.~gen.~ab.~groups]{Finitely generated
  abelian groups} 

To \textbf{classify}\index{classify} a collection of groups is to find a function $f$ such
that
\begin{equation*}
  f(G)=f(H)\iff G\cong H
\end{equation*}
for all groups $G$ and $H$ in the collection.  We do this now with the
finitely generated abelian groups, and in particular with the finite
abelian groups.  The next theorem will be needed for
Theorem~\ref{thm:Zp-cross}. 


\begin{theorem}\label{thm:fin-gen-ab}
  For every abelian group $G$ on $n$ generators, there is a unique
  element $k$ of $n+1$, along with positive integers $d_0$, \dots,
  $d_{k-1}$, where  
  \begin{equation}\label{d}
  d_0\divides\dotsm\divides d_{k-1},
  \end{equation}
such that
  \begin{equation}\label{FH}
G\cong    \Zmod{d_0}\oplus\dotsb\oplus\Zmod{d_{k-1}}\oplus
    \underbrace{\Z\oplus\dotsb\oplus\Z}_{n-k}.
  \end{equation}
\end{theorem}

\begin{proof}
%Suppose $G$ is an abelian group with a generating set of size $n$.  
Let $F$ be the free abelian group $\sum_{i\in n}\Z$.  Then
\begin{equation*}
  G\cong F/N,
\end{equation*}
where $N$ is the kernel of the induced epimorphism from $F$ onto $G$.
As before, each element of $F$ can be understood as a formal sum $\sum_{i\in n}x_i\gid^i$.  Then $F$ itself is $\gpgen{\gid^0,\dots,\gid^{n-1}}$.
If $N=\gpgen{d_0\gid^0,\dots,d_{k-1}\gid^{k-1}}$, then $G$ is as in~\eqref{FH}.
Not every subgroup of $F$ is given to us so neatly, but we can use linear algebra to put it into this form.
Every element of $F$, considered as a formal sum, can be written also as a matrix product:
\begin{equation*}
  x_0{\gid}^0+\dotsb+x_{n-1}{\gid}^{n-1}=
  \begin{pmatrix}
  x_0&\cdots&x_{n-1}
  \end{pmatrix}
  \begin{pmatrix}
  {\gid}^0\\\vdots\\{\gid}^{n-1}
  \end{pmatrix}=\tuple x\mathbf e.
\end{equation*}
%for some \emph{unique} $n$-tuple $(x_0,\dots,x_{n-1})$ of integers.
The generators of a (finitely generated) subgroup of $F$ can be considered as the entries of a column vector, and this column can be considered as the product of a matrix over $\Z$ with $\mathbf e$: 
%Considering finite columns of such elements, we have
\begin{equation*}
  \begin{pmatrix}
    x_0^0{\gid}^0+\dotsb+x_{n-1}^0{\gid}^{n-1}\\
\vdots\\
    x_0^{m-1}{\gid}^0+\dotsb+x_{n-1}^{m-1}{\gid}^{n-1}
  \end{pmatrix}
=\begin{pmatrix}
    x_0^0&\dots&x_{n-1}^0\\
\vdots&\ddots&\vdots\\
    x_0^{m-1}&\dots&x_{n-1}^{m-1}
  \end{pmatrix}
\begin{pmatrix}
  {\gid}^0\\\vdots\\{\gid}^{n-1}
\end{pmatrix}
=X\mathbf e.
\end{equation*}
The subgroup of $F$ generated by the rows of $X\mathbf e$ can be
denoted by $\gpgen {X\mathbf e}$.  If
$P$ is an $m\times m$ matrix with integer entries, then
\begin{equation*}
  \gpgen{PX\mathbf e}\included\gpgen{X\mathbf e}.
\end{equation*}
If also $P$ is \emph{invertible}---that is, $\det(P)=\pm1$---then
\begin{equation*}
  \gpgen{PX\mathbf e}=\gpgen{X\mathbf e}.
\end{equation*}
We can therefore perform the following row-operations on $X$, without
changing the group $\gpgen{X\mathbf e}$.  We can
\begin{enumerate}
  \item
interchange two rows,
\item
multiply a row by $-1$,
\item
add an integer multiple of one row to another.
\end{enumerate}
These operations allow us to perform Gaussian elimination.
Adding rows of zeros as necessary, we may also assume that $m\geq n$.
Then for some invertible integer matrix $P$, we have
\begin{equation*}
PX=
  \begin{pmatrix}
    T\\\hline0
  \end{pmatrix},
\end{equation*}
where $T$ is an $n\times n$ upper-triangular matrix,
\begin{equation*}
T=
\begin{pmatrix}
  *&\cdots&*\\
   &\ddots&\vdots\\
  0&      &*
\end{pmatrix}.
\end{equation*}
By using also invertible \emph{column}-operations, we can diagonalize $T$.  That
is, there are invertible integer matrices $P$ and $Q$ such that
\begin{equation*}
  PXQ=
  \begin{pmatrix}
    D\\\hline0
  \end{pmatrix},
\end{equation*}
where
\begin{equation*}
  D=
  \begin{pmatrix}
    d_0&      &0\\
       &\ddots&\\
      0&      &d_{n-1}
  \end{pmatrix}.
\end{equation*}
We now have
\begin{equation*}
  \gpgen{X\mathbf e}=\gpgen{PXQQ\inv\mathbf e}=\gpgen{DQ\inv\mathbf e}\cong\gpgen{D\mathbf e}.
\end{equation*}
Working further on $D$ with invertible row- and column- operations, we
may assume~\eqref{d} holds, while $d_k=\cdots=d_{n-1}=0$.
Indeed, suppose $b,c\in\Z$ and $\gcd(b,c)=d$.  By invertible
operations, from
  \begin{equation*}
  \begin{pmatrix}
    b&0\\0&c
  \end{pmatrix}
  \end{equation*}
   we obtain
  $\begin{pmatrix}
    b&0\\c&c
  \end{pmatrix}$ and then
$\begin{pmatrix}
  d&e\\0&f
\end{pmatrix}$,
where $e$ and $f$ are multiples of $c$ and hence of $d$; hence, with
an invertible column-operation, we get
\begin{equation*}
  \begin{pmatrix}
    d&0\\0&f
  \end{pmatrix},
\end{equation*}
where again $d\divides f$.  Applying such
transformations as needed to pairs of entries in $D$
yields~\eqref{d}. 
\end{proof}


\begin{porism}
 Every subgroup of a free abelian group on $n$ generators is free abelian on $n$ generators or fewer.
\end{porism}

We can show uniqueness of the numbers $d_j$ by an alternative analysis.

\begin{theorem}[Chinese Remainder]\label{thm:CRT}
  If $\gcd(m,n)=1$, then the homomorphism $x\mapsto(x,x)$ from
  $\Zmod{mn}$ to $\Zmod m\oplus\Zmod n$ is an isomorphism. 
\end{theorem}

\begin{proof}
  If $x\equiv0\pmod m$ and $x\equiv 0\pmod n$,
  then $x\equiv0\pmod{mn}$.  Hence the given homomorphism is
  injective.  Its surjectivity follows by counting.
\end{proof}

The Chinese Remainder Theorem will be generalized as
Theorem~\ref{thm:CRT-R}. 
In the usual formulation of the theorem, every system
\begin{align*}
  x&\equiv a\pmod m,&x&\equiv b\pmod n
\end{align*}
has a unique solution \emph{modulo} $mn$; but this solution is just
the inverse image of $(a,b)$ under the isomorphism $x\mapsto(x,x)$.

\begin{theorem}
For every finite abelian group $G$, there are unique primes $p_0$, \dots,
$p_{k-1}$, not necessarily distinct, along with unique positive
integers $m(0)$, \dots, $m(k-1)$, such that 
\begin{equation*}
G\cong  \Zmod{p_0{}^{m(0)}}\oplus\dotsb\oplus\Zmod{p_{k-1}{}^{m(k-1)}}.
\end{equation*}
\end{theorem}

\begin{proof}
To obtain the analysis, apply the Chinese Remainder Theorem to
Theorem~\ref{thm:fin-gen-ab}. 
The analysis is
unique, provided it is unique in the case where all of the $p_j$ are
the same.  But in this case, the analysis is unique, by repeated
application of the observation that the exponent of the group---the
largest order of any of its elements---is the
highest prime power appearing in the factorization.
\end{proof}

\section{Semidirect products}\label{sect:semidirect}

An isomorphism from a structure to itself is an \textbf{automorphism.}\index{automorphism}

\begin{theorem}
The automorphisms of a group $G$ compose a subgroup of
$\Sym G$.
\end{theorem}

The subgroup in the theorem is denoted by
\begin{equation*}
  \Aut G.
\end{equation*}

\begin{theorem}
For every group $G$,
there is a homomorphism 
\begin{equation*}
g\mapsto(x\mapsto gxg\inv)
\end{equation*}
from $G$ to $\Aut G$.
\end{theorem}

An automorphism $x\mapsto gxg\inv$ as in the theorem is
\textbf{conjugation}\index{conjugation} by $g$ and is an \textbf{inner
  automorphism}\index{inner automorphism} of $G$.  The kernel of the homomorphism in the theorem
is the \textbf{center}\index{center} of $G$, denoted by\footnote{An alternative
  formulation of the center of a group is given and generalized in
  \S~\ref{sect:nilpotent}.} 
\begin{equation*}
  \centr G.
\end{equation*}
Then $G$ is \textbf{centerless}\index{centerless} if $\centr G$ is trivial.
Repeating
the process of forming inner automorphisms, we obtain a chain
\begin{equation*}
  G\to\Aut G\to\Aut{\Aut G}\to\dotsb,
\end{equation*}
called the \textbf{automorphism tower}\index{automorphism tower} of
$G$.  The tower reaches a fixed point, perhaps after
transfinitely many steps: Simon Thomas~\cite{MR801316} shows this in
case $G$ is centerless; Joel Hamkins~\cite{MR1487370},
in the general case.

\begin{theorem}\label{thm:GNG}
For every group $G$, if $N\nsubgp G$, then there is a homomorphism
\begin{equation*}
g\mapsto(x\mapsto gxg\inv) 
\end{equation*}
from $G$ to $\Aut N$.
\end{theorem}

In the theorem, let the homomorphism be $g\mapsto\sigma_g$.  Suppose
also $H\subgp G$, and $N\cap H=\gpgen{\gid}$.  Then the conditions
of Theorem~\ref{thm:isdp} are met, and $NH$ is an
internal semidirect product.  Equation~\eqref{eqn:sdp}, describing
multiplication on $NH$, can be rewritten as 
\begin{equation*}
  (mg)(nh)=(m\cdot\sigma_g(n))(gh).
\end{equation*}

\begin{theorem}
  Suppose $N$ and $H$ are groups, and $g\mapsto\sigma_g$ is a
  homomorphism from $H$ to $\Aut N$.  Then the set $N\times H$ becomes
  a group when multiplication is defined by
  \begin{equation*}
  (m,g)(n,h)=(m\cdot\sigma_g(n),gh).    
  \end{equation*}
\end{theorem}

\begin{proof}
  To check that the multiplication is associative means checking that 
  \begin{equation*}
    \lambda_{(m,g)}\lambda_{(n,h)}=\lambda_{(m,g)(n,h)}.
  \end{equation*}
We can write $\lambda_{(m,g)}$ as $\lambda_m\sigma_g\times
\lambda_g$.  Then
\begin{align*}
  \lambda_{(m,g)}\lambda_{(n,h)}
=(\lambda_m\sigma_g\times\lambda_g)(\lambda_n\sigma_h\times\lambda_h)
&=\lambda_m\sigma_g\lambda_n\sigma_h\times\lambda_g\lambda_h\\
&=\lambda_m\lambda_{\sigma_g(n)}\sigma_g\sigma_h\times\lambda_{gh}\\
&=\lambda_{m\cdot\sigma_g(n)}\sigma_{gh}\times\lambda_{gh}\\
&=\lambda_{(m\cdot\sigma_g(n),gh)}\\
&=\lambda_{(m,g)(n,h)}.
\end{align*}
Finally, $(\gid,\gid)$ is an
identity, and $(\sigma_{h\inv}(n\inv),h\inv)$ is an inverse of $(n,h)$.
\end{proof}

The group given by the theorem is the \textbf{semidirect
  product}\index{semidirect product} of
$N$ and $H$ with respect to $\sigma$; it can be denoted by
\begin{equation*}
  N\rtimes_{\sigma}H.
\end{equation*}
The bijection in Theorem~\ref{thm:isdp} is an isomorphism from
$N\rtimes_{\sigma}H$ to $NH$ when $\sigma$ is as in
Theorem~\ref{thm:GNG}.
% $g\mapsto(x\mapsto gxg\inv)$. 

\begin{theorem}\label{thm:Zp-cross}
If $p$ is prime, then $\Zmodu p\cong\Zmod{p-1}$.
\end{theorem}

\begin{proof}
The group $\Zmodu p$ has order $p-1$ and,
by Theorem~\ref{thm:fin-gen-ab}, is isomorphic to
  \begin{equation*}
    \Zmod {d_0}\oplus\dotsb\oplus\Zmod{d_{k-1}}\oplus \Zmod m,
  \end{equation*}
where $d_0\divides\cdots\divides d_{k-1}\divides m$.
Hence every element of $\Zmodu p$ is a root of the polynomial $x^m-1$.
But this polynomial can have at most $m$ roots in $\Zmod
p$, since this is a \emph{field.}  Hence $p-1\leq m$, so $m=p-1$, and
$k=0$.
\end{proof}

\begin{theorem}
The embedding $x\mapsto\lambda_x$ of a ring $(E,\cdot)$
in $(\End E,\circ)$ restricts to an embedding of
$\unit{(E,\cdot)}$ in $\Aut E$.  In case $E$ is $\Zmod n$, each
embedding is an isomorphism.  In particular, if $a$ is an element of
$\Zmodu n$ of order $m$, and $m\divides t$, then $\Zmod t$ acts on
$\Zmod n$ by $(x,y)\mapsto a^xy$.  Conversely, if some $\Zmod t$ acts
on $\Zmod n$, then the action is so given for some such $a$.
\end{theorem}

\begin{theorem}\label{thm:pq}
  For every odd prime $p$, for every prime divisor $q$ of $p-1$, there is
  a non-abelian semidirect product $\Zmod p\rtimes_{\sigma}\Zmod q$,
  which is unique up to isomorphism.
\end{theorem}

\begin{proof}
  As $\Zmodu p$ is cyclic, it has a unique subgroup $G$ of order $q$.
  As $q$ is prime, every nontrivial element of $G$ is a generator.
  If $a\in G\setminus\{1\}$, let $\sigma$ be the homomorphism
  $x\mapsto(y\mapsto a^xy)$ from $\Zmod q$ to $\Aut{\Zmod p}$.
  Then
  we can form
  \begin{equation*}
    \Zmod p\rtimes_{\sigma}\Zmod q.
  \end{equation*}
If $\Zmod p\rtimes_{\tau}\Zmod q$ is some other non-abelian
semidirect product, then  $\tau_1$ is $x\mapsto b\cdot x$ for some
$b$ in $G\setminus\{1\}$.  But then $b^n=a$ for some $n$, so there is
an isomorphism from $\Zmod p\rtimes_{\sigma}\Zmod q$ to
$\Zmod p\rtimes_{\tau}\Zmod q$ that takes $(x,y)$ to $(x,ny)$.
\end{proof}

Because of its uniqueness, we may refer to the semidirect product of
the theorem as
\begin{equation*}
 \Zmod p\rtimes\Zmod q.
\end{equation*}
In case $q=2$, this group is $\Dih p$.
The next section develops the tools used in \S~\ref{sect:class-small}
to show that there is no other way to obtain a group of order $pq$
for distinct primes $p$ and $q$. 







\section{Actions of groups}\label{sect:actions}

\begin{theorem}
  Let $G$ be a group, and $\setactedon$ a set.  There is a one-to-one
  correspondence between
  \begin{enumerate}
    \item
homomorphisms $g\mapsto(a\mapsto ga)$ from $G$ into
$\Sym{\setactedon}$, and 
\item
functions $(g,a)\mapsto ga$ from $G\times A$ into $A$ such that
\begin{gather}\label{act:1}
\gid a=a,\\\label{act:gha}
(gh)a=g(ha)
\end{gather}
for all $g$ and $h$ in $G$ and $a$ in $A$.
  \end{enumerate}
\end{theorem}

\begin{proof}
If $g\mapsto(a\mapsto ga)$ maps $G$ homomorphically into
$\Sym{\setactedon}$, then~\eqref{act:1} and~\eqref{act:gha} follow.
Suppose conversely that these hold.  Then, in particular,
\begin{equation*}
g(g\inv a)=(gg\inv)a=\gid a=a
\end{equation*}
 and likewise $g\inv(ga)=a$, so $a\mapsto g\inv a$ is the inverse of
$a\mapsto ga$, and the function $g\mapsto(a\mapsto ga)$ does map $G$
into $\Sym{\setactedon}$, homomorphically by~\eqref{act:gha}.  
\end{proof}

Either of two functions that correspond as in the theorem is a
\textbf{(left) action}\index{action}\index{left!--- action} of $G$ on
$A$.  Examples include the following. 

\begin{asparaenum}[1.]
  \item
A symmetry group of a set acts on the set in the obvious way, by
\begin{equation*}
(\sigma,x)\mapsto\sigma(x). 
\end{equation*}
\item
An arbitrary group $G$ acts on itself by left
multiplication: 
\begin{equation*}
(g,x)\mapsto\lambda_g(x).
\end{equation*}
\item
If $H\subgp G$, then $G$ acts on
the set $G/H$ by
\begin{equation*}
  (g,xH)\mapsto gxH.
\end{equation*}
\item
Finally, $G$ acts on itself by conjugation:
\begin{equation*}
  (g,x)\mapsto gxg\inv.
\end{equation*}
\end{asparaenum}

Suppose $(g,x)\mapsto gx$ is an arbitrary action of $G$ on
$\setactedon$.  
If $a\in\setactedon$, then
the subset $\{g\setcolon ga=a\}$ of $G$ is the \textbf{stabilizer}\index{stabilizer} of $a$,
denoted by 
\begin{equation*}
G_a;
\end{equation*}
the subset $\{ga\setcolon g\in G\}$ of ${\setactedon}$ is the
\textbf{orbit}\index{orbit} of $a$, denoted by 
\begin{equation*}
  Ga.
\end{equation*}
The subset $\{x\setcolon G_x=G\}$ of ${\setactedon}$ can be denoted by
\begin{equation*}
  {\setactedon}_0.
\end{equation*}
See Appendix~\ref{App:ga} for an alternative development of these
notions. 

\begin{theorem}\label{thm:action}
  Let $G$ act on ${\setactedon}$ by
 $(g,x)\mapsto gx$.
  \begin{enumerate}
\item
The orbits partition $\setactedon$;
    \item
$G_a\subgp G$;
\item\label{GGa}
$[G:G_a]=\size {Ga}$.
  \end{enumerate}
\end{theorem}

\begin{proof}
  For~\eqref{GGa}, we establish a bijection between $G/G_a$ and $Ga$
  by noting that
  \begin{equation*}
    gG_a=hG_a\iff h\inv g\in G_a\iff ga=ha;
  \end{equation*}
so the bijection is $gG_a\mapsto ga$.
\end{proof}

\begin{corollary}
If there are only finitely many orbits in $\setactedon$ under $G$,
then
\begin{equation}\label{eqn:class}
  \size {\setactedon}=\size{{\setactedon}_0} +\sum_{a\in X}[G:G_a]
\end{equation}
for some set $X$ of elements of $\setactedon$ whose orbits are nontrivial.  
\end{corollary}

Equation~\eqref{eqn:class} is the \textbf{class equation.}\index{class
  equation} 
For example,
suppose $G$ acts on itself by conjugation, and $g\in G$.
Then $Gg$ is the \textbf{conjugacy class}\index{conjugacy class} of $g$, while
$G_g$ is the \textbf{centralizer}\index{centralizer} of $g$, denoted
by\footnote{More generally, if $H<G$, then
  $\centralizer gH=\{h\in H\colon hgh\inv=g\}$.}
  \begin{equation*}
  \centralizer gG.
  \end{equation*}
Finally, $G_0$ is the \textbf{center}\index{center} of $G$, denoted by
    \begin{equation*}
\centr G.      
    \end{equation*}
The class equation for the present case can now be written as
\begin{equation*}
  \size G=\size{\centr G}+\sum_{a\in X}[G:\centralizer aG].
\end{equation*}

A \textbf{finite $p$-group}\index{finite
  $p$-group}\index{p-group@$p$-group} is a finite group whose 
order is a power of $p$.

\begin{theorem}\label{thm:act-cong}
  If a finite set ${\setactedon}$ is acted on by a finite $p$-group, then $\size
  {\setactedon}\equiv\size{{\setactedon}_0}\pmod p$.
\end{theorem}

\begin{proof}
In the class equation,  $[G:G_a]$ is a multiple of $p$ in each
  case.
\end{proof}

A first application of this theorem is

\begin{theorem}[Cauchy]\label{thm:Cauchy}
  If $p$ divides $\size G$, then $\order g=p$ for some $g$ in $G$.
\end{theorem}

\begin{proof}[Proof (J. H. McKay \cite{MR0098777}).]
Suppose $p$ divides $\size G$.  We seek a nontrivial solution in $G$
  of the equation
  \begin{equation*}
  x^p=\gid.  
  \end{equation*}
  Let $\setactedon$ be the set
  \begin{equation*}
    \{\tuple x\in G^p\setcolon x_0\dotsm x_{p-1}=\gid\};
  \end{equation*}
so we seek $g$ in $G$ such that $(g,\dots,g)\in\setactedon$ and
$g\neq\gid$. 
If $(g_0,\dots,g_{p-1})\in\setactedon$ and $k<p$, then
\begin{align*}
  (g_0\dotsm g_{k-1})(g_k\dotsm g_{p-1})&=\gid,
&(g_k\dotsm g_{p-1})(g_0\dotsm g_{k-1})&=\gid,
\end{align*}
and therefore
\begin{equation*}
(g_k,\dots,g_{p-1},g_0,\dots,g_{k-1})\in\setactedon.
\end{equation*}
Thus $\Zmod p$ acts on $\setactedon$ by
\begin{equation*}
  (k,(g_0,\dots,g_{p-1}))\mapsto
  (g_k,\dots,g_{p-1},g_0,\dots,g_{k-1}).
\end{equation*}
With respect to this action,
\begin{equation*}
    {\setactedon}_0=\{(g,\dots,g)\colon g^p=\gid\};
\end{equation*}
also $\Zmod p$ is a finite $p$-group.
Now, the map
\begin{equation*}
(g_1,\dots,g_{p-1})\longmapsto\bigl((g_1\dotsm
  g_{p-1})\inv,g_1,\dots,g_{p-1}\bigr) 
\end{equation*}
is a bijection
from $G^{p-1}$ onto ${\setactedon}$, so
$\size {\setactedon}$ is a multiple of~$p$; hence
$\size{{\setactedon}_0}$ 
is a multiple of~$p$, by Theorem~\ref{thm:act-cong}.  Since ${\setactedon}_0$
contains $(\gid,\dots, \gid)$, it contains some $(g,\dots,g)$, where $\order
g=p$. 
\end{proof}

\begin{corollary}
  A finite group is a $p$-group if and only if the order of every
  element is a power of~$p$. 
\end{corollary}

\begin{proof}
  If $\ell$ is a prime dividing $\order g$, then $\ell$ divides $\size
  G$.  Conversely, if $\ell$ divides $\size G$, then $G$ has an
  element of order $\ell$.
\end{proof}

Hence an arbitrary group is a \textbf{$p$-group} if the order of its
every element is a power of~$p$.  

\begin{theorem}\label{thm:p-ntc}
  Every nontrivial $p$-group has nontrivial center.
\end{theorem}

\begin{proof}
  By Theorem~\ref{thm:act-cong},
  \begin{equation*}
    \size G\equiv\size{\centr G}\pmod p,
  \end{equation*}
so $p$ divides $\size{\centr G}$.  Since $\centr G$ contains at least one
element, it contains at least $p$ of them.
\end{proof}

\begin{theorem}
  All groups of order $p^2$ are abelian.
\end{theorem}

\begin{proof}
  Let $G$ have order $p^2$.  Then either $\centr G$ is
  all of $G$, or else $\size{\centr G}=p$, by the previous theorem.
  In any case, there is $a$ in $G$ such that
  \begin{equation*}
    G=\gpgen{\{a\}\cup\centr G}.
  \end{equation*}
But elements of $\centr G$ commute with all elements of $G$; and
powers of $a$ commute with each other (and with elements of $\centr
G$); hence $G$ is abelian.
\end{proof}

Supposing $G$ is an arbitrary group and $H\subgp G$, let $\setactedon$
be the set
\begin{equation*}
  \{gHg\inv\colon g\in G\}
\end{equation*}
of conjugates of $H$.  Then $G$ acts on $\setactedon$ by
conjugation,
\begin{equation*}
  (g,K)\mapsto gKg\inv.
\end{equation*}
The stabilizer of $H$ under this action is the \textbf{normalizer}\index{normalizer} of $H$
in $G$, denoted by\footnote{More generally, if also $K\subgp G$, then
  $\normalizer HK=\{k\in K\colon kHk\inv=H\}$.}
\begin{equation*}
\normalizer HG.
\end{equation*}
If $H\subgp K\subgp G$, then 
\begin{equation*}
  H\nsubgp K\iff K\subgp \normalizer HG.
\end{equation*}

\begin{theorem}\label{thm:normalizer}
Suppose $G$ is a group with subgroups $H$ and $K$.
Under the action of $H$ on $G/K$ by left multiplication,
\begin{equation*}
  gK\in(G/K)_0\iff H\subgp gKg\inv.
\end{equation*}
In case $H=K$, a finite group,
\begin{equation*}
(G/H)_0=\normalizer HG/H.
\end{equation*}
\end{theorem}

\begin{proof}
We compute:
\begin{align*}
gK\in(G/K)_0
&\iff hgK=gK&&\text{for all $h$ in $H$}\\
&\iff g\inv hgK=K&&\text{for all $h$ in $H$}\\
&\iff g\inv hg\in K&&\text{for all $h$ in $H$}\\
&\iff h\in gKg\inv&&\text{for all $h$ in $H$}\\
&\iff H\subgp gKg\inv.&&
%\\
%&\iff g\inv Hg=H&&\\
%&\iff g\inv\in\normalizer HG&&\\
%&\iff g\in\normalizer HG&&\\
%&\iff gH\in\normalizer HG/H.&&
\end{align*}
If $H$ is finite, then 
\begin{equation*}
H\subgp gHg\inv\iff H=gHg\inv\iff g\in\normalizer HG.\qedhere
\end{equation*}
\end{proof}

A \textbf{$p$-subgroup} of a group is a subgroup that is a
$p$-group. 

\begin{lemma}%\label{thm:HpGNGH}
  If $H$ is a $p$-subgroup of $G$, then
  \begin{equation*}
    [G:H]\equiv[\normalizer HG:H]\pmod p.
  \end{equation*}
\end{lemma}

\begin{proof}
Theorems~\ref{thm:normalizer} and~\ref{thm:act-cong}.
\end{proof}

\begin{lemma}
  If $H$ is a $p$-subgroup of $G$, and $p$ divides $[G:H]$, then $H$
  is a normal subgroup of some $p$-subgroup $K$ of $G$ such that
  $[K:H]=p$.  
\end{lemma}

\begin{proof}
By the last lemma, $p$ divides $[\normalizer HG:H]$.  
Since $H\nsubgp\normalizer HG$, the quotient $\normalizer HG/H$ is a group.
By Cauchy's Theorem (Theorem~\ref{thm:Cauchy}),
this group has an element $gH$ of order
$p$.  So $\gpgen{\{g\}\cup H}$ is the desired $K$.  
\end{proof}

 A \textbf{Sylow $p$-subgroup}\index{Sylow!--- subgroup}
is a maximal $p$-subgroup.  The following is a partial converse to
Lagrange's Theorem (Theorem~\ref{thm:Lagrange}).

\begin{theorem}[Sylow I]\label{thm:Sylow-1}\index{Sylow!--- Theorems|(}
  \index{theorem!Sylow Th---s|(} 
For every finite group of order $p^nm$, where $p\ndivides m$, there is
a chain 
\begin{equation*}
  H_1\subgp H_2\subgp\cdots\subgp H_n
\end{equation*}
of subgroups, where $\size{H_1}=p$ and in each case $H_i\nsubgp
H_{i+1}$ and $[H_{i+1}:H_i]=p$. 
Every $p$-subgroup of such a group appears on such a chain.
In particular, every $p$-subgroup is included in a Sylow subgroup,
whose index is indivisible by $p$. 
\end{theorem}

\begin{proof}
Cauchy's Theorem (Theorem~\ref{thm:Cauchy}) and repeated application of
the last lemma.
\end{proof}

\begin{corollary}
  The conjugate of a Sylow $p$-subgroup is a
  Sylow $p$-subgroup.
  A \emph{unique} Sylow $p$-subgroup is normal.
\end{corollary}

A converse to the corollary is the following.

\begin{theorem}[Sylow II]\label{thm:Sylow-2}
  All Sylow $p$-subgroups are conjugate.
\end{theorem}

\begin{proof}
  Say $H$ and $P$ are $p$-subgroups of $G$, where $P$ is maximal.
  Then $H$ acts
  on  the set $G/P$ by left multiplication.  By
  Theorem~\ref{thm:act-cong}, since $[G:P]$ is not a multiple of $p$,
  the set $(G/P)_0$ has an element $aP$.  By
  Theorem~\ref{thm:normalizer}, $H\subgp aPa\inv$.  If $H$ is
also Sylow, then $H=aPa\inv$ by Theorem~\ref{thm:Sylow-1}.
\end{proof}

\begin{theorem}[Sylow III]\label{thm:Sylow-3}
The number of Sylow $p$-subgroups of a finite group is congruent to
$1$ \emph{modulo} $p$ and divides the order of the group.
\end{theorem}

\begin{proof}
Let $A$ be the set of Sylow $p$-subgroups of a finite group $G$.
Then $G$ acts on $A$ by conjugation.  Let $H\in A$.  By
Theorem~\ref{thm:Sylow-2},  the orbit of
  $H$ is precisely $A$.  The stabilizer of $H$ is $\normalizer
  HG$.  Then by Theorem~\ref{thm:action} \eqref{GGa},
  \begin{equation*}
    [G:\normalizer HG]=\size A,
  \end{equation*}
so $\size A$ divides $\size G$.

Now consider $H$ as acting on $A$ by conjugation.  
Then the following are equivalent:
\begin{enumerate}
  \item
$P\in A_0$,
\item
$H\subgp \normalizer PG$,
\item
$H$ is a Sylow subgroup of $\normalizer PG$,
\item
$H=P$,
\end{enumerate}
since $P\nsubgp\normalizer PG$, so $P$ is the unique Sylow $p$-subgroup
of $\normalizer PG$.  
Therefore $A_0=\{H\}$, so by
Theorem~\ref{thm:act-cong}
\begin{equation*}
  \size A\equiv \size{A_0}\equiv1\pmod p.\qedhere
\end{equation*}
\end{proof}\index{Sylow!--- Theorems|)} \index{theorem!Sylow Th---s|)}















\section{Classification of small groups}\label{sect:class-small}

We can now complete the work, begun in \S~\ref{sect:semidirect}, of
classifying the groups of order $pq$ for primes $p$ and $q$.

\begin{lemma}
  Suppose $p$ and $q$ are distinct primes such that $q\not\equiv 1\pmod
  p$, and $\size G=pq$.  Then $G$ has a unique Sylow $p$-subgroup,
  which is therefore normal.
\end{lemma}

\begin{proof}
  Let $A$ be the set of Sylow $p$-subgroups of $G$.  Then $\size
  A\equiv 1\pmod p$ by Theorem~\ref{thm:Sylow-3}, so $\size A$ is not
  $q$ or $pq$; but $\size A$ divides $pq$;  so $\size A=1$.
\end{proof}

\begin{theorem}
    Suppose $p$ and $q$ are primes, where $p<q$, so that $p\not\equiv
    1\pmod q$, and $G$ is a group of
    order $pq$.
    \begin{enumerate}\renewcommand{\labelenumi}{\theenumi.}
    \item 
If $q\not\equiv 1\pmod p$, then $G$ is cyclic.  
    \item 
If $q\equiv 1\pmod p$, then either $G$ is a cyclic group, or else $G$ is
the unique non-abelian semidirect product $\Zmod q\rtimes\Zmod p$.  
    \end{enumerate}
In particular, every non-abelian group of order $2q$ is isomorphic to
$\Dih q$.
\end{theorem}

\begin{proof}
By the lemma, $G$ has a normal subgroup $N$ of order $q$, and $N$ is
cyclic by a corollary to Lagrange's Theorem (Theorem~\ref{thm:Lagrange}).
By the first Sylow Theorem (Theorem~\ref{thm:Sylow-1}), $G$ has a
Sylow $p$-subgroup $H$, which has order $p$ and is
therefore cyclic.  Then $N\cap H=\gpgen{\gid}$, so $G=NH$ by
Theorem~\ref{thm:isdp} and counting.
  \begin{asparaenum}[1.]
    \item
If $q\not\equiv 1\pmod p$, then $H\nsubgp G$ by the lemma, so 
$G=N\times H$ by Theorem~\ref{thm:wdp}.  The product is
cyclic by the Chinese Remainder Theorem (Theorem~\ref{thm:CRT}). 
\item
  If $q\equiv 1\pmod p$, then $G$ might still be $N\times H$;
  otherwise, $G$ is isomorphic to $\Zmod q\rtimes\Zmod p$ by
  Theorem~\ref{thm:pq}. \qedhere
  \end{asparaenum}
\end{proof}

We now know all groups of order less than $36$, but different from
$8$, $12$, $16$, $18$, $20$, $24$, $27$, $28$, $30$, and $32$.

\begin{theorem}
  Every group of order $8$ is isomorphic to one of
  \begin{align*}
    &\Zmod 8,&
&\Zmod 2\oplus\Zmod 4,&
&\Zmod 2\oplus\Zmod 2\oplus\Zmod 2,&
&\Dih 4,&
&\quat.
  \end{align*}
\end{theorem}

\begin{proof}
  Say $\order G=8$.  If $G$ is abelian, then its possibilities are
  given by Theorem~\ref{thm:fin-gen-ab}.  Suppose $G$ is not abelian.
  Then $G$ has an element $a$ of order greater than $2$
  by \cite[Exercise I.1.13, p.~30]{MR600654}, and so $\order a=4$ (since
  $G\ncong\Zmod 8$).  Then
  $\gpgen a\nsubgp G$ by \cite[Exercise I.5.1, p.~45]{MR600654}.  Let
  $b\in G\setminus\gpgen a$.  Then $b^2$ is either $\gid$ or $a^2$
  (since otherwise $b$ would generate $G$).  In the former case,
  $G=\gpgen a\rtimes\gpgen b$, so $G\cong\Dih 4$.  In the latter case,
  $G\cong\quat$. 
\end{proof}

\begin{theorem}
  Every group of order $12$ is isomorphic to one of
  \begin{align*}
    &\Zmod{12},&
&\Zmod 2\oplus\Zmod 6,&
&\Alt 4,&
&\Dih 6,&
&\gpres{a,b}{a^6,a^3b^2,bab\inv a}.
  \end{align*}
\end{theorem}

\begin{proof}
  Suppose $\order G=12$, but $G$ is not abelian.  A Sylow
  $3$-subgroup of $G$ has order $3$, so it is $\gpgen a$ for some $a$.
  Then $G$ acts on $G/\gpgen a$ by left multiplication, 
  and $[G:\gpgen a]=4$, so there is a homomorphism from $G$ to $\Sym 4$.  If
  this is an embedding, then $G\cong\Alt 4$.  Assume it is not an
  embedding.  Then the kernel must be $\gpgen a$, so $\gpgen a\nsubgp
  G$.

Let $H$ be a Sylow $2$-subgroup of $G$.  Then $H$ is isomorphic to
$\Zmod 4$ or $\Zmod 2\oplus\Zmod 2$.  In any case, $H$ has two
elements $b$ and $c$ such that none of $b$, $c$, or $bc$ is $\gid$.
Since $G$ is not $\gpgen a\times H$, we may assume
\begin{equation*}
  bab\inv=a^2.
\end{equation*}
If also $cac\inv=a^2$, then $bcac\inv b\inv=a$.  Thus $H$ has an
element that commutes with $a$.  Hence $G$ has a subgroup $K$ isomorphic
to $\Zmod 6$.  If $G\setminus K$ has an element of order $2$, then
$G\cong\Dih 6$; otherwise, $G$ is the last possibility above.
\end{proof}


















\section{Nilpotent groups}\label{sect:nilpotent}

For a group, what is the next best thing to being abelian?
A group $G$ is abelian if and only if $\centr G=G$.  (See
\S~\ref{sect:semidirect}.)  To weaken this
condition, we define
the \textbf{commutator}\index{commutator} of two elements $a$ and $b$ of $G$ to be
\begin{equation*}
  aba\inv b\inv;
\end{equation*}
this can be denoted by
\begin{equation*}
[a,b].
\end{equation*}
Then
\begin{equation*}
  \centr G=\{g\in G\setcolon \Forall x[g,x]=\gid\}.
\end{equation*}
We now generalize this by defining
\begin{gather*}
  \cseries 0G=\gpgen{\gid},\\
\cseries{n+1}G=\{g\in G\setcolon \Forall x[g,x]\in\cseries nG\}.
\end{gather*}
Then $\centr G=\cseries 1G$.

\begin{theorem}\label{thm:central}
Let $G$ be a group.
\begin{enumerate}
\item\label{item:C1}
$\cseries nG\nsubgp G$.
\item\label{item:C2}
$\cseries nG\subgp \cseries{n+1}G$.
\item\label{item:C3}
$\cseries{n+1}G/\cseries nG=\centr{G/\cseries nG}$.
\end{enumerate}
\end{theorem}

\begin{proof}
We use induction to prove~\ref{item:C1}, and
incidentally~\ref{item:C2} and~\ref{item:C3}.
  Trivially, $\cseries 0G\nsubgp G$.  Suppose $\cseries kG\nsubgp G$.
  Then the following are equivalent:
  \begin{gather*}
g\in \cseries{k+1}G;\\
\Forall x[g,x]\in\cseries kG;\\
\Forall x gxg\inv x\inv\in\cseries kG;\\
\Forall x\cseries kGgx=\cseries kGxg;\\
\cseries kGg\in\centr{G/\cseries kG}.
  \end{gather*}
Thus $\cseries kG\subgp \cseries {k+1}G$, and $\cseries{k+1}G/\cseries
kG=\centr{G/\cseries kG}$; in particular, 
\begin{equation*}
\cseries{k+1}G/\cseries
kG\nsubgp G/\cseries kG, 
\end{equation*}
so $\cseries{k+1}G\nsubgp G$. 
\end{proof}

The \textbf{ascending central series}\index{ascending central series}
of $G$ is the sequence $(\cseries 
nG\colon n\in\vnn)$, usually written out as
\begin{equation*}
  \gpgen{\gid}\nsubgp\centr G\nsubgp\cseries 2G\nsubgp\cseries 3G\nsubgp\dotsb.
\end{equation*}
A group is called \textbf{nilpotent}\index{nilpotent} if the terms in the sequence are
eventually the group itself, that is, for some $n$ in~$\vnn$,
\begin{equation*}
  \cseries nG=G.
\end{equation*}
So an abelian group is nilpotent, since its center is itself. 

Suppose $G$ is nilpotent, and in particular $\cseries nG=G$.  Take some
$g$ in $G$, and let $f$
be the operation $x\mapsto[g,x]$ on $G$.  Writing $f^0$ for $\id_G$
and $f^{n+1}$ for $f\circ f^n$, we have
\begin{align*}
  f^0(x)&\in G,&
f(x)&\in\cseries{n-1}G,&
f^2(x)&\in\cseries{n-2}G,&
&\dots,&
f^n(x)&=\gid.
\end{align*}
Thus $f$ is ``nilpotent'' in the monoid of operations on $G$.
However, this should not be taken as a sufficient condition for $G$ to
be nilpotent. 

Examples of nilpotent groups are given by:

\begin{theorem}
  Finite $p$-groups are nilpotent.
\end{theorem}

\begin{proof}
Suppose $G$ is a $p$-group.  If $H$ is a proper normal subgroup of
  $G$, then $G/H$ is a nontrivial $p$-group, so by
Theorem~\ref{thm:p-ntc} it has a nontrivial 
  center.  By Theorem~\ref{thm:central} the ascending central series
  of $G$ is strictly 
  increasing, until it reaches $G$ itself.
\end{proof}

The converse fails, because of:

\begin{theorem}
  A finite direct product of nilpotent groups is nilpotent.
\end{theorem}

\begin{proof}
Use that 
\begin{equation*}
\centr{G\times H}=\centr G\times\centr H.
\end{equation*}
If $\cseries nG=G$ and $\cseries mH=H$, then
$\cseries{\max\{n,m\}}{G\times H}=G\times H$.
\end{proof}

We now proceed to the converse of this theorem.

\begin{lemma}
If  $\cseries nG\subgp H$, then $\cseries{n+1}G
  \subgp \normalizer HG$. 
\end{lemma}

\begin{proof}
  Say $g\in\cseries{n+1}G$; we show $gHg\inv\included H$.  But if $h\in
  H$, then $[g,h]\in\cseries nG$, 
  so $ghg\inv\in\cseries nGh\included H$.  Therefore $gHg\inv\included
  H$.
\end{proof}

\begin{lemma}
  If $G$ is nilpotent, and $H\psubgp G$, then
  $H\psubgp\normalizer HG$.  
\end{lemma}

\begin{proof}
  Let $n$ be maximal such that $\cseries nG\subgp H$.  Then
  $\cseries{n+1}G\setminus H$ is non-empty, but, by the last lemma, it
  contains members of $\normalizer HG$.
\end{proof}

\begin{theorem}
  A finite nilpotent group is the direct product
  of its Sylow subgroups.
\end{theorem}

\begin{proof}
  Suppose $G$ is a finite nilpotent group.  We shall show that every
  Sylow subgroup of $G$ is a normal subgroup.  By
  Theorem~\ref{thm:wdp}, the first and second Sylow Theorems
  (Theorems~\ref{thm:Sylow-1} and~\ref{thm:Sylow-2}), and counting, $G$
  will be the direct product of its Sylow subgroups.

Suppose then $P$ is a Sylow
  $p$-subgroup of $G$.  We shall show that $P\nsubgp G$.
To do this, it is
  enough to show $\normalizer PG=G$.  To do \emph{this,} by the last
  lemma, it is enough
  to show $\normalizer{\normalizer PG}G\subgp \normalizer PG$.  To do
  \emph{this,} note that, as $P\nsubgp\normalizer PG$, so $P$ is the
  unique Sylow $p$-subgroup of $\normalizer PG$.  Hence, in
  particular, for any $x$ in $G$, if $xPx\inv\subgp \normalizer PG$, then
  $xPx\inv=P$, so $x\in\normalizer PG$.  But every $x$ in
  $\normalizer{\normalizer PG}G$ satisfies the
  hypothesis. 
\end{proof}

\section{Soluble groups}

The \textbf{commutator subgroup}\index{commutator!--- subgroup} of a
group $G$ is the subgroup
\begin{equation*}
  \gpgen{[x,y]\setcolon(x,y)\in G^2},
\end{equation*}
which is denoted by
\begin{equation*}
  G'.
\end{equation*}

\begin{theorem}\label{thm:G'}
  $G'$ is the smallest of the normal subgroups $N$ of $G$ such that
  $G/N$ is abelian.
\end{theorem}

\begin{proof}
  If $f$ is a homomorphism defined on $G$, then
  \begin{equation}\label{eqn:f([x,y])}
    f([x,y])=f(xyx\inv y\inv)=f(x)f(y)f(x)\inv f(y)\inv=[f(x),f(y)].
  \end{equation}
Thus, if $f\in\Aut G$, then
 $f(G')\subgp G'$.  In particular,
  $xG'x\inv\subgp G'$ for 
  all $x$ in $G$; so $G'\nsubgp G$.  Suppose $N\nsubgp G$; then the
  following are equivalent:
  \begin{enumerate}
    \item
$G/N$ is abelian;
\item
$N=[x,y]N$ for all $(x,y)$ in $G^2$;
\item
$G'\subgp N$.\qedhere
  \end{enumerate}
\end{proof}

We now define the \textbf{derived subgroups}\index{derived subgroup}
$\dsubgp nG$ of $G$ by 
\begin{gather*}
  \dsubgp 0G=G,\\
\dsubgp {n+1}G=(\dsubgp nG)'.
\end{gather*}
We have a descending sequence
\begin{equation*}
  G\nsupgp G'\nsupgp\dsubgp 2G\nsupgp\dotsb
\end{equation*}
The group $G$ is called \textbf{soluble}\index{soluble}
if this sequence reaches $\gpgen{\gid}$ (after finitely many steps).


For examples, let $K$ be a field.  Let
$G$ be the subgroup of $\GL K$ consisting of \textbf{upper triangular
  matrices.}\index{upper triangular}  So 
$G$ comprises the matrices
\begin{equation*}
    \begin{pmatrix}
    a_0&      &*\\
       &\ddots& \\
    0  &      &a_{n-1}
  \end{pmatrix}
\end{equation*}
where $a_0\dotsm a_{n-1}\neq0$.
We have
\begin{equation*}
  \begin{pmatrix}
    a_0&      &*\\
       &\ddots& \\
    0  &      &a_{n-1}
  \end{pmatrix}
  \begin{pmatrix}
    b_0&      &*\\
       &\ddots& \\
    0  &      &b_{n-1}
  \end{pmatrix}
=
  \begin{pmatrix}
    a_0b_0&      &*\\
       &\ddots& \\
    0  &      &a_{n-1}b_{n-1}
  \end{pmatrix}
\end{equation*}
and therefore every element of $G'$ is
\textbf{unitriangular,}\index{unitriangular} that is, it takes the form of
\begin{equation*}
  \begin{pmatrix}
    1&      &*\\
      &\ddots& \\
    0  &      &1
  \end{pmatrix}.
\end{equation*}
We also have
\begin{equation*}
    \begin{pmatrix}
    1&a_1&      &*\\
     &1  &\ddots& \\
     &   &\ddots&a_{n-1}\\
    0&   &      &1
  \end{pmatrix}
    \begin{pmatrix}
    1&b_1&      &*\\
     &1  &\ddots& \\
     &   &\ddots&b_{n-1}\\
    0&   &      &1
  \end{pmatrix}
=
    \begin{pmatrix}
    1&a_1+b_1&      &*\\
     &1  &\ddots& \\
     &   &\ddots&a_{n-1}+b_{n-1}\\
    0&   &      &1
  \end{pmatrix}
\end{equation*}
so the elements of $G''$ take the form of
\begin{equation*}
  \begin{pmatrix}
    1&0&      &*\\
     &1  &\ddots& \\
     &   &\ddots&0\\
    0&   &      &1
  \end{pmatrix}.
\end{equation*}
Proceeding, we find $\dsubgp{n+1}G=\gpgen{\gid}$. 

\begin{theorem}
  Nilpotent groups are soluble.
\end{theorem}

\begin{proof}
  Each $\cseries{k+1}G/\cseries kG$ is the
  center of some group (namely $G/\cseries kG$), so it is abelian.
  By Theorem~\ref{thm:G'} then,
  \begin{equation*}
  \cseries{k+1}G'\subgp \cseries kG.
  \end{equation*}
Suppose $G$ is nilpotent, so that $G=\cseries
nG$ for some $n$ in $\vnn$.  Working left to right,  
we can build up the following commutative
diagram, where arrows are inclusions:
\begin{equation*}
\xymatrix{
G \ar[d] & G' \ar[l]\ar[d] & \dsubgp 2G \ar[l]\ar[d] & \ar[l] \dsubgp
3G \ar[d] & \dsubgp nG \ar@{.>}[l]\ar[d]\\
G \ar[d] & \cseries nG' \ar[l]\ar[d] & \cseries{n-1}G'\ar[l]\ar[d] &
\ar[l]\ar[d]\cseries{n-2}G'\ar[l] & \centr G' \ar@{.>}[l] 
\ar[d]\\
\cseries nG & \ar[l] \cseries {n-1}G & \ar[l] \cseries{n-2}G & \ar[l]
\cseries{n-3}G & \gpgen{\gid} \ar@{.>}[l]
}
\end{equation*}
That is, we know $\dsubgp 0G\subgp\cseries nG$; and if
$\dsubgp k G \subgp  \cseries{n-k}G$
for some $k$ in $n$, then
\begin{equation*}
  \dsubgp{k+1}G=(\dsubgp kG)'\subgp \cseries{n-k}G'\subgp \cseries{n-(k+1)}G. 
\end{equation*}
By induction then, $\dsubgp nG\subgp \cseries 0G=\gpgen{\gid}$, so
$\dsubgp nG=\gpgen{\gid}$. 
\end{proof}


\begin{theorem}
  Solubility is preserved in subgroups and quotients.  If $N\nsubgp
  G$, and $N$ and $G/N$ are soluble, then $G$ is soluble.
\end{theorem}

\begin{proof}
  Suppose $f\colon G\to H$.
  By~\eqref{eqn:f([x,y])}, we have $f(\dsubgp
  nG)\subgp \dsubgp nH$,
  with equality if $f$ is surjective.  The case where $f$ is an
  inclusion of $G$ in $H$ shows that subgroups of soluble groups are
  soluble.  The case where $f$ is a quotient map shows that quotients
  of soluble groups are soluble.


Finally, if $N\nsubgp G$, then $(G/N)'=G'N/N$.  Suppose $\dsubgp
n{(G/N)}=\gpgen{\gid}$, and
$\dsubgp mN=\gpgen{\gid}$.
Then $\dsubgp nG\subgp N$ and so $\dsubgp{n+m}G=\gpgen{\gid}$.
\end{proof}

\begin{theorem}
Groups with non-abelian simple subgroups are not soluble.  In particular,
  $\Sym n$ is not soluble if $n\geq 5$.
\end{theorem}

\begin{proof}
  Suppose $H$ is simple.  Since $H'\nsubgp H$, we have either
  $H'=\gpgen{\gid}$ or $H'=H$.  In the former case, $H$ is abelian; in
  the latter, $H$ is insoluble.
\end{proof}

The last theorem suggests the origin of the notion of solubility of
groups: the general 5th-degree polynomial equation
\begin{equation*}
  a_0+a_1x+a_2x^2+a_3x^3+a_4x^4+x^5=0
\end{equation*}
is ``insoluble by radicals'' precisely because $\Sym 5$ is an
insoluble group.


\section{Normal series}

A \textbf{normal series}\index{normal!--- series} for a group $G$ is
a sequence $(G_n\colon n\in\vnn)$ of subgroups, where $G_{n+1}\nsubgp
G_n$ in  each case; the situation can be depicted by
\begin{equation*}
  G=G_0\nsupgp G_1\nsupgp G_2\nsupgp\dotsb
\end{equation*}
(If one wants to distinguish, one may call this a \textbf{subnormal
  series}\index{subnormal series}\index{series!subnormal ---}, normal if each $G_i$ is
  normal in $G$.)  The \textbf{factors}\index{factor} of 
the normal series are the quotients $G_i/G_{i+1}$.  If $G_n=\gpgen{\gid}$
for some $n$, then the series is called
\begin{enumerate}
  \item
a \textbf{composition series,}\index{composition series}\index{series!composition ---} if the
factors are simple; 
\item
a \textbf{soluble series,}\index{soluble series}\index{soluble!---
  series}\index{series!soluble ---} if the factors are abelian.
\end{enumerate}

For example,
  if $G$ is nilpotent, then the series
  \begin{equation*}
    \gpgen{\gid}\nsubgp \centr G\nsubgp \cseries 2G\nsubgp\dotsb \nsubgp G 
  \end{equation*}
is a soluble series.  


\begin{theorem}
A group is soluble if and only if it has a soluble series.
\end{theorem}

\begin{proof}
  If the series
  \begin{equation*}
    G\nsupgp G_1\nsupgp G_2\nsupgp\dotsb\nsupgp G_n=\gpgen{\gid}
  \end{equation*}
is soluble, then, by Theorem~\ref{thm:G'}, we have
\begin{align*}
  G'&\subgp G_1,&
G''&\subgp G_1{}'\subgp G_2,&
G'''&\subgp G_1{}''\subgp G_2'\subgp G_3,&
\dsubgp nG&=\gpgen{\gid},
\end{align*}
so $G$ is soluble.
Conversely, if $G$ is soluble, then the series
\begin{equation*}
  G\nsupgp G'\nsupgp\dsubgp 2G\nsupgp\dotsb\nsupgp\gpgen{\gid}
\end{equation*}
is a soluble series.
\end{proof}

So not every group has a soluble series.  However:

\begin{theorem}\label{thm:comp}
  Every finite group has a composition series.
\end{theorem}

\begin{proof}
  A finite group $G$ has a maximal proper normal subgroup $N$.  Then
  $G/N$ is simple.  Indeed, every normal subgroup of $G/N$ is $H/N$ for
  some normal subgroup $H$ of $G$ such that $N\subgp H$, and therefore
  $H$ is either $N$ or $G$.  

So we can form
  $G=G_0\nsupgp G_1\nsupgp\cdots$, where each $G_{n+1}$ is a maximal
  proper normal subgroup of $G_n$.  The factors are simple, and, since
  $G$ is finite, the series must terminate.
\end{proof}

If, from a normal series, another can be got by deleting some terms,
then the former is a \textbf{refinement}\index{refinement} of the latter.
As a normal series, a composition series is maximal in that it has no
nontrivial refinement, that is, no refinement without trivial factors.

A soluble series for a finite group has a refinement in which the
nontrivial factors are cyclic of prime order.

Any normal series is \textbf{equivalent}\index{equivalent} to the series that results
when all repeated terms are deleted (so that all trivial factors are
removed).  Then two normal series
\begin{equation*}
  G_i(0)\nsupgp G_i(1)\nsupgp G_i(2)\nsupgp\dotsb\nsupgp G_i(n)
\end{equation*}
(where $i<2$) with no trivial factors are \textbf{equivalent}\index{equivalent} if there
is $\sigma$ in $\Sym n$ such that 
\begin{equation*}
  G_0(i)/G_0(i+1)\cong G_1(\sigma(i))/G_1(\sigma(i)+1)
\end{equation*}
for each $i$ in $n$.  We now aim to prove Theorem~\ref{thm:JH} below.

\begin{lemma}[Zassenhaus or Butterfly]\index{Zassenhaus
    Lemma}\index{Butterfly Lemma}\index{theorem!Zassenhaus Lemma}
  \index{theorem!Butterfly Lemma}\index{lemma|see{theorem}}
  Suppose $N_i\nsubgp H_i\subgp G$ for each $i$ in $2$.  Let
  $H=H_0\cap H_1$.  Then:
  \begin{enumerate}
    \item
$N_i(H_i\cap N_{1-i})\nsubgp N_iH$ for each $i$;
\item
the two groups $N_iH/N_i(H_i\cap N_{1-i})$ are isomorphic.
  \end{enumerate}
\end{lemma}

\begin{proof}
We have $H_i\cap N_{1-i}\nsubgp H$.
Let 
\begin{equation*}
K=(H_0\cap N_1)(H_1\cap N_0); 
\end{equation*}
then
$K\nsubgp H$.  The groups we have to work with form the commutative
diagram below, arrows being inclusions.
\begin{equation*}
  \xymatrix{
   &            H_0        &               & H_1                   &\\
   &            N_0H\ar[u] &               & N_1H\ar[u]            &\\
   &                       &H\ar[ul]\ar[ur]&                       &\\
   &N_0(H_0\cap N_1)\ar[uu]&               &N_1(H_1\cap N_0)\ar[uu]&\\
N_0\ar[ur]&&K\ar[ul]\ar[ur]\ar[uu]&&N_1\ar[ul]\\
&H_1\cap N_0\ar[ul]\ar[ur]&&H_0\cap N_1\ar[ul]\ar[ur]&
}
\end{equation*}
We exhibit an epimorphism from $N_iH$ onto $H/K$
whose kernel is $N_i(H_i\cap N_{1-i})$.  Now, if $n,n'\in N_i$ and
$h,h'\in H$ and $nh'=n'h$, then
\begin{equation*}
  h'h\inv=n\inv n'\in N_i\cap H\subgp K,
\end{equation*}
so that $Kh=Kh'$.  Hence there is a well-defined homomorphism $f$ from
 $N_iH$ into $H/K$ such that, if $n\in N_i$ and $h\in H$, then
\begin{equation*}
  f(nh)=Kh.
\end{equation*}
That $f$ is surjective is clear.
Moreover, the following are equivalent conditions on such $n$ and $h$:
\begin{enumerate}
  \item
$nh\in \ker f$;
\item
$h\in K$;
\item\label{item:h=n_0n_1}
$h=n_0n_1=n_1n_0$ for some $n_i$ in $H_{1-i}\cap N_i$.
\end{enumerate}
Also,~\eqref{item:h=n_0n_1} implies that $nh=nn_in_{1-i}$, which is in
$N_i(H_i\cap N_{1-i})$; thus
\begin{enumerate}\setcounter{enumi}{3}
\item\label{item:nh-in}
 $nh\in N_i(H_i\cap N_{1-i})$.  
\end{enumerate}
Conversely,
suppose~\eqref{item:nh-in} holds.  Then $h=n\inv nh$, which is
also in $N_i(H_i\cap
N_{1-i})$, so $h=n'h'$ for some $n'$ in $N_i$ and $h'$ in $N_{1-i}\cap
H_i$.  Then $n'=h(h')\inv$, which is in $H_{1-i}$, so $n'\in
N_i\cap H_{1-i}$, 
and therefore $h\in K$.
\end{proof}

\begin{theorem}[Schreier]\index{Schreier Theorem}
  \index{theorem!Schreier Th---}
  Any two normal series have equivalent refinements.
\end{theorem}

\begin{proof}
  Suppose that
  \begin{equation*}
    G=G_i(0)\nsupgp G_i(1)\nsupgp\dotsb\nsupgp G_i(n_i)=\gpgen{\gid},
  \end{equation*}
where $i<2$, are normal series for $G$.  
In particular,
\begin{equation*}
  G_i(j+1)\nsubgp G_i(j)\subgp G.
\end{equation*}
Define
\begin{equation*}
  G_i(j,k)=G_i(j+1)(G_i(j)\cap G_{1-i}(k)),
\end{equation*}
where $(j,k)\in n_i\times n_{1-i}$.  Then
\begin{gather*}
  G_i(j)=G_i(j,0)\nsupgp G_i(j,1)\nsupgp\dotsb\nsupgp G_i(j,n_{1-i}-1)
  \nsupgp G_i(j,n_{1-i})=G_i(j+1),
\end{gather*}
giving us normal series that are refinements of the original ones; but also
\begin{equation*}
  G_0(j,k)/G_0(j,k+1)\cong G_1(k,j)/G_1(k,j+1)
\end{equation*}
by the Butterfly Lemma.
\end{proof}

\begin{theorem}[Jordan--H\"older]\label{thm:JH}\index{Jordan--H\"older
    Theorem}\index{theorem!Jordan--H\"older Th---}
  Any two composition series of a group are equivalent.
\end{theorem}

Combining this with Theorem~\ref{thm:comp}, we have that every finite
group has a uniquely determined set of simple ``factors''.  Hence the
interest in the classification of the finite simple groups.

\newpage
\part{Rings}

\section{Not-necessarily-associative rings}\label{sect:nna-rings}

Rings were introduced in \S~\ref{sect:rings}.  A more general
definition is possible.  If $E$ is an abelian
group (written additively), then a \textbf{multiplication}\index{multiplication} on $E$ is a
binary operation that distributes in both senses over addition.  In
the most general sense then, a \textbf{ring}\index{ring} is an abelian group with
a multiplication.  The ring is \textbf{associative}\index{associative} if the
multiplication is associative.  



Associative rings are not the only rings of interest.  For example,
the associative ring 
$\Ham$ defined in \S~\ref{sect:new} has the automorphism
$z+w\mj\mapsto\bar z-w\mj$; then the same 
construction that creates $\Ham$ out of $\C$
can be applied to $\Ham$ 
itself, yielding the ring $\Oct$ of \textbf{octonions;}\index{octonion}
but this ring is not associative.  Also, if $(E,\cdot)$ is a ring,
then there is another multiplication on $E$, namely $\bracket$ or
$(x,y)\mapsto[x,y]$, where 
\begin{equation*}
[x,y]=x\cdot y-y\cdot x; 
\end{equation*}
this multiplication makes
$E$ into a \textbf{Lie ring,}\index{Lie ring} namely a ring that 
respects the identity
\begin{equation*}
  [x,x]=0
\end{equation*}
along with the \textbf{Jacobi identity,}\index{Jacobi identity}
\begin{equation*}
[[x,y],z]=[x,[y,z]]-[y,[x,z]].
\end{equation*}
For example, from the associative ring $(\End E,\circ)$, we obtain the
Lie ring $(\End E,\bracket)$.  Then $\End E$ has a subgroup $\Der{E,\cdot}$,
which is closed under $\bracket$, but not generally under~$\circ$.
Specifically, $\Der{E,\cdot}$ consists of the
\textbf{derivations}\index{derivation} of 
$(E,\cdot)$, which are the endomorphisms $D$ of $E$ respecting the
\textbf{Leibniz rule,}
\begin{equation*}
  D(x\cdot y)=Dx\cdot y+x\cdot Dy.
\end{equation*}
In particular, `taking the derivative' on the field of meromorphic
functions on $\C$ is a derivation.   Derivations will be used in \S~\ref{sect:fact-pol}. 

\begin{theorem}
Every ring respects the identities
\begin{align*}
  (x-y)\cdot z&=x\cdot z-y\cdot z,&
x\cdot(y-z)&=x\cdot y-x\cdot z.
\end{align*}
Hence, in particular,
\begin{gather}\label{eqn:0.x=0}
    0\cdot x=0=x\cdot 0,\\\notag
(-x)\cdot y=-(x\cdot y)=x\cdot(-y).
\end{gather}
\end{theorem}

A ring is \textbf{unital} if it has
a multiplicative identity, generally denoted by $1$.
The result of Theorem~\ref{thm:exp-in-groups} can be strengthened when
the scope of the theorem is restricted to abelian groups:

\begin{theorem}\label{thm:Z-action}
  Let $E$ be an abelian group.  Then $n\mapsto (x\mapsto nx)$ is a
  homomorphism of unital rings from $(\Z,\cdot,1)$ to $(\End
  E,\circ,\id_E)$.
\end{theorem}

In a word, we can say that, as a unital ring, $\Z$ \textbf{acts} on
the endomorphism group of every abelian group.  Compare the notion of
action defined in \S~\ref{sect:actions}.  
In the notation of Theorem~\ref{thm:Z-action},
\begin{gather}\label{eqn:0x=0}
  0x=0,\\\notag
1x=x,\\\label{eqn:-x}
(-1)x=-x;
\end{gather}
here~\eqref{eqn:0x=0} is~\eqref{eqn:a^0} written additively; combining
it with~\eqref{eqn:0.x=0}, we have
\begin{equation*}
  0\cdot x=0x,
\end{equation*}
where the zeros come from the ring and from $\Z$ respectively.
More generally, we have

\begin{theorem}
For every integer $n$, every ring respects the identity
  \begin{equation*}
(nx)\cdot y=n(x\cdot y)=x\cdot ny.
  \end{equation*}
\end{theorem}

\begin{proof}
  Induction and~\eqref{eqn:-x}.
\end{proof}

\section{Not-necessarily-unital rings}

Henceforth the word \emph{ring} means associative ring.
By
Theorem~\ref{thm:x-lambda_x}, a unital ring also acts on the
endomorphism group of the underlying abelian group.  We have in particular
\begin{equation*}
  1\cdot x=1x.
\end{equation*}

Again a ring is
\textbf{commutative}\index{commutative} if the multiplication is
commutative.
As examples of commutative rings with identity, we have $\Z$ and $\Zmod n$ by
Theorems~\ref{thm:Z} and~\ref{thm:Z-mod-n}; and if $R$ is a
commutative ring with identity, then $\MatR$ is a ring with
identity, by Theorem~\ref{thm:M}.
  The continuous functions on $\R$ with compact
  support compose a ring with respect to the operations induced from
  $\R$: this ring has no identity. 

The \textbf{characteristic}\index{characteristic} of a ring $(E,\cdot)$ is the non-negative
integer $n$ such that $n\Z$ is the kernel of the homomorphism
$m\mapsto(y\mapsto my)$ from $\Z$ to $\End E$.  This kernel is the kernel
of $m\mapsto m1$, if $(E,\cdot)$ has an identity. 
For example,
if $0\leq n$, then  $\Zmod n$ has characteristic $n$.

\begin{theorem}
  Every ring embeds in a ring with identity having the same
  characteristic, and in a ring with identity having characteristic $0$.
\end{theorem}

\begin{proof}
 Suppose $R$ is a ring of characteristic $n$.  Let $A$ be $\Z$ or
 $\Zmod n$, and give $A\oplus R$ the multiplication defined by
 \begin{equation*}
   (m,x)(n,y)=(mn,my+nx+xy);
 \end{equation*}
then $(1,0)$ is an identity, and $x\mapsto(0,x)$ is an embedding.
\end{proof}

\section{Rings}

\emph{Henceforth the word \emph{ring} means ring with
  identity,} as it did in \S~\ref{sect:rings}.  
We know from
Theorem~\ref{thm:units} that a ring $R$ has a group of units, $\unit
R$.  The example in 
\S~\ref{sect:prod-sum} shows that some ring elements can have right
inverses without being units.  However, if $a$ has both a left and a
right inverse, then they are the same, since if $ab=1=ca$, then
\begin{equation*}
c=c1=c(ab)=(ca)b=1b=b.
\end{equation*}
A \textbf{zero-divisor}\index{zero-divisor}\index{divisor!zero ---} of
$R$ is an element 
$b$ distinct from $0$ such
that the equations $bx=0$ and $yb=0$ have solutions in $R$ distinct
from $0$.  So zero-divisors are not units. 
For example, if $m>1$ and $n>1$, then $m+\gpgen {mn}$ and $n+\gpgen{mn}$ are
  zero-divisors in $\Zmod {mn}$.  
The unique element of the trivial ring $\Zmod 1$ is a unit, but not a
zero-divisor.

A commutative ring is an
\textbf{integral domain}\index{integral domain}\index{domain!integral ---}%
\index{ring|seealso{domain}} if it has no zero-divisors and $1\neq0$.
So fields are integral domains.
But $\Z$ is an integral domain that is not a field.
If $p$ is prime, then $\Zmod
  p$ is a field, denoted by $\F_p$. 

An arbitrary ring $R$ such that $R\setminus\unit R=\{0\}$ is a
\textbf{division ring.}\index{division ring}  So fields are division rings; but $\Ham$ is a
non-commutative division ring.

If $R$ is a ring, and $G$ is a group, we can form the direct sum
$\sum_{g\in G}R$, which is, first of all, an abelian group; we can
give it a multiplication as follows.  We write an element $(r_g\colon
g\in G)$ of the direct sum as
\begin{equation*}
  \sum_{g\in G}r_gg;
\end{equation*}
this is a \textbf{formal finite $R$-linear combination}\index{linear
  combination}  of the elements of
$G$.  Then multiplication is defined as one expects: if $r,s\in R$ and
$g,h\in G$, then
\begin{equation*}
  (rg)(sh)=(rs)(gh),
\end{equation*}
and the definition extends to all of $\sum_{g\in G}R$ by
distributivity.  The resulting ring can be denoted by
\begin{equation*}
  R(G);
\end{equation*}
it is the \textbf{group ring}\index{group ring} of $G$ over $R$.

We can do the same construction with monoids, rather than
groups.  For example, if we start with the free monoid generated by a
symbol $X$, we get a \textbf{polynomial ring}\index{polynomial ring}
in one variable, denoted by 
\begin{equation*}
  R[X];
\end{equation*}
this is the ring of formal $R$-linear combinations
\begin{equation*}
  \sum_{k=0}^na_kX^k,
\end{equation*}
where $n\in\vnn$ and $a_k\in R$.  We could use a second variable,
getting for example $R[X,Y]$.  Usually $R$ here is commutative and is
in particular a field.

\section{Ideals}

If $A$ is a sub-ring of $R$, then we can form the abelian group
$R/A$.  We could try to define a multiplication on this by
\begin{equation*}
  (x+A)(y+A)=xy+A.
\end{equation*}
However, if $x-x'\in A$, and $y-y'\in A$, we need not have $xy-x'y'\in
A$. 

A \textbf{left ideal}\index{left!--- ideal}\index{ideal!left ---}
of $R$ is a sub-ring $I$ such that
\begin{equation*}
  RI\included I,
\end{equation*}
that is, $rx\in I$ whenever $r\in R$ and $x\in I$.  Likewise,
\textbf{right}\index{right!--- ideal}\index{ideal!right ---} and
\textbf{two-sided}
\index{two-sided ideal}\index{ideal!two-sided ---}ideal. 
For example,
the set of matrices
\begin{equation*}
  \begin{bmatrix}
    * & 0 & \dots & 0\\
\vdots & \vdots & & \vdots\\
* & 0 & \dots & 0
  \end{bmatrix}
\end{equation*}
is a left ideal of $\MatR$, but not a right ideal unless $n=1$.
Also,  $Rx$ is a left ideal of $R$, while $RxR$ is a two-sided ideal.


\begin{theorem}
  If $I$ is a two-sided ideal of $R$, then $R/I$ is a well-defined
  ring.  The kernel of a ring-homomorphism is a two-sided ideal.
\end{theorem}

Suppose $(A_i\setcolon i\in I)$ is an indexed family of left ideals of
a ring $R$.  Let the abelian subgroup of $R$ generated by
$\bigcup_{i\in I}A_i$ be denoted by 
\begin{equation*}
  \sum_{i\in I}A_i;
\end{equation*}
this is the \textbf{sum}\index{sum} of the left ideals $A_i$.  This must not be
confused with the \emph{direct sums} defined in \S~\ref{sect:prod-sum}.
If in particular $I=n$,
let the abelian subgroup of $R$ generated by
\begin{equation*}
  \{a_0\dotsm a_{n-1}\setcolon a_i\in A_i\}
\end{equation*}
be denoted by
\begin{equation*}
  A_0\dotsb A_{n-1};
\end{equation*}
this is the \textbf{product}\index{product} of the left ideals $A_i$.


\begin{theorem}
Sums and finite products of left ideals are left ideals; sums and
products of two-sided ideals are two-sided ideals.  Addition and
multiplication of ideals are associative; addition is commutative;
multiplication distributes over addition.
\end{theorem}

\begin{theorem}
  If $A$ and $B$ are left ideals of a ring, then so is $A\cap B$, and
  $AB\included A\cap B$.
\end{theorem}

Usually $AB$ does not include $A\cap B$, since for example $A^2$ might
not include $A$; such is the case when $A=2\Z$, since then $A^2=4\Z$.

\begin{theorem}
  If $f\colon R\to S$ is a homomorphism of rings, and $I$ is a two-sided
  ideal of $R$
  included in $\ker f$, then there is a unique homomorphism $\tilde
  f$ from $R/I$ to $S$ such that $f=\tilde f\circ\pi$.
\end{theorem}

Hence the isomorphism theorems, as for groups.

\section{Commutative rings}\label{sect:comm}

Henceforth, let all rings be commutative, so all ideals are two-sided.
A subset $A$ of a ring $R$ determines the ideal denoted by
\begin{equation*}
  (A),
\end{equation*}
namely the smallest ideal including $A$.  This consists of the
\textbf{$R$-linear combinations}\index{linear combination} of elements
of $A$, namely the well-defined sums
\begin{equation*}
\sum_{a\in
  A}r_aa, 
\end{equation*}
where $r_a\in R$; in particular, $r_a=0$ for all but finitely many
  $a$. 

If $A=\{a\}$, then $(A)$ is denoted by
\begin{equation*}
  (a)
\end{equation*}
or $Ra$
and is called a \textbf{principal
  ideal.}\index{principal!--- ideal}\index{ideal!principal ---}  A
\textbf{principal ideal domain}\index{principal!--- ideal
  domain}\index{domain!principal ideal ---} or
\pid\ is an integral domain whose 
every ideal 
is principal.
For example,
  $\Z$ is a \pid\ by Theorem~\ref{thm:Z-subg}.
But in the polynomial ring $\R[X,Y]$, the ideal $(X,Y)$ is not
  principal. 


An ideal is proper if and only if it does not contain a unit.
A \emph{proper} ideal $P$ is \textbf{prime}\index{prime} if
\begin{equation}\label{eqn:p-ideal}
  ab\in P\implies a\in P\lor b\in P.
\end{equation}
So a ring in which $1\neq0$ is an integral domain if and only if $(0)$
is a prime ideal. 
Compare the definition of prime ideal with the following: a positive
integer $p$ is prime if and only if
\begin{equation*}
  p\divides ab\implies p\divides a\lor p\divides b.
\end{equation*}
We shall address the relation between prime integers and prime ideals
in \S~\ref{sect:factor}.  Meanwhile, an 
equivalent formulation of prime ideals is given by the following.

\begin{theorem}
  A proper ideal $P$ of a ring is prime if and only if, for all ideals $I$
  and $J$ of the ring,
  \begin{equation}\label{eqn:IJ}
    IJ\included P\iff I\included P\lor J\included P.
  \end{equation}
\end{theorem}

\begin{proof}
  The given condition has~\eqref{eqn:p-ideal} as a special case, since
  the latter can be written as
  \begin{equation*}
    (a)(b)\included P\implies(a)\included P\lor (b)\included P.
  \end{equation*}
Also, if~\eqref{eqn:IJ} fails, so that $IJ\included P$, but
$I\setminus P$ contains some $a$, and $J\setminus P$ contains some
$b$, then $ab\in P$, so~\eqref{eqn:p-ideal} fails.
\end{proof}


\begin{theorem}
  A proper ideal $P$ of a ring $R$ is prime if and only if $R/P$ is an
  integral domain.
\end{theorem}

\begin{proof}
  That $P$ is prime means~\eqref{eqn:p-ideal}, which can be written as
\begin{equation*}
  (a+P)(b+P)=P\implies a+P=P\lor b+P=P;
\end{equation*}
but this means $R/P$ is integral.
\end{proof}

An ideal is called \textbf{maximal}\index{maximal} if it is maximal as
a proper ideal. 
A ring is a field if and only if $(0)$ is a maximal ideal.  (Note that
$(0)$ is in fact the ideal with \emph{no} generators, so it could be
written as $(\ )$; but it usually is not.)

\begin{theorem}
  A proper ideal $I$ of a ring $R$ is maximal if and only if $R/I$ is
  a field.
\end{theorem}

\begin{proof}
That $R/I$ is a field means that, if $a\in R\setminus I$, then for
some $b$,
\begin{equation*}
  ab\in 1+I.
\end{equation*}
That $I$ is maximal means that, if $a\in R\setminus I$, then
\begin{equation*}
  I+(a)=R,
\end{equation*}
equivalently,
$1\in I+(a)$, which means that, for some $b$,
$ba-1\in I$.
\end{proof}

\begin{corollary}
  Maximal ideals are prime.
\end{corollary}

The converse fails easily, since
 the prime ideals of $\Z$ are the ideals $(0)$ and $(p)$, where $p$ is
 prime, and the latter are maximal, but $(0)$ is not.  However, it is
 not even the case that prime ideals other than $(0)$ are always maximal.
For example, $\R[X,Y]$ has the prime ideal $(X)$, which
is not maximal.


A ring is \textbf{Boolean}\index{Boolean} if it respects the identity
  \begin{equation*}
    x^2=x.
  \end{equation*}
For example, if $\Omega$ is a set, then $\pow{\Omega}$ is a Boolean
ring, where multiplication is
intersection, and addition is the taking of \textbf{symmetric differences,}\index{symmetric difference} where the symmetric difference of $x$ and $y$ is $(x\setminus
y)\cup(y\setminus x)$, denoted by $x\symdiff y$.    

  \begin{theorem}\label{thm:Boole}
In Boolean rings, all prime ideals are maximal.
  \end{theorem}

  \begin{proof}
    In a Boolean ring, we have $2x=(2x)^2=4x^2=4x$,
%$x+x=(x+x)^2=x^2+2x+x^2=x+2x+x$,
so 
\begin{equation*}
2x=0.  
\end{equation*}
(Thus nontrivial Boolean rings have characteristic $2$.)
Hence
\begin{equation*}
x(1+x)=x+x^2=x+x=0,
\end{equation*}
so $x$ is a zero-divisor unless it or $1+x$ is $0$, that is, unless
$x$ is $0$ or $1$.
Therefore there are no Boolean integral domains besides $\F_2$, which
is a field.
  \end{proof}

In $\Z$, the ideal $(a,b)$ is the principal ideal generated by
$\gcd(a,b)$.  So $a$ and $b$ are coprime if $(a,b)=\Z$.  This
condition can be written as $(a)+(b)=\Z$.  Then the following
generalizes Theorem~\ref{thm:CRT}.

\begin{theorem}[Chinese Remainder]\label{thm:CRT-R}\index{Chinese
    Remainder Theorem} 
  \index{theorem!Chinese Remainder Th---}
  Suppose $R$ has an indexed family $(I_i\colon i<n)$ of ideals such
  that $I_i+I_j=R$ in each case.  Let $I=\bigcap_{i<n}I_i$.  Then the
  monomorphism 
  \begin{equation}\label{eqn:xxx}
  x+I\mapsto(x+I_0,\dots,x+I_{n-1})
  \end{equation}
  from $R/I$ to $\sum_{i<n}R/I_i$
  is an isomorphism.
\end{theorem}

\begin{proof}
We proceed by induction.  The claim is trivially true when $n=1$.
Proving the inductive step reduces to proving the claim when $n=2$.
In that case, we have $a_0+a_1=1$ for some
$a_0$ in $I_0$ and $a_1$ in $I_1$.  Then
\begin{align*}
  a_0&\equiv 1\pmod{I_1},&a_0&\equiv0\pmod{I_0},
\end{align*}
and similarly for $a_1$.  Therefore
\begin{align*}
  a_0x_0+a_1x_1&\equiv x_0\pmod{I_0},&
  a_0x_0+a_1x_1&\equiv x_1\pmod{I_1}.
\end{align*}
Thus $(x_0+I_0,x_1+I_1)$ is in the image of the map in~\eqref{eqn:xxx}.
\end{proof}

\section{Factorization}\label{sect:factor}

(Recall that all rings are now commutative with identity.)  In a ring 
$R$, an element $a$ is a \textbf{divisor}\index{divisor} of $b$, or
$a$ \textbf{divides}\index{divides} 
$b$, and we write
\begin{equation*}
  a\divides b,
\end{equation*}
if $ax=b$ for some $x$ in $R$.  Two elements that divide each other
are \textbf{associates.}\index{associates}

\begin{theorem}
  In any ring:
  \begin{enumerate}
    \item
$a\divides b \iff (b)\included (a)$;
\item
$a$ and $b$ are associates if and only if $(a)=(b)$.
  \end{enumerate}
Suppose $a=bx$.
\begin{enumerate}\setcounter{enumi}{2}
\item
If $x$ is a unit, then $a$ and $b$ are associates.
\item
If $b$ is a zero-divisor or $0$, then so is $a$.
\item
If $a$ is a unit, then so is $b$.
  \end{enumerate}
\end{theorem}

For example, in $\Zmod 6$, the elements $1$ and $5$ are units; the
other non-zero elements are zero-divisors.  Of these, $2$ and $4$ are
associates, since 
\begin{align}\label{eqn:2.2=4}
2\cdot 2&\equiv4,&4\cdot 2&\equiv 2\pmod 6; 
\end{align}
but $3$
is not an associate of these.  

In $\Z$, a \textbf{prime number} can be defined as a number
$p$ greater than $1$ with either of two properties: 
\begin{enumerate}
\item\label{item:p=ab}
if $p=ab$, then one of $a$ and $b$ is $\pm 1$;
\item\label{item:p|ab}
if $p\divides ab$, then $p\divides a$ or $p\divides b$.
\end{enumerate}
Easily~\eqref{item:p|ab} implies~\eqref{item:p=ab}, since if $p=ab$, then
$p\divides ab$, so that, if also $p\divides b$, then, since $b\divides
p$, we have $b=\pm p$, so $a=\pm 1$.
Conversely,~\eqref{item:p=ab} implies~\eqref{item:p|ab}, with more
difficulty.  Indeed,
property~\eqref{item:p=ab} implies that, if $p\ndivides a$, then
$\gcd(p,a)=1$, so $px+ay=1$ for some $x$ and $y$.  If also
$p\divides ab$, but $p\ndivides a$, then, since $b=pbx+aby$, we have
$p\divides b$.

We let~\eqref{item:p|ab} be the defining property of \emph{primes;}
and~\eqref{item:p=ab}, \emph{irreducibles.}
More precisely,
an element of a ring is \textbf{irreducible}\index{irreducible} if
it is not a unit or $0$, and 
its only divisors are associates and units.  So the element is
irreducible just in case the ideal it generates is maximal amongst the
proper principal ideals.

For example,
  in $\R[X,Y]$, the element $X$ is irreducible, although $(X)$ is not
  a maximal ideal.  However, if $(X)\included(f(X,Y))\pincluded
  \R[X,Y]$, then $f(X,Y)$ must be constant in $Y$, and then it must
  have degree $1$ in $X$, and then its constant term must be $0$; so
  $f(X,Y)$ is just $aX$ for some $a$ in $\unit{\R}$.

An element of a ring is \textbf{prime}\index{prime} if it is not $0$
and the ideal that it generates is prime in the sense of
\S~\ref{sect:comm}.

For example:
\begin{asparaenum}[1.]
\item
The primes of $\Z$ are the integers $\pm p$, where
$p$ is a prime 
  natural number, and these are just the irreducibles of $\Z$. 
\item
In $\Z/6\Z$, the element $2$ is prime.  Indeed, the multiples of $2$
are $0$, $2$, and $4$, so the non-multiples are $1$, $3$, and $5$, and
the product of no two of these is a multiple of $2$.  Similarly, $4$
is prime.  However, $2$ and $4$ are not irreducible, by~\eqref{eqn:2.2=4}. 
\item
In $\C$ we have
\begin{equation}\label{eqn:236}
2\cdot 3=(1+\sqrt{-5})(1-\sqrt{-5}),
\end{equation}
so, because the factors $2$, $3$, and $1\pm\sqrt{-5}$ are all irreducible in the smallest sub-ring of $\C$ that contains $\sqrt{-5}$, those factors cannot be prime in that ring.  Details are worked out in the next section.
\end{asparaenum}

\section{Some algebraic number theory}\label{sect:ant}

Suppose $d$ is a \textbf{squarefree} integer, that is, an integer
different from $1$ that is not
divisible by the square of a prime number.   
The subset $\{x+y\sqrt d\colon x,y\in\Q\}$ of $\C$ is a field, denoted by
\begin{equation*}
\Q(\sqrt d).
\end{equation*}
Define
\begin{equation*}
\tau_d=\begin{cases}
\sqrt d,&\text{if }d\not\equiv 1\pmod 4,\\
\displaystyle\frac{1+\sqrt d}2,&\text{ if }d\equiv 1\pmod 4.
\end{cases}
\end{equation*}
The abelian subgroup $\gpgen{1,\tau_d}$ of $\Q(\sqrt d)$ is a sub-ring, denoted by
\begin{equation*}
\Z[\tau_d].
\end{equation*}
\begin{theorem}
The elements of $\Z[\tau_d]$ are precisely the solutions in
$\Q(\sqrt d)$ of an equation  
\begin{equation}\label{eqn:x^2}
x^2+bx+c=0, 
\end{equation}
where $b$ and $c$ are in $\Z$.
\end{theorem}

\begin{proof}
From school the solutions of~\eqref{eqn:x^2} are
\begin{equation*}
x=\frac{-b\pm\oldsqrt{b^2-4c}}2.
\end{equation*}
Suppose one of these is in $\Q(\sqrt d)$.  Then $b^2-4c=a^2d$ for some $a$ in $\Z$, so that
\begin{equation*}
x=\frac{-b\pm a\sqrt d}2.
\end{equation*}
If $b$ is odd, then $b^2-4c\equiv1\pmod 4$, so $a$ must be odd and $d\equiv1\pmod 4$.  If $b$ is even, then $b^2-4c\equiv0\pmod 4$, so $a$ is even.
This establishes $x\in\Z[\tau_d]$ in all cases.

Conversely, suppose $x=k+n\tau_d$ for some $k$ and $n$ in $\Z$.  If $d\equiv1\pmod 4$, then 
\begin{gather*}
2x-2k-n=n\sqrt d,\\
4x^2-4(2k+n)x+(2k+n)^2=n^2d,\\
x^2-(2k+n)x+k^2+kn+n^2\frac{1-d}4=0,
\end{gather*}
while if $d\not\equiv1\pmod 4$, then
\begin{equation*}
x^2-2kx+k^2-n^2d=0.
\end{equation*}
In either case, $x$ satisfies an equation of the desired form.
\end{proof}

The elements of $\Z[\tau_d]$ are therefore called the \textbf{integers} of $\Q(\sqrt d)$.  Since $\Z[\tau_d]\cap\Q=\Z$, we may refer to the elements of $\Z$ as \textbf{rational integers.}
We have for example~\eqref{eqn:236} in $\Z[\tau_{-5}]$;
to show that $2$, $3$ and $1\pm\tau_{-5}$ are
irreducible in this ring, we define, in the general case, the operation $z\mapsto z'$ on
$\Q(\sqrt d)$ by 
\begin{equation*}
(x+y\sqrt d)'=x-y\sqrt d.
\end{equation*}
This is an \emph{automorphism} of $\Q(\sqrt d)$.  (It is the
restriction of complex conjugation, if $d<0$.)
Then we define a \textbf{norm} function $N$ from $\Q(\sqrt d)$ to $\Q$ by
\begin{equation*}
N(z)=zz'.
\end{equation*}
Then $N$ is multiplicative, that is, 
\begin{equation*}
N(\alpha\beta)=N(\alpha)N(\beta).  
\end{equation*}
Also,
\begin{equation*}
N(x+\tau_d y)=\begin{cases}
x^2-dy^2,&\text{ if }d\not\equiv1\pmod 4,\\
x^2+xy+\displaystyle\frac{1-d}4y^2,&\text{ if }d\equiv 1\pmod 4,
\end{cases}
\end{equation*}
so $N$ maps $\Z[\tau_d]$ into $\Z$.  If $d<0$, then it maps $\Z[\tau_d]$ into $\N$.
Let us restrict our attention to this case.
Here, $\alpha$ is a unit
in $\Z[\tau_d]$ if and only if $N(\alpha)=1$.  Therefore $\alpha$ in $\Z[\tau_d]$ is
irreducible if 
and only if it has no divisor $\beta$ such that
$1<N(\beta)<N(\alpha)$.  In case $d=-5$ we have
\begin{equation}\label{eqn:N23}
  \begin{array}{c||c|c|c}
    x&2&3&1\pm\tau_{-5}\\\hline
N(x)&4&9&6
  \end{array}.
\end{equation}
Since no elements of $\Z[\tau_{-5}]$ have
norm $2$ or $3$, the elements $2$, $3$, and $1\pm\tau_{-5}$ are
irreducible. 

But they are not prime.  Indeed, if $\alpha\divides\beta$, then
$N(\alpha)\divides N(\beta)$; but no norm in~\eqref{eqn:N23} divides
another.  This is where \emph{ideals} come up.  There are 
factorizations of the relevant ideals:
\begin{equation}\label{eqn:factors}
\begin{gathered}
  (2)=(2,1+\tau_{-5})^2,\\
(3)=(3,1+\tau_{-5})(3,1-\tau_{-5}),\\
  (1+\tau_{-5})=(2,1+\tau_{-5})(3,1+\tau_{-5}),\\
  (1-\tau_{-5})=(2,1+\tau_{-5})(3,1-\tau_{-5}).
\end{gathered}
\end{equation}
For example,
\begin{equation*}
(2,1+\tau_{-5})(2,1+\tau_{-5})=(2,1+\tau_{-5})(2,1-\tau_{-5})=(4,2+2\tau_{-5},6)=(2).
\end{equation*}
The right-hand members of~\eqref{eqn:factors} are in fact prime factorizations.  To see this, we first note that, being a subgroup of $\gpgen{1,\tau_d}$ on more than one generator, an ideal $I$ of $\Z[\tau_d]$ can be written as
$\gpgen{a+b\tau_d,c+e\tau_d}$, where 
\begin{equation*}
\begin{pmatrix}
a&b\\
c&e
\end{pmatrix}
\in\MatZ[2]\cap\GL[2]{\Q}.
\end{equation*}
Multiplication on the left by a matrix in $\GL[2]{\Z}$ does not change the ideal.  Hence we can define
  \begin{equation*}
    N(I)=\abs{\det
    \begin{pmatrix}
      a&b\\
c&e
    \end{pmatrix}},
  \end{equation*}
  which is in $\N$.
In case $d<0$, this agrees with the function $N$ defined above in the sense that
$N((\alpha))=N(\alpha)$, because
\begin{equation*}
(a+b\tau_d)\gpgen{1,\tau_d}=\gpgen{a+b\tau_d,db+a\tau_d}.
\end{equation*}
Moreover, if
 $I\pincluded J\pincluded\Z[\tau_d]$, then $N(J)\divides N(I)$ and $N(I)>N(J)>1$.  
In case $d=-5$, we compute
\begin{gather*}
  (2,1+\tau_{-5})=\gpgen{2,2\tau_{-5},1+\tau_{-5},\tau_{-5}-5}
=\gpgen{2,1+\tau_{-5}},\\
(3,1\pm\tau_{-5})=\gpgen{3,3\tau_{-5},1\pm\tau_{-5},\tau_{-5}\mp 5}
=\gpgen{3,1\pm\tau_{-5}},
\end{gather*}
hence
\begin{equation*}
  \begin{array}{c||c|c}
    I&(2,1+\tau_{-5})&(3,1\pm\tau_{-5})\\\hline
N(I)&2&3
  \end{array}.
\end{equation*}
So these ideals are maximal, hence prime.
Ideals of the rings $\Z[\tau_d]$ were originally called \textbf{ideal numbers.}






\section{Integral domains}\label{sect:int-dom}

\begin{theorem}
In an integral domain, if $a$ and $b$ are non-zero associates, and
  $a=bx$, then 
  $x$ is a unit.
\end{theorem}

\begin{proof}
  We have also $b=ay=bxy$, $b(1-xy)=0$, $1=xy$ since $b\neq0$ and we
  are in an integral domain. 
\end{proof}

\begin{corollary}
  In an integral domain, prime elements are irreducible.
\end{corollary}

\begin{proof}
  If $p$ is prime, and $p=ab$, then $p$ is an associate of $a$ or $b$,
  so the other is a unit.
\end{proof}

A \textbf{unique factorization domain}\index{unique factorization
  domain}\index{domain!unique 
  factorization ---} or UFD is an integral domain whose every
non-zero element is `unique\-ly' a product of irreducibles.  This means that, if
\begin{equation*}
\prod_{i<n}\pi_i=\prod_{i<n'}\pi_i',
\end{equation*}
where the $\pi_i$ and $\pi_i'$ are irreducible, then $n=n'$, and (perhaps after re-indexing) $\pi_i$ and $\pi_i'$ are associates.  Hence:

\begin{theorem}
In a UFD, irreducibles are prime.  \hfill\qedsymbol
\end{theorem}

In any ring, a \textbf{greatest common divisor}\index{greatest common
  divisor}\index{divisor!greatest common ---} of elements $a$ and $b$ is an element of the set of all divisors of $a$ and $b$ that is a maximum with respect to dividing: that is, it is some $c$ such that $c\divides a$ and $c\divides b$, and for all $x$, if $x\divides a$ and $x\divides b$, then $x\divides c$.  There can be more than one greatest common divisor, but they are all associates.  Every element is a greatest common divisor of itself and $0$.

\begin{theorem}
In a UFD, any two elements have a greatest common divisor.
\end{theorem}

\begin{proof}
If they are nonzero, we can write the elements as 
\begin{align*}
  &u\prod_{i<n}\pi_i{}^{a(i)},&&v\prod_{i<n}\pi_i{}^{b(i)},
\end{align*}
where $u$ and $v$ are units and the $\pi_i$ are irreducibles; a greatest common divisor is then
\begin{equation*}
  \prod_{i<n}\pi_i{}^{\min(a(i),b(i))}.\qedhere
\end{equation*}
\end{proof}

In a PID, more is true:

\begin{theorem}
In a PID, any two elements have a greatest common divisor, which is some linear combination of those elements.
\end{theorem}

\begin{proof}
If $(a,b)=(c)$, then $c$ is a greatest common divisor of $a$ and $b$, and $c=ax+by$ for some $x$ and $y$ in the ring.
\end{proof}

\begin{lemma}
  In a PID, irreducibles are prime.
\end{lemma}

\begin{proof}
  Suppose the irreducible $\pi$ divides $ab$ but not $a$.  Then a
  greatest common divisor of $\pi$ and $a$ is $1$; hence $\pi x+ay=1$ for some $x$
  and $y$ in the ring.  Then $b=\pi xb+ aby$, and $\pi$ divides each summand, so
  $\pi\divides b$.
\end{proof}

\begin{lemma}
  In a PID, irreducible factorizations are unique.
\end{lemma}

A ring is \textbf{Noetherian}\index{Noetherian ring} if every strictly ascending
chain of ideals is finite.

\begin{theorem}
  PIDs are Noetherian.
\end{theorem}

\begin{proof}
  If $I_0\included I_1\included\dotsb$, then $\bigcup_{i\in\vnn}I_i$
  is an ideal $(a)$; then $a\in I_n$ for some $n$, so the chain cannot grow beyond $I_n$. 
\end{proof}

\begin{lemma}
  In a PID, every element is a product of irreducibles.
\end{lemma}

\begin{proof}
  A tree of factorizations has no infinite branches.  More precisely, let $a$ be an element of a PID.  For certain finite binary sequences $\sigma$, we define $a_{\sigma}$ thus: $a_{()}=a$, and if $a_{(e(0),\dots,e(n-1))}$ can be factorized as $bc$, where neither $b$ nor $c$ is a unit, then let
  $a_{(e(0),\dots,e(n-1),0)}=b$ and
  $a_{(e(0),\dots,e(n-1),1)}=c$; otherwise these are undefined.  Then every branch of the tree corresponds to a chain
  \begin{equation*}
(a_{()})\pincluded (a_{(e(0))})\pincluded (a_{(e(0),e(1))})\pincluded (a_{(e(0),e(1),e(2))})\pincluded\cdots,
\end{equation*}
so it must be finite.  Therefore the whole tree is finite, and $a$ is the product of the irreducibles found at the end of each branch.
\end{proof}

\begin{theorem}\label{thm:PID-->UFD}
A PID is a UFD.\hfill\qedsymbol
\end{theorem}

Recall how the Euclidean algorithm for finding greatest common divisors works.  To find $\gcd(201,87)$, compute:
\begin{align*}
201&=87\cdot 2+27,\\
87&=27\cdot3+6,\\
27&=6\cdot 4+3,\\
6&=3\cdot 2.
\end{align*}
So $\gcd(201,87)=3$.  In general, if $a_0\geq a_1>0$, then $\gcd(a_0,a_1)=a_n$, where $(a_0,\dots,a_n)$ is a descending sequence of positive integers such that $a_k=a_{k+1}\cdot b_k+a_{k+2}$ for some $b_k$, the final division (as $6=3\cdot 2$ above) being exact.
A \textbf{Euclidean domain}\index{Euclidean domain}\index{domain!Euclidean ---} is then an integral domain in which the Euclidean algorithm works.  More precisely, a Euclidean domain is a domain $R$ equipped with a map $\phi$ from $R\setminus\{0\}$ to $\vnn$ such that,
for all $a$ and $b$ in $R\setminus\{0\}$, one of the following holds:
\begin{itemize}
\item
there exist $q$ in $R$ and $r$ in $R\setminus\{0\}$ such that $a=qb+r$ and $\phi(r)<\phi(b)$, or
\item
$b\divides a$ and $\phi(b)\leq\phi(a)$.
\end{itemize}

For example:
\begin{asparaenum}
\item
$\Z$ is Euclidean with respect to $x\mapsto\abs x$;
\item
a field, $x\mapsto 0$;
\item
a polynomial-ring $K[X]$ over a field $K$, $f\mapsto\deg f$ (see \S~\ref{sect:fact-pol}).
\end{asparaenum}
  
  The \textbf{Gaussian integers}\index{Gaussian integer} are the elements of $\Z[\tau_{-1}]$, where $\tau_{-1}=\sqrt{-1}=\mi$ as in \S~\ref{sect:ant}.
 This domain is
  Euclidean with respect to the norm function, namely $z\mapsto\abs z^2$, where $\abs{x+y\mi}^2=x^2+y^2$.  Indeed, if $a$ and $b$ are nonzero Gaussian integers, then there is a Gaussian integer $q$ such that $\abs{a/b-q}\leq\sqrt 2/2$.  Let $r=a-bq$; then $\abs r^2=\abs b^2\cdot\abs{a/b-q}^2\leq\abs b^2/2$.

\begin{theorem}
Euclidean domains are \pid s.
\end{theorem}

\begin{proof}
  An ideal of a Euclidean domain is generated by any non-zero element
  $x$ such that $\phi(x)$ is minimal. 
\end{proof}

\section{Localization}

A subset of a ring is \textbf{multiplicative}\index{multiplicative} if it is closed
under multiplication.  For example,
  the complement of a prime ideal is multiplicative.

\begin{lemma}
If $S$ is a multiplicative subset of a ring $R$, then on $R\times S$
there is an equivalence-relation $\sim$ given by
\begin{equation}\label{eqn:q}
  (a,b)\sim (c,d)\iff (ad-bc)\cdot e=0\text{ for some $e$ in }S.
\end{equation}
\end{lemma}

\begin{proof}
Reflexivity and symmetry are obvious.  For transitivity, note that, if $(a,b)\sim(c,d)$ and $(c,d)\sim(e,f)$, so that, for some $g$ and $h$ in $S$,
\begin{align*}
0&=(ad-bc)g=adg-bcg,&0&=(cf-de)h=cfh-deh,
\end{align*}
then
\begin{equation*}
(af-be)dgh
=afdgh-bedgh
=adg\cdot fh-deh\cdot bg
=bcg\cdot fh-cfh\cdot bg=0,
\end{equation*}
so $(a,b)\sim(e,f)$, since $dgh\in S$.
\end{proof}

In the notation of the lemma, the equivalence-class of $(a,b)$ is denoted by
\begin{equation*}
  \frac ab,
\end{equation*}
and the quotient $R\times S\modsim$ is denoted by
\begin{equation*}
  S\inv R.
\end{equation*}
If $R$ is an integral domain, and $0\notin S$, then~\eqref{eqn:q} can be simply
\begin{equation*}
  (a,b)\sim (c,d)\iff ad-bc=0.
\end{equation*}
If $0\in S$, then $S\inv R$ has a unique element.  An instance where $R$ is not an integral domain will be considered in the next section.

\begin{theorem}\label{thm:loc}
  Suppose $R$ is a ring with multiplicative subset $S$.
  \begin{enumerate}
  \item
  In $S\inv R$, if $c\in S$,
  \begin{equation*}
\frac ab=\frac{ac}{bc}.
\end{equation*}
  \item
  $S\inv R$ is a ring
in which the operations are given by
\begin{align*}
  \frac ab\cdot\frac cd&=\frac{ac}{bd},&
  \frac ab\pm\frac cd&=\frac{ad\pm bc}{bd}.  
\end{align*}
  \item
  There is a ring-homomorphism $\phi$ from $R$ to $S\inv R$ where, for every $a$ in $S$,
  \begin{equation*}
\phi(x)=\frac{xa}a.
\end{equation*}
\newcounter{local}
\setcounter{local}{\value{enumi}}
 \end{enumerate}
   Suppose in particular $R$ is an integral
domain and $0\notin S$.  
\begin{enumerate}\setcounter{enumi}{\value{local}}
\item
$S\inv R$ is an integral domain, and the homomorphism $\phi$ is an embedding.
\item
If $S=R\setminus\{0\}$, then $S\inv R$ is a field; and
if $\psi$ is an embedding of $R$ in a field $K$, then there is an embedding $\tilde{\psi}$ of $S\inv R$ in $K$ such that $\tilde{\psi}\circ\phi=\psi$.
\end{enumerate}
\end{theorem}

In the most important case, $S$ is the complement of a prime ideal
$\primei$, and then $S\inv R$ is called the \textbf{localization}\index{local!---ization} of $R$ at $\primei$,  denoted by
\begin{equation*}
  R_{\primei}.
\end{equation*}
If $R$ is an integral domain, so that $(0)$ is prime, then $R_{(0)}$ (which is a field by the theorem)
is the
\textbf{quotient-field}%
\index{quotient!--- field}\index{field!quotient ---} of $R$. 
A \textbf{local ring}\index{ring!local ---}\index{local!--- ring}
is a ring with a unique 
maximal ideal.  The connection between localizations and local rings is made by the theorem below.

\begin{lemma}
An ideal $\maxi$ of a ring $R$ is a unique maximal ideal of $R$
if and only if $\unit R=R\setminus\maxi$.
\end{lemma}

\begin{theorem}
  The localization of a ring at a prime ideal is a local ring.
\end{theorem}

\begin{proof}
  The ideal generated by the image of $\primei$ in $R_{\primei}$
  consists of those $a/b$ such that $a\in\primei$.  In this case, if
  $c/d=a/b$, then 
  $cb=da\in\primei$, so $c\in\primei$ since $\primei$ is prime.  Hence
  the following are equivalent:
  \begin{enumerate}
    \item
$x/y\notin R_{\primei}\primei$;
\item
$x\notin\primei$;
\item
$x/y$ has an inverse, namely $y/x$.
  \end{enumerate}
By the lemma, we are done.
\end{proof}

\section{Ultraproducts of fields}

Suppose $\family K$ is an indexed family $(K_i\colon i\in A)$ of fields.
If $a\in\prod\family K$, there is an element $a^*$ of $\prod\family K$ given by
\begin{equation*}
\pi_i(a^*)=\begin{cases}
\pi_i(a)\inv,&\text{ if }\pi_i(a)\neq0,\\
0,&\text{ if }\pi_i(a)=0.
\end{cases}
\end{equation*}
Then
\begin{equation*}
aa^*a=a.
\end{equation*}
Because of this, $\prod\family K$ is an example of a \textbf{regular ring} (in the sense of von Neumann).\footnote{In general, a regular ring need not be commutative; see \cite[IX.3, ex.~5, p.~442]{MR600654}.}

\begin{theorem}
In a regular ring, all prime ideals are maximal.
\end{theorem}

\begin{proof}
A quotient of a regular ring by a prime ideal is again regular (since the identity $aa^*a=a$ passes to the quotient) and is an integral domain; so it suffices to show that a regular integral domain $R$ is a field.  If $a\in R\setminus\{0\}$, then, since
\begin{equation*}
0=aa^*a-a=a(a^*a-1),
\end{equation*}
we have $a^*a=1$.  Thus $R$ is a field.
\end{proof}

\begin{theorem}
If $\primei$ is a prime ideal of a regular ring $R$, then
\begin{equation*}
R/\primei\cong R_{\primei},
\end{equation*}
the isomorphism being $x+\primei\mapsto x/1$.
\end{theorem}

\begin{proof}
If $a\in R$ and $b\in R\setminus\primei$, then $a/b=ab^*/1$ since
\begin{equation*}
(a-bab^*)b=ab-abb^*b=ab-ab=0.
\end{equation*}
Thus the homomorphism $x\mapsto x/1$ guaranteed by Theorem~\ref{thm:loc} is surjective.
We also have $a/1=0/1$ if and only if $ab=0$ for some $b$ in $R\setminus\primei$; but the latter implies $ab\in\primei$, so $a\in \primei$ since the ideal is prime.  Conversely, if $a\in\primei$, then $a^*a\in\primei$, so $a^*a-1\notin\primei$ since the ideal is proper; but $a(a^*a-1)=0$, so $a/1=0/1$.  Therefore the kernel of the homomorphism is $\primei$.
\end{proof}

With $\family K$ as above,
there is a one-to-one correspondence between ideals of $\prod\family K$ and ideals of the Boolean ring $\pow A$.  To define this correspondence, we first define the \textbf{support} of an element $a$ of $\prod\family K$ to be the set of those $i$ in $A$ such that $\pi_i(a)\neq0$.  We may denote this set by $\supp a$.  Then
\begin{align*}
\supp{ab}&=\supp a\cap\supp b,&
\supp{a+b}&\included\supp a\cup\supp b.
\end{align*}
So $x\mapsto\supp x$ is not quite a ring-homomorphism from $\prod\family K$ to $\pow A$.
However,
if $I$ is an ideal of $\prod\family K$, then $\Supp I$ is an ideal of $\pow A$.
Indeed, for every subset $B$ of $A$, there is an element $e_B$ of $\prod\family K$ given by
\begin{equation*}
\pi_i(e_B)=\begin{cases}
1,&\text{ if }i\in B,\\
0,&\text{ if }i\notin B.
\end{cases}
\end{equation*}
Then $\supp{e_B}=B$.
If $a\in\prod\family K$, and $B=\supp a$, then $e_B=aa^*$.  If, further, $a\in I$,
and $C\included B$, then $e_C=e_Caa^*$, so this is in $I$ and therefore $C\in\Supp I$.  Also,
if $B$ and $C$ are in $\Supp I$, then $B\symdiff C=\supp{e_B-e_C}$, which is in $\Supp I$.  So $\Supp I$ is indeed an ideal of $\pow A$.  If $J$ is an ideal of $\pow A$, then $J=\Supp I$, where $I$ is the ideal of $\prod\family K$ generated by those $e_B$ such that $B\in J$. Since every ideal $I$ is generated by those $e_B$ such that $B\in\Supp I$,
we conclude that $I\mapsto\Supp I$ is the claimed bijection.

Let $\primei$ be a prime ideal of $\prod\family K$.  Then the quotient $\prod\family K/\primei$ is a field, called an \textbf{ultraproduct} of $\family K$.
Now, $\primei$ could be principal, in which case $\Supp{\primei}$ would be principal; but since it is also maximal, it would have a set $A\setminus\{i\}$ as a generator.  In this case $\prod\family K/\primei\cong K_i$.

However, $\pow A$ has the ideal $I$ consisting of the finite subsets of $A$.  If $A$ itself is infinite, then $I$ is a proper ideal.  In this case, if $I\included\Supp{\primei}$, then $\primei$ is not principal, and the field $\prod\family K/\primei$ is called a \textbf{nonprincipal ultraproduct} of $\family K$.  This is a sort of `average' of the $K_i$.  In particular, we have
\begin{align*}
a\equiv b\pmod{\primei}
&\iff a-b\in\primei\\
&\iff \supp{a-b}\in\Supp{\primei}\\
&\iff \{i\in A\colon \pi_i(a)\neq\pi_i(b)\}\in\Supp{\primei}.
\end{align*}
We may think of the elements of $\Supp{\primei}$ as `small' sets; their complements are `large'.  (Then every subset of $A$ is small or large.)  So all finite subsets of $A$ are small, and all cofinite subsets of $A$ are large.  Then elements of $\prod\family K$ represent the same element in the ultraproduct if they agree on a large set.

Say for example $A$ is the set of prime numbers in $\vnn$, along with $0$, and each $K_p$ has characteristic $p$.  Then $\prod\family K/\primei$ has characteristic $0$, since for each prime $p$, the element $p1$ of $\prod\family K$ disagrees with $0$ on a large set.


\section{Factorization of polynomials}\label{sect:fact-pol}

\begin{theorem}
If $R$ is a ring, then $R[X_0,\dots,X_{n-1}]$ is the unique ring-extension $A$ of $R$ such
  that, for all rings $S$, and all homomorphisms $\phi$ from $R$ to
  $S$, and all $\vec a$ in 
  $S^n$, there is a unique homomorphism $\tilde{\phi}$ from $A$ to $S$
  such that $\tilde{\phi}|_R=\phi$ and $\tilde{\phi}(X_i)=a_i$ in each
  case. 
\end{theorem}
An arbitrary element of $R[X]$ can be written
\begin{equation*}
  \sum_{i\leq n}a_iX^i;
\end{equation*}
the \textbf{degree}\index{degree} of this is $n$, if $a_n\neq0$; then
$a_n$ is the 
\textbf{leading coefficient}\index{leading coefficient} of the polynomial. 

We said in \S~\ref{sect:int-dom} that $K[X]$ is a Euclidean domain when equipped with
$\deg$.  More generally:

\begin{lemma}
If $f$ and $g$ are polynomials over $R$, then:
\begin{itemize}
  \item
$\deg (f+g)\leq\max(\deg f, \deg g)$;
\item
$\deg (f\cdot g)\leq \deg f+\deg g$, with equality if the product of
  the leading coefficients is not $0$.
\end{itemize}
In particular,
if $R$ is an integral domain, then so is $R[X]$.
\end{lemma}

\begin{proof}
  The leading coefficient of a product is the product of the leading
  coefficients. 
\end{proof}

\begin{lemma}[Division Algorithm]\index{Division Algorithm}
  \index{theorem!Division Algorithm} \index{algorithm!Division A---}
  If $f$ and $g$ are polynomials in $X$ over $R$, and the leading coefficient
  of $g$ is $1$, then
  \begin{equation*}
    f=qg+r
  \end{equation*}
for some unique $q$ and $r$ in $R[X]$ such that $\deg r<\deg g$.
\end{lemma}

\begin{proof}
  If $\deg g\leq \deg f$, and $a$ is the leading coefficient of $f$,
  then
  \begin{equation*}
    f=aX^{\deg f-\deg g}\cdot g + (f-aX^{\deg f-\deg g}\cdot g), 
  \end{equation*}
the second term having degree less than that of $f$.  Continue as necessary. 
\end{proof}

\begin{lemma}[Remainder Theorem]\index{Remainder Theorem}
  \index{theorem!Remainder Th---}
If $c\in R$, then any $f$ in $R[X]$ can be written uniquely as
$q(X)\cdot (X-c)+f(c)$.  
\end{lemma}

\begin{proof}
By the Division Algorithm,
  $f=q(X)\cdot (X-c)+d$ for some $d$ in $R$; letting $X$ be $c$ yields
  the claim.
\end{proof}

\begin{theorem}
A ring-element  $c$ is a zero of a polynomial $f$ if and only if
$(X-c)\divides f$.  If $f$ is over an integral domain, then the number
of its distinct zeros is at most $\deg f$.
\end{theorem}

\begin{proof}
By the Remainder Theorem, $c$ is a zero of $f$ if and only if $f=q(X)\cdot(X-c)$ for some $q$.  In this case, if the ring is an integral domain, and $d$ is another zero of $f$, then, since $d-c\neq0$, we must have that $d$ is a zero of $q$.  Hence,
if $\deg(f)=n$, and $f$ has the distinct zeros $r_0$, \dots, $r_{n-1}$, then repeated application of the Remainder Theorem yields
\begin{equation*}
f=a\cdot(X-r_0)\dotsm(X-r_{n-1})
\end{equation*}
for some nonzero $a$ in the ring.
Then every zero of $f$ is a zero of one of the $X-r_k$, so it must be $r_k$.
\end{proof}

Recall however from the proof of Theorem~\ref{thm:Boole} that every element of a Boolean ring is a zero of $X(1+X)$, that is, $X+X^2$; but some Boolean rings have more than two elements.  In $\Zmod 6$, the same polynomial has the zeros $0$, $2$, $3$, and $5$.

\begin{theorem}
  If $K$ is a field, then $K[X]$ is a Euclidean domain whose units are
  precisely the elements of $K$.
\end{theorem}

\begin{proof}
Over a field, the Division Algorithm does not require the leading coefficient of the divisor to be $1$.
\end{proof}

A zero $c$ of a polynomial over an integral domain has \textbf{multiplicity} $m$ if the polynomial can be written as $g(X)\cdot(X-c)^m$, where $c$ is not a zero of $g$.  A zero with multiplicity greater than $1$ is \textbf{multiple.}
Derivations were defined in \S~\ref{sect:nna-rings}; they will be useful for recognizing the existence of multiple roots.

\begin{lemma}
  If $\delta$ is a derivation of a ring $R$, then for all $x$ in $R$ and $n$ in $\vnn$,
  \begin{equation*}  
   \delta(x^n)=nx^{n-1}\delta(x).
   \end{equation*}
\end{lemma}

\begin{proof}
Since $\delta(1)=\delta(1\cdot1)=\delta(1)\cdot 1+1\cdot\delta(1)=2\cdot\delta(1)$, we have $\delta(1)=0$, so the claim holds when $n=0$.  If it holds when $n=k$, then
\begin{equation*}
\delta(x^{k+1})=\delta(x)x^k+x\delta(x^k)=\delta(x)x^k+kx^k\delta(x)=(k+1)x^k\delta(x),
\end{equation*}
so the claim holds when $n=k+1$.
\end{proof}

\begin{theorem}
  On a polynomial ring $R[X]$, there is a
  unique derivation $f\mapsto
  f'$ such that 
  \begin{enumerate}
  \item
  $X'=1$,
  \item
   $c'=0$ for all $c$ in $R$.
   \end{enumerate}
   This derivation is given by
   \begin{equation}\label{eqn:der}
\Bigl(\sum_{k=0}^na_kX^k\Bigr)'=
\sum_{k=0}^{n-1}(k+1)a_{k+1}X^k.
\end{equation}
\end{theorem}

\begin{proof}
Uniqueness and~\eqref{eqn:der} follow from the lemma and the definition of a derivation.
  If $\delta$ is a derivation, then $\delta(x\cdot
  (y+z))=\delta(xy+xz)$.  
  Also,~\eqref{eqn:der} does define an endomorphism of the underlying group of $R[X]$ that meets the given conditions.  This endomorphism is a derivation, because
  \begin{equation*}
(X^k)'(X^{\ell})+X^k(X^{\ell})'
=kX^{k-1}X^{\ell}+\ell X^kX^{\ell-1}
=(k+\ell)X^{k+\ell-1}
=(X^{k+\ell})'.\qedhere
\end{equation*}
\end{proof}

In the notation of the theorem, $f'$ is the \textbf{derivative} of $f$.

\begin{lemma}
  Say $R$ is an integral domain, $f\in R[X]$ and $f(c)=0$.  Then $c$
  is a multiple zero of $f$ if and only if $f'(c)=0$.
\end{lemma}

\begin{proof}
  Write $f$ as $(X-c)^m\cdot g$, where $g(c)\neq0$.  Then $m\geq1$, so 
  \begin{equation*}
    f'=m(X-c)^{m-1}\cdot g+(X-c)^m\cdot g'.
  \end{equation*}
If $m>1$, then $f'(c)=0$.  If $f'(c)=0$, then $m\cdot 0^{m-1}\cdot
g(c)=0$, so $m>1$.
\end{proof}

If $L$ is a field with subfield $K$, then a polynomial over $K$ may be irreducible over $K$, but not over $L$.  For example, $X^2+1$ is irreducible over $\R$, but not over $\C$.  Likewise, the polynomial may have zeros from $L$, but not $K$.  Hence it makes sense to speak of zeros of an irreducible polynomial.

\begin{theorem}
Suppose $K$ is a field and $f\in K[X]$.  
  \begin{enumerate}
  \item
  If $\gcd(f,f')=1$, then $f$ has
  no multiple zeros.
  \item
  If $f$ is irreducible, then $\gcd(f,f')$ is $1$ or $0$.
  \item
  If $\gcd(f,f')=0$, then $K$ has a positive characteristic $p$, and $f=g(X^p)$ for some polynomial $g$ over $K$.
  \end{enumerate}
\end{theorem}

\begin{proof}
If $\gcd(f,f')=1$, then $1=g\cdot f+h\cdot f'$ for some polynomials $g$ and $h$, so $f$
  and $f'$ can have no common zero.  Since $\deg(f')<\deg(f)$ by~\eqref{eqn:der}, if $f$ is irreducible and $\gcd(f,f')\neq1$, then $\gcd(f,f')=0$.  The rest also follows from~\eqref{eqn:der}.
\end{proof}

A polynomial over a UFD is \textbf{primitive} if $1$ is a greatest common divisor of its coefficients.

\begin{lemma}[Gauss]
The product of primitive polynomials is primitive.
\end{lemma}

\begin{proof}
Let $f=\sum_{k=0}^ma_kX^k$ and $g=\sum_{k=0}^nb_kX^k$.  Then $fg=\sum_{k=0}^{m+n}c_kX^k$, where
\begin{equation*}
c_k=\sum_{i+j=k}a_ib_j=a_0b_k+a_1b_{k-1}+\dotsb+a_kb_0.
\end{equation*}
Suppose the $c_k$ have a common prime factor $\pi$, but $f$ is primitive.  There is some $\ell$ such that $\pi\divides a_i$ when $i<\ell$, but $\pi\ndivides a_{\ell}$.  Since $\pi\divides c_{\ell}$, we have $\pi\divides b_0$; then, since $\pi\divides c_{\ell+1}$, we have $\pi\divides b_1$, and so on.  So $g$ is not primitive.
\end{proof}

Henceforth let $R$ be a UFD with quotient field $K$.


\begin{lemma}
Primitive polynomials over $R$ that are associated over $K$ are associated over~$R$.
\end{lemma}

\begin{proof}
If $f$ and $g$ are polynomials defined over $R$, but associated over $K$, then they must have the same degree, and so we have $af=bg$ for some $a$ and $b$ in $R$.  If $f$ and $g$ are primitive, then $a$ and $b$ must be associates, so $b=ua$ for some unit in $R$, and then $f=ug$, so $f$ and $g$ are associates.
\end{proof}

\begin{lemma}
Primitive polynomials over $R$ are irreducible over $R$ if and only if irreducible over~$K$. 
\end{lemma}

\begin{proof}
Say $fg$ is over $R$ and primitive, while $f$ and $g$ are defined over $K$.  Then $af$ and $bg$ are over $R$ and primitive for some nonzero $a$ and $b$ in $K$.  By a previous lemma, $(af)(bg)$ is primitive; but so is $fg$, so $ab$ must be a unit in $R$.  Hence $fg=(ab)\inv(af)(bg)$, a factorization over $R$ whose factors have the same degrees as $f$ and $g$.  Since units of $R[X]$ are units of $K[X]$, it follows that a primitive irreducible polynomial over $R$ is still irreducible over $K$.  Also, any non-unit factor of a \emph{primitive} polynomial over $R$ is still not a unit over $K$; so if the polynomial is reducible over $R$, then it is reducible over $K$.
\end{proof}

Note however that if $f$ is primitive and irreducible over $R$, and $a$ in $R$ is not a unit or $0$, then $af$ is still irreducible over $K$ (since $a$ is a unit in $K$) but not over $R$.

\begin{theorem}
$R[X]$ is a UFD.
\end{theorem}

\begin{proof}
Every element of $R[X]$ can be written as $af$, where $a\in R$ and $f$ is primitive.  Then $f$ has a prime factorization over $K$ (since $K[X]$ is a Euclidean domain): say $f=f_0\dotsm f_{n-1}$.  There are nonzero $b_k$ in $K$ such that $b_kf_k$ is a primitive polynomial over $R$.  The product of these is still primitive; but that product is $\bigl(\prod_{k<n}b_k\bigr)\cdot f$, and $f$ is primitive, so $\prod_{k<n}b_k$ must be a unit $u$ of $R$.  Then $f=u\inv\prod_{k<n}(b_kf_k)$ is an irreducible factorization of $f$ over $R$.  Its uniqueness follows from its uniqueness over $K$ and the next-to-last lemma.
\end{proof}

\begin{theorem}[Eisenstein's Criterion]
If $f$ is a polynomial $\sum_{k=0}^na_kX^k$ over $R$, and $\pi$ is an irreducible element of $R$ such that
\begin{align*}
\pi^2&\ndivides a_0,&
\pi&\divides a_0,&
\pi&\divides a_1,&
&\dots,&
\pi&\divides a_{n-1},&
\pi\ndivides a_n,
\end{align*}
then $f$ is irreducible over $K$ and, if primitive, over $R$.
\end{theorem}

\begin{proof}
Suppose $f=gh$, where $g=\sum_{k=0}^nb_kX^k$ and $h=\sum_{k=0}^nc_kX^k$, all coefficients from $R$ (and some being $0$).  We may assume $f$ is primitive, so $g$ and $h$ must be primitive.  We may assume $\pi$ divides $b_0$, but not $c_0$.  Let $\ell$ be such that $\pi\divides b_k$ when $k<\ell$.  If $\ell=n$, then (since $g$ is primitive) we must have $b_n\neq0$, so $\deg g=n$, and $h=c_0$ and is a unit.  If $\ell<n$, then, since $\pi\divides a_{\ell}$, but
\begin{equation*}
a_{\ell}=b_0c_{\ell}+b_1c_{\ell-1}+\dotsb+b_{\ell}c_0,
\end{equation*}
we have $\pi\divides b_{\ell}$.  By induction, $\pi\divides b_k$ whenever $k<n$, so as before $\deg g=n$.
\end{proof}

An application is the following.

\begin{theorem}
If $p$ is prime, then $\sum_{k=0}^{p-1}X^k$ is irreducible.
\end{theorem}

\begin{proof}
Consider
\begin{equation*}
\sum_{k=0}^{p-1}(X+1)^k
=\sum_{k=0}^{p-1}\sum_{j=0}^k\binom kjX^j
=\sum_{j=0}^{p-1}X^j\sum_{k=j}^{p-1}\binom kj
=\sum_{j=0}^{p-1}X^j\binom p{j+1},
\end{equation*}
which meets the Eisenstein Criterion since
\begin{align*}
\binom p1&=p,&
\binom p{j+1}&=\frac{p!}{(p-j-1)!(j+1)!},
\end{align*}
which is divisible by $p$ if and only if $j<p-1$.
\end{proof}













\newpage
%\part{Extras}%{Appendices}

\part*{Appendices}

\appendix

\section{The German script}\label{app:German}

Writing in 1993, Wilfrid Hodges \cite[Ch.~1, p.~21]{MR94e:03002} observes
\begin{quotation}
  Until about a dozen years ago, most model theorists named structures
  in horrible Fraktur lettering.  Recent writers sometimes adopt a
  notation according to which all structures are named $M$, $M'$,
  $M^*$, $\bar M$, $M_0$, $M_i$ or occasionally $N$.  
%I hope I cause no offence by using a more freewheeling notation.
\end{quotation}
For Hodges, structures are $A$, $B$, $C$, and so forth; he refers to
their universes as
\textbf{domains}\index{domains}
and denotes these by $\operatorname{dom}(A)$ and so forth.  
%\begin{comment}
This practice is
convenient if one is using a typewriter (as in the preparation of
another of Hodges's books \cite{Hodges-Building}, from 1985).
In 2002, David Marker \cite{MR1924282} uses `calligraphic' letters for
structures, so that $M$ is the universe of~$\mathcal M$.
%\end{comment}
I still prefer the Fraktur letters:
%In \AmS\ \LaTeX\ (by which these notes are typeset) these letters are:
\begin{align*}
&\begin{array}{ccccccccc}
\mathfrak A&\mathfrak B&\mathfrak C&\mathfrak D&\mathfrak E&\mathfrak F&\mathfrak G&\mathfrak H&\mathfrak I\\\mathfrak J&\mathfrak K&\mathfrak L&\mathfrak M&\mathfrak N&\mathfrak O&\mathfrak P&\mathfrak Q&\mathfrak R\\\mathfrak S&\mathfrak T&\mathfrak U&\mathfrak V&\mathfrak W&\mathfrak X&\mathfrak Y&\mathfrak Z&
  \end{array}
&
&\begin{array}{ccccccccc}
\mathfrak a&\mathfrak b&\mathfrak c&\mathfrak d&\mathfrak e&\mathfrak f&\mathfrak g&\mathfrak h&\mathfrak i\\\mathfrak j&\mathfrak k&\mathfrak l&\mathfrak m&\mathfrak n&\mathfrak o&\mathfrak p&\mathfrak q&\mathfrak r\\\mathfrak s&\mathfrak t&\mathfrak u&\mathfrak v&\mathfrak w&\mathfrak x&\mathfrak y&\mathfrak z&
  \end{array}
\end{align*}
A way to write these by hand is seen in a textbook
of German from 1931 \cite{German}:
%\vfill

\begin{center}
\includegraphics[width=417pt,height=292pt]{german-script-cropped.eps}
\end{center}
%\vfill\vfill

\section{Group-actions}\label{App:ga}
The following is partially inspired by an expository article
\cite{MR1997347} by Serre.
Suppose a group $G$ acts on a set $\setactedon$ by $(g,x)\mapsto gx$.
Just as, for an
element $a$ of $\setactedon$, we define
\begin{equation*}
  G_a=\{g\in G\colon ga=a\},
\end{equation*}
so, for an element $g$ of $G$, we may define
\begin{equation*}
  \setactedon^g=\{x\in \setactedon\colon gx=x\}:
\end{equation*}
this is the set of \textbf{fixed points}\index{fixed point} of $g$.
The orbit of $a$ under the action of $G$ is defined by
\begin{equation*}
  Ga=\{ga\setcolon g\in G\}.
\end{equation*}
Then $ga=ha\iff gG_a=hG_a$, and therefore
\begin{equation*}
  \size{Ga}=[G:G_a],
\end{equation*}
and the sets $Ga$ partition ${\setactedon}$.  We may define
\begin{equation*}
  {\setactedon}/G=\{Gx\setcolon x\in {\setactedon}\}.
\end{equation*}
Assume $G$ is finite.
For any function $\phi$ from $G$ to $\R$ and subset $X$ of $G$, we
define 
\begin{align*}
  \int_X\phi&=\sum_{g\in X}\frac{\phi(g)}{\size
  G},&
\int\phi&=\int_G\phi. 
\end{align*}
Assume ${\setactedon}$ is also finite, and
let $\chi$ be the function
\begin{equation*}
  g\mapsto\size{{\setactedon}^g}
\end{equation*}
from $G$ to $\vnn$.

\begin{lemma}[Burnside]\index{Burnside Lemma} \index{theorem!Burnside
    Lemma} 
  $\size{{\setactedon}/G}=\int\chi$.
\end{lemma}

\begin{proof}
Letting $R=\{(g,x)\in G\times\setactedon\setcolon gx=x\}$, we define $\pi_G$ as
$(g,x)\mapsto g$ from $R$ to $G$, and $\pi_{\setactedon}$ as
$(g,x)\mapsto x$ from $R$ to $\setactedon$.
Then
\begin{equation*}
  \size R=\sum_{g\in G}\size{\pi_G{}\inv(g)}=\sum_{g\in G}\chi(g),
\end{equation*}
but also
\begin{equation*}
\size{R}
=\sum_{x\in {\setactedon}}\size{G_x}
= \sum_{C\in {\setactedon}/G}\sum_{x\in C}\size{G_x}.
\end{equation*}
But if $C\in {\setactedon}/G$ and $a\in C$, then $\size C=[G:G_a]$.  Hence
\begin{equation*}
 \sum_{C\in {\setactedon}/G}\sum_{x\in C}\size{G_x}
=  \sum_{C\in {\setactedon}/G}\sum_{x\in C}\frac{\size G}{\size C}
=  \sum_{C\in {\setactedon}/G}\size G
=\size{{\setactedon}/G}\cdot\size G.\qedhere
\end{equation*}
\end{proof}
Now define
\begin{equation*}
  G_0=\{g\in G\setcolon {\setactedon}^g=\emptyset\},
\end{equation*}
the set of elements of $G$ with no fixed points.

\begin{theorem}[Jordan]\index{Jordan Theorem} \index{theorem!Jordan Th---}
  If $\size{{\setactedon}/G}=1$ and $\size {\setactedon}\geq 2$, then 
  \begin{equation*}
      G_0\neq\emptyset.
  \end{equation*}
\end{theorem}

\begin{proof}
  By the Burnside Lemma, the average size of ${\setactedon}^g$ is $1$.  Since
  ${\setactedon}^1={\setactedon}$, and $\size {\setactedon}\geq 2$, we must have
  $\size{{\setactedon}^g}<1$ for some $g$ in $G$; for such $g$, we have
  ${\setactedon}^g=\emptyset$, that is, $g\in G_0$.
\end{proof}

A stronger result is the following:

\begin{theorem}[Cameron--Cohen]\index{Cameron--Cohen Theorem}
  If $\size{{\setactedon}/G}=1$ and $\size {\setactedon}\geq 2$, then 
  \begin{equation*}
    \size{G_0}\cdot\size
  {\setactedon}\geq\size G.
  \end{equation*}
\end{theorem}

\begin{proof}
  The action of $G$ on ${\setactedon}$ induces an action on ${\setactedon}\times {\setactedon}$, and
  $\size{({\setactedon}\times {\setactedon})^g}=\chi(g)^2$.  Now, $({\setactedon}\times {\setactedon})/G$ contains the
  diagonal $\{(x,x)\setcolon x\in{\setactedon}\}$ as one orbit and at least one other element, so
  \begin{equation*}
    \int\chi^2\geq 2
  \end{equation*}
by Burnside's Lemma.  Let $n=\size {\setactedon}$.  Then for all $g$
in $G\setminus G_0$, we have $1\leq\chi(g)\leq n$ and therefore
\begin{equation*}
(\chi(g)-1)(\chi(g)-n)\leq0;
\end{equation*}
but $(\chi(g)-1)(\chi(g)-n)=n$ when $g\in G_0$.
Consequently,
\begin{equation*}
  \frac{\size{G_0}\cdot\size {\setactedon}}{\size G}=n\int_{G_0}1=
  \int_{G_0}(\chi-1)(\chi-n) \geq  \int_G(\chi-1)(\chi-n)=
  \int_G(\chi^2-1)\geq1.\qedhere 
\end{equation*}
\end{proof}

Serre's article gives applications to topology and number-theory.

\newpage


%\backmatter

\part*{}

%\bibliographystyle{plain}
%\bibliography{../../../../../TeX/references}

\def\cprime{$'$}
\begin{thebibliography}{10}

\bibitem{MR0159773}
Richard Dedekind.
\newblock {\em Essays on the theory of numbers. {I}: {C}ontinuity and
  irrational numbers. {II}: {T}he nature and meaning of numbers}.
\newblock authorized translation by Wooster Woodruff Beman. Dover Publications
  Inc., New York, 1963.

\bibitem{MR1487370}
Joel~David Hamkins.
\newblock Every group has a terminating transfinite automorphism tower.
\newblock {\em Proc. Amer. Math. Soc.}, 126(11):3223--3226, 1998.

\bibitem{German}
Roe-Merrill~S. Heffner.
\newblock {\em Brief {G}erman Grammar}.
\newblock D. C. Heath and Company, Boston, 1931.

\bibitem{MR94e:03002}
Wilfrid Hodges.
\newblock {\em Model theory}, volume~42 of {\em Encyclopedia of Mathematics and
  its Applications}.
\newblock Cambridge University Press, Cambridge, 1993.

\bibitem{Hodges-Building}
Wilfrid Hodges.
\newblock {\em Building models by games}.
\newblock Dover Publications, Mineola, New York, 2006.
\newblock original publication, 1985.

\bibitem{MR600654}
Thomas~W. Hungerford.
\newblock {\em Algebra}, volume~73 of {\em Graduate Texts in Mathematics}.
\newblock Springer-Verlag, New York, 1980.
\newblock Reprint of the 1974 original.

\bibitem{MR0472307}
Morris Kline.
\newblock {\em Mathematical thought from ancient to modern times}.
\newblock Oxford University Press, New York, 1972.

\bibitem{MR12:397m}
Edmund Landau.
\newblock {\em Foundations of Analysis. {T}he Arithmetic of Whole, Rational,
  Irrational and Complex Numbers}.
\newblock Chelsea Publishing Company, New York, N.Y., third edition, 1966.
\newblock translated by F. Steinhardt; first edition 1951; first German
  publication, 1929.

\bibitem{MR1924282}
David Marker.
\newblock {\em Model theory: an introduction}, volume 217 of {\em Graduate
  Texts in Mathematics}.
\newblock Springer-Verlag, New York, 2002.

\bibitem{MR0098777}
James~H. McKay.
\newblock Another proof of {C}auchy's group theorem.
\newblock {\em Amer. Math. Monthly}, 66:119, 1959.

\bibitem{Peano}
Giuseppe Peano.
\newblock The principles of arithmetic, presented by a new method (1889).
\newblock In Jean van Heijenoort, editor, {\em From {F}rege to {G}{\"o}del},
  pages 83--97. Harvard University Press, 1976.

\bibitem{MR1997347}
Jean-Pierre Serre.
\newblock On a theorem of {J}ordan.
\newblock {\em Bull. Amer. Math. Soc. (N.S.)}, 40(4):429--440 (electronic),
  2003.

\bibitem{MR801316}
Simon Thomas.
\newblock The automorphism tower problem.
\newblock {\em Proc. Amer. Math. Soc.}, 95(2):166--168, 1985.

\bibitem{von-Neumann}
John von Neumann.
\newblock On the introduction of transfinite numbers (1923).
\newblock In Jean van Heijenoort, editor, {\em From {F}rege to {G}{\"o}del},
  pages 346--354. Harvard University Press, 1976.

\end{thebibliography}


\printindex

\end{document}



