\documentclass[%
version=last,%
a5paper,
11pt,%
headings=small,%
bibliography=totoc,%
index=totoc,%
twoside,%
reqno,%
cleardoublepage=empty,%
open=any,%
%parskip=half,%
draft=true,%
%DIV=classic,%
DIV=12,%
%headinclude=true,%
pagesize]
{scrbook}

%\usepackage[notref,notcite]{showkeys}

%\usepackage[headsepline]{scrpage2}
% scrpage2 has been removed from KOMA-Script; scrlayer-scrpage is its
% drop-in successor and provides the same commands used below.
\usepackage{scrlayer-scrpage}
\pagestyle{scrheadings}
\clearpairofpagestyles % successor of the deprecated \clearscrheadings
\ofoot{\pagemark}  % page number in the outer footer
\ifoot{\headmark}  % running head (chapter/section mark) in the inner footer

\usepackage{pstricks}
\renewcommand{\captionformat}{\ }

\usepackage{cclicenses}
\usepackage{url}

%%%%%%%%%%%%%% TIME %%%%%%%%%%%%%%%

\usepackage{calc}
\usepackage{ifthen}% provides \ifthenelse, used below; calc alone does not
% Counters for a 12-hour clock, derived from \time (minutes since
% midnight): ampm is 0 before noon and 1 after; hours and minutes are
% the clock reading.
\newcounter{ampm}\newcounter{hours}\newcounter{minutes}
% \printtime: typeset the current time of day, e.g. "3:07 p.m."
\newcommand{\printtime}{%
\setcounter{ampm}{\time/720}% 1 iff afternoon (integer division)
\setcounter{hours}{(\time-\value{ampm}*720)/60}%
\setcounter{minutes}{\time-\value{ampm}*720-\value{hours}*60}%
\ifthenelse{\value{hours}=0}{\setcounter{hours}{12}}{}% 12, not 0, during the midnight and noon hours
\ifthenelse{\value{ampm}=0}%
           {\ifthenelse{\value{minutes}>9}%
                       {\thehours:\theminutes~a.m.}%
                       {\thehours:0\theminutes~a.m.}}% pad minutes to two digits
           {\ifthenelse{\value{minutes}>9}%
                       {\thehours:\theminutes~p.m.}%
                       {\thehours:0\theminutes~p.m.}}}	% 
                    % code adapted from the
                                % LaTeX Companion (2d ed), p. 871  

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\usepackage{relsize} % Here \smaller scales by 1/1.2; \relscale{X} scales by X

% Redefine the quote environment: slightly smaller type (relsize's
% \relscale) with margins of 5% of the text width on each side.
\renewenvironment{quote}{\begin{list}{}
{\relscale{.90}\setlength{\leftmargin}{0.05\textwidth}
\setlength{\rightmargin}{\leftmargin}}
\item[]}
{\end{list}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\usepackage[polutonikogreek,english]{babel}

% From the Greek Text Society

%\usepackage{gfsneohellenic}
\usepackage{gfsporson}

% \gk{...}: typeset the argument as polytonic Greek (babel) in the
% GFS Porson face.
\newcommand{\gk}[1]{\foreignlanguage{polutonikogreek}{%
%\relscale{0.8}
\textporson{%
#1}}%
}%  Greek text

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newcommand{\bce}{\textsc{b.c.e.}}
\newcommand{\ce}{\textsc{c.e.}}

\usepackage{amsmath,amssymb,amsthm,amscd}
\allowdisplaybreaks
\usepackage[mathscr]{euscript}
\usepackage{upgreek}
\usepackage{multicol}
\usepackage{stmaryrd}  % \triangle{left,right}eqslant
\usepackage[matrix,arrow]{xy}
\usepackage{hfoldsty}
\usepackage{verbatim}
\usepackage[neverdecrease]{paralist}
\usepackage{graphicx,rotating} % for the German script picture

%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  Theorems
%
%%%%%%%%%%%%%%%%%%%%%%%%

%\swapnumbers

% Lemmas and theorems are numbered in independent sequences (see the
% Introduction); porisms and corollaries are numbered within the
% theorem they follow.
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{porism}{Porism}
\newtheorem{corollary}{Corollary}

\numberwithin{porism}{theorem}
\numberwithin{corollary}{theorem}

% Remarks and examples share the theorem counter and use amsthm's
% upright "definition" style.
\theoremstyle{definition}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{example}[theorem]{Example}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\usepackage{makeidx}
\makeindex

\newcommand{\zfc}{\mathrm{ZFC}}     % Zermelo--Fraenkel set theory with Choice
\newcommand{\zf}{\mathrm{ZF}}       % Zermelo--Fraenkel set theory
\newcommand{\lto}{\Rightarrow}      % logical "implies"
\newcommand{\liff}{\Leftrightarrow} % logical "if and only if"
%\renewcommand{\land}{\mathrel{\&}}


\usepackage{bm}
\newcommand{\tuple}[1]{\bm{#1}}  % tuples are set in bold

\newcommand{\included}{\subseteq}      % [the name suggests the meaning here]
\newcommand{\nincluded}{\nsubseteq}      % [the name suggests the meaning here]
\newcommand{\pincluded}{\subset}      % [the name suggests the meaning here]
% Prefer the slanted variants of the order symbols (from amssymb):
\renewcommand{\leq}{\leqslant}
\renewcommand{\geq}{\geqslant}
\renewcommand{\nleq}{\nleqslant}
\renewcommand{\emptyset}{\varnothing}
\renewcommand{\setminus}{\smallsetminus}
% Save the original glyphs before switching to the "var" forms:
\let\standardphi\phi
\renewcommand{\phi}{\varphi}
\let\standardepsilon\epsilon
\renewcommand{\epsilon}{\varepsilon}

\newcommand{\stnd}[1]{\mathbb{#1}}
\newcommand{\N}{\stnd{N}}
\newcommand{\Z}{\stnd{Z}}         % integers
\newcommand{\Q}{\stnd{Q}}         % rationals
\newcommand{\Qp}{\Q^+}         % positive rationals
\newcommand{\rc}[1]{#1^{\mathrm{rc}}}         % real closure
\newcommand{\C}{\stnd{C}}         % complex numbers
\newcommand{\R}{\stnd{R}}         % real numbers
\newcommand{\Rp}{\R^+}         % positive real numbers
\newcommand{\diff}[1][\R]{\textrm C_{\infty}(#1)}
\newcommand{\F}{\stnd{F}}         % 
\newcommand{\Ham}{\stnd{H}}         % quaternions 
\newcommand{\Oct}{\stnd{O}}         % octonions
%\newcommand{\primes}{\stnd{P}}      % prime numbers
\newcommand{\on}{\mathbf{ON}}       % ordinals
\newcommand{\cn}{\mathbf{CN}}       % cardinals
\DeclareMathOperator{\id}{id}          % identity-map
%\newcommand{\id}{\operatorname{id}}          % identity-map
\newcommand{\gid}{\mathrm e}  % identity of group
\newcommand{\bv}{\mathbf e}   % "basis vector"  
\newcommand{\mE}{\mathrm E}   % matrix with these as rows
\newcommand{\inv}{^{-1}}                % mult. inverse

\newcommand{\It}{\mathbf{I}}

\newcommand{\Mat}[2][n\times n]{\operatorname M_{#1}(#2)}
\newcommand{\MatR}[1][n\times n]{\Mat[#1]{R}}
\newcommand{\MatZ}[1][n\times n]{\Mat[#1]{\Z}}
\newcommand{\GL}[2][n]{\operatorname{GL}_{#1}(#2)}
\newcommand{\GLZ}[1][n]{\GL[#1]{\Z}}
\newcommand{\GLR}[1][n]{\GL[#1]{R}}
\newcommand{\IM}{\mathrm I}
\newcommand{\Kfg}{\mathrm V_4}     % Klein four group
\newcommand{\quat}{\mathrm Q_8}  % Quaternion group

\newcommand{\str}[1]{\mathfrak{#1}}     % structure
\newcommand{\qsep}{\;}                 % follows a quantified variable
\newcommand{\Forall}[1]{\forall{#1}\qsep }
\newcommand{\Exists}[1]{\exists{#1}\qsep }
\newcommand{\modsim}{/\mathord{\sim}}  % modulo the eq-ren \sim
\newcommand{\eqc}[1]{[#1]}             % equivalence-class

\newcommand{\divides}{\mathrel{|}}
\newcommand{\ndivides}{\mathrel{\nmid}}
\newcommand{\order}[1]{\lvert#1\rvert}
\newcommand{\gpgen}[1]{\langle#1\rangle}% subgroup generated by #1

\newcommand{\trivgp}{\{\gid\}}  % trivial group
\newcommand{\nsubgen}[1]{\langle\langle#1\rangle\rangle}% normal subgroup generated by #1
\newcommand{\unordered}[2]{[#2]^{#1}}  % unordered #1-tuples from #2
\newcommand{\free}[1]{\operatorname{F}(#1)}  % free group on #1
\newcommand{\fggen}{I}  % generating set of a free group
\newcommand{\gprels}{B} % relations
\newcommand{\setactedon}{A}  % set acted on by a group

\newcommand{\setcolon}{\colon}

\newcommand{\subgp}{<}              % subgroup
\newcommand{\nsubgp}{\vartriangleleft}  % normal subgroup
\newcommand{\nnsubgp}{\ntriangleleft}  % not a normal subgroup
\newcommand{\nsupgp}{\vartriangleright}  % normal supergroup
%\newcommand{\nnsupgp}{\ntriangleright}  % normal supergroup
\newcommand{\psubgp}{\lneqq}
\newcommand{\psupgp}{\gneqq}

\newcommand{\Ker}[1]{\ker(#1)}
%\DeclareMathOperator{\im}{im}          % image
\newcommand{\im}[1]{\operatorname{im}(#1)}

\newcommand{\congruence}{\equiv}
\newcommand{\siml}{\congruence_{\ell}^H}
\newcommand{\simr}{\congruence_{\mathrm r}^H}

\newcommand{\weakprod}{\sideset{}{^{\mathrm{w}}}\prod}
\newcommand{\textweakprod}{\prod^{\mathrm w}}
\newcommand{\freeprod}{\sideset{}{^*}\prod}
\newcommand{\textfreeprod}{\prod^*}
\newcommand{\gpres}[2]{\gpgen{#1\mid#2}}% group on #1 with rel'ns #2
\newcommand{\centr}[1]{\operatorname{C}(#1)}  % center
\newcommand{\cseries}[2]{\operatorname{C}_{#1}(#2)} % central series
%\newcommand{\cseriesplain}[1]{\operatorname{C}_{#1}} % central series
\newcommand{\centralizer}[2]{\operatorname{C}_{#2}(#1)} % centralizer
\newcommand{\normalizer}[2]{\operatorname{N}_{#2}(#1)}
\newcommand{\dsubgp}[2]{{#2}^{(#1)}}  % n-th derived subgroup of #2, where n=#1.
\newcommand{\tsubgp}[1]{#1_{\mathrm{t}}} % torsion sub-group


\newcommand{\family}[1]{\mathcal{#1}}  % family (of sets)
\newcommand{\class}[1]{\mathbf{#1}}    % class

\newcommand{\units}[1]{{#1}^{\times}}    % group of units of a ring
\newcommand{\Zmod}[1]{\Z_{#1}}
\newcommand{\Zmodu}[1]{\units{\Zmod{#1}}}
\DeclareMathOperator{\lcm}{lcm}
\newcommand{\rest}[1]{\restriction{#1}}% restriction of function to #1
\newcommand{\modulo}{\emph{modulo}}

\newcommand{\bracket}{\operatorname b}  % (Lie) bracket

% Concerning permutations:

\newenvironment{cycle}{(}{)}
\newcommand{\cdiv}{\;\,} % space between terms in a cycle
\newcommand{\sgn}{\operatorname{sgn}}
\newcommand{\sq}[2][\sigma]{q_{#1}(#2)}  % used to define sgn

\newcommand{\Sym}[1]{\operatorname{Sym}(#1)}
\newcommand{\Alt}[1]{\operatorname{Alt}(#1)}       % alternating group
%\newcommand{\Dih}[1]{D_{#1}}       % dihedral group
\newcommand{\Dih}[1]{\operatorname{Dih}(#1)}       % dihedral group

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newcommand{\mi}{\mspace{2mu}\mathrm i\mspace{2mu}}
\newcommand{\mj}{\mathrm j\mspace{2mu}}
\newcommand{\mk}{\mathrm k}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%\newcommand{\setimb}[1]{[#1]}   % image of a set, using brackets
\newcommand{\abs}[1]{\left\lvert#1\right\rvert}  % absolute value
\newcommand{\size}[1]{\lvert#1\rvert}  % cardinality

\newcommand{\so}[1]{\operatorname{E}(#1)}
\newcommand{\End}[1]{\operatorname{End}(#1)}
\newcommand{\Aut}[1]{\operatorname{Aut}(#1)}
\newcommand{\Hom}[1]{\operatorname{Hom}(#1)}
\newcommand{\Inn}[1]{\operatorname{Inn}(#1)}
\newcommand{\Der}[1]{\operatorname{Der}(#1)}

\newcommand{\pid}{\textsc{pid}}
\newcommand{\ufd}{\textsc{ufd}}
\newcommand{\ed}{\textsc{ed}}

%\newcommand{\pid}{PID}
%\newcommand{\Pid}{PID}
%\newcommand{\ufd}{UFD}
%\newcommand{\ed}{ED}

\newcommand{\primei}{\mathfrak{p}}      % a prime ideal
\newcommand{\maxi}{\mathfrak{m}}        % a maximal ideal
\newcommand{\supp}[1]{\operatorname{supp}(#1)}
\newcommand{\Supp}[1]{\operatorname{supp}[#1]}
\newcommand{\symdiff}{\mathbin{\vartriangle}}
\newcommand{\spec}[1]{\operatorname{Spec}(#1)}

%\newcommand{\lang}{\mathcal{L}}        % a language or signature

\usepackage{mathrsfs}
\newcommand{\pow}[1]{\mathscr{P}(#1)}  % power set
\let\oldsqrt\sqrt
% Print the radical sign without the vinculum (overline): \oldsqrt gets
% an empty argument (a \vphantom as tall as the optional argument,
% default 1), and #2 is set immediately after the bare sign.
% NOTE(review): #2 thus falls outside the radical; if a root of uniform
% height OVER #2 was intended, the body would be
% \oldsqrt{\vphantom{#1}#2} -- confirm against the typeset output.
\renewcommand{\sqrt}[2][1]{\oldsqrt{\vphantom{#1}}{#2}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%\renewcommand{\theequation}{\roman{equation}}
%\renewcommand{\theequation}{\fnsymbol{equation}}

\renewcommand{\thepart}{\Roman{part}}

\begin{document}
\title{Groups and Rings}
\author{David Pierce}
\date{%December 17, 2014\\
%July 22, 2015}
October 1, 2015}
\publishers{Matematik B\"ol\"um\"u\\
Mimar Sinan G\"uzel Sanatlar \"Universitesi\\
\url{dpierce@msgsu.edu.tr}\\
\url{http://mat.msgsu.edu.tr/~dpierce/}}
\uppertitleback{\centering
\emph{Groups and Rings}\\
\mbox{}\\
This work is licensed under the\\
 Creative Commons Attribution--Noncommercial--Share-Alike
License.\\
 To view a copy of this license, visit\\
  \url{http://creativecommons.org/licenses/by-nc-sa/3.0/}\\
\mbox{}\\
\cc \ccby David Pierce \ccnc \ccsa\\
\mbox{}\\
Mathematics Department\\
Mimar Sinan Fine Arts University\\
Istanbul, Turkey\\
\url{http://mat.msgsu.edu.tr/~dpierce/}\\
\url{dpierce@msgsu.edu.tr}
}
%\frontmatter
\maketitle

\chapter*{Preface}

There have been several versions of the present text.
\begin{asparaenum}
\item
The first draft was my record of the first semester 
of the graduate course in algebra 
given at Middle East Technical University in Ankara in 2008--9.
I had taught the same course also in 2003--4.
The main reference for the course was Hungerford's \emph{Algebra} \cite{MR600654}.  
\item  
I revised my notes when teaching algebra a third time, in 2009--10.
Here I started making some attempt to indicate how theorems were going to be used later.
What is now \S\ref{sect:N} 
(the development of the natural numbers from the Peano Axioms) 
was originally  prepared for a course called Non-Standard Analysis, 
given at the Nesin Mathematics Village, 
\c Sirince, in the summer of 2009.  
I built up the foundational Chapter~\ref{ch:N} around this section. 
\item
Another revision, but only partial,
came in preparation for a course 
at Mimar Sinan Fine Arts University in Istanbul in 2013--4.
I expanded Chapter \ref{ch:N},
out of a desire to give some indication of how
mathematics, and especially algebra, could be built up from some simple axioms
about the relation of membership---that is, from set theory.
This building up, however, is not part of the course proper.
\item
The present version of the notes represents a more thorough-going revision,
made during and after the course at Mimar Sinan.
I try to make more use of examples, introducing them as early as possible.
The number theory that has always been in the background has been integrated more explicitly into the text (see page~\pageref{no-th}).
I have tried to distinguish more clearly
between what is essential to the course and what is not;
the starred sections comprise most of what is not essential.
\end{asparaenum}

All along, I have treated groups,
not merely as structures satisfying certain axioms, but
as structures isomorphic to groups of symmetries of sets.
The equivalence of the two points of view has been established
in the theorem named for Cayley 
(in \S\ref{sect:groups}, on page~\pageref{thm:Cay}).
Now it is pointed out (in that section) that
standard structures like
$(\Qp,1,{}\inv,\cdot)$
and
$(\Q,0,-,+)$, 
are also groups,
even though they are not obviously symmetry groups.
Several of these structures 
are constructed in Chapter \ref{ch:N}.
(In earlier editions they were constructed later.)

Symmetry groups as such are investigated more thoroughly now,
in \S\S\ref{sect:sym} and \ref{sect:ms},
\emph{before} the group axioms are simplified in \S\ref{sect:simp}.

Rings are defined in Part~\ref{part:groups}, on groups,
so that their groups of units are available as examples of groups, 
especially in \S\ref{sect:semidirect} on semidirect products (page~\pageref{sect:semidirect}).
Also rings are needed to produce rings of matrices and their groups of units, 
as in \S\ref{sect:gl} (page~\pageref{sect:gl}).

I give many page-number references, 
first of all for my own convenience in the composition of the text at the computer.
Thus the capabilities of Leslie Lamport's \LaTeX\ program 
in automating such references are invaluable.
Writing the text could hardly have been contemplated in the first place
without Donald Knuth's original \TeX\ program.
I now use the \texttt{scrbook} document class of \textsf{KOMA-Script,}
``developed by Markus Kohm and based on earlier work by Frank Neukam'' \cite[p.~236]{LaTeX-Comp}.

Ideally every theorem would have an historical reference.
This is a distant goal, but I have made some moves in this direction.

The only exercises in the text are the theorems whose proofs are not already supplied.
Ideally more exercises would be supplied, perhaps in the same manner.


%\newpage

\tableofcontents

\listoffigures

%\setcounter{chapter}{-1}

\part{Preliminaries}

\chapter{Introduction}

Published around 300 \bce,
the \emph{Elements} of Euclid 
is a model of mathematical exposition.
Each of its thirteen books
consists mainly of statements followed by proofs.
The statements are usually called \textbf{Propositions} 
today~\cite{MR17:814b,MR1932864},
although they have no particular title 
in the original text \cite{Euclid-Heiberg}.
By their content, they can be understood as \emph{theorems} or \emph{problems.}
Writing six hundred years after Euclid,
Pappus of Alexandria explains the difference \cite[p.~566]{MR13:419b}:
\begin{quote}
Those who wish to make more skilful distinctions in geometry find it worthwhile to call
\begin{compactitem}
\item
a \textbf{problem} (\gk{pr'oblhma}), 
that in which it is \emph{proposed}
%\linebreak 
(\gk{pro\-b'al\-letai}) to do or construct something;
\item
a \textbf{theorem} (\gk{je'wrhma}), 
that in which the consequences and necessary implications 
of certain hypotheses \emph{are investigated} (\gk{jewre~itai}).
\end{compactitem}
%But among the ancients some described them all as problems, some as theorems.
\end{quote}
(The Greek letters are listed and discussed
in Appendix \ref{app:Greek}, p.\ \pageref{app:Greek}.)
For example, Euclid's first proposition 
is the problem of constructing an equilateral triangle.
His fifth proposition is the theorem 
that the base angles of an isosceles triangle are equal to one another.

Each proposition of the present notes has one of four titles:  
\textbf{Lemma, Theorem, Corollary,} or \textbf{Porism.}
Each proposition may be followed by an explicitly labelled proof,
which is terminated with a box $\qedsymbol$.
\emph{If there is no proof, the reader is expected to supply her or his own proof,
as an exercise.}
No propositions are to be accepted on faith.

Nonetheless, for an algebra course, some propositions are more important than others.
The full development of the foundational Chapter~\ref{ch:N} below 
would take a course in itself, 
but is not required for algebra as such.

In these notes, a proposition may be called a lemma 
if it will be used to prove a theorem,
but then never used again.
Lemmas in these notes are numbered sequentially.
Theorems are also numbered sequentially,
independently from the lemmas.
A statement that can be proved easily from a theorem
is called a corollary and is numbered with the theorem.
So for example Theorem~\ref{thm:rec} on page~\pageref{thm:rec}
is followed by Corollary~\ref{cor:rec}.

Some propositions can be obtained easily,
not from a preceding theorem itself,
but from its proof.
Such propositions are called \emph{porisms}
and, like corollaries, are numbered with the theorems from whose proofs they are derived.
So for example Porism~\ref{por:prod} on p.\ \pageref{por:prod}
follows Theorem~\ref{thm:prod}.

The word \emph{porism} and its meaning are explained, in the 5th century \ce,
by Proclus in his commentary 
on the first book of Euclid's \emph{Elements}~\cite[p.~212]{MR1200456}:
\begin{quote}
``Porism'' is a term applied to a certain kind of problem, 
such as those in the \emph{Porisms} of Euclid.  
But it is used in its special sense 
when as a result of what is demonstrated 
some other theorem comes to light without our propounding it.  
Such a theorem is therefore called a ``porism,''
as being a kind of incidental gain resulting from the scientific demonstration.
\end{quote}
The translator explains that  the word \emph{porism} comes from the verb \gk{por'izw}, 
meaning to furnish or provide.


The original source for much of the material of these notes 
is Hungerford's \emph{Algebra} \cite{MR600654},
or sometimes Lang's \emph{Algebra} \cite{Lang-alg}, 
but there are various rearrangements and additions.  
The back cover of Hungerford's book quotes a review:
\begin{quote}
Hungerford's exposition is clear enough that an average graduate student can read the text on his own and understand most of it.
\end{quote}
I myself aim for logical clarity; 
but I do not intend for these notes to be a replacement for lectures in a classroom.  
Such lectures may amplify some parts, while glossing over others.
As a graduate student myself,
I understood a course to consist of the teacher's lectures,
and the most useful reference was not a printed book,
but the notes that I took in my own hand.
I still occasionally refer to those notes today.

Hungerford is inspired by category theory, 
of which his teacher Saunders Mac Lane was one of the creators.  
Categories are defined in the present text
in \S\ref{sect:category} (page~\pageref{sect:category}).
The spirit of category theory is seen 
at the beginning of Hungerford's Chapter I, ``Groups'':
\begin{quote}
There is a basic truth 
that applies not only to groups 
but also to many other algebraic objects 
(for example, rings, modules, vector spaces, fields): 
in order to study effectively an object 
with a given algebraic structure, 
it is necessary to study as well the functions 
that preserve the given algebraic structure 
(such functions are called homomorphisms).
\end{quote}  
Hungerford's term \emph{object} here reflects the usage of category theory.  
Taking inspiration from model theory, 
the present notes will often use the term \emph{structure} instead.  
Structures are defined in \S\ref{sect:structures} (page~\pageref{sect:structures}).  
The examples of objects named by Hungerford are all structures 
in the sense of model theory, 
although not every object in a category is a structure in this sense.

When a word is printed in \textbf{boldface} in these notes,
the word is a technical term whose meaning can be inferred from the surrounding text.

\chapter{Mathematical foundations}\label{ch:N}%\label{part:N}
%\setcounter{section}{-1}

%\setchapterpreamble{
As suggested in the Introduction,
the full details of this chapter are not strictly part of an algebra course, 
but are logically presupposed by the course.  

One purpose of the chapter is to establish the notation whereby
\begin{align*}
\N&=\{1,2,3,\dots\},&
\upomega&=\{0,1,2,\dots\}.
\end{align*} 
%$\N=\{1,2,3,\dots\}$ and $\upomega=\{0,1,2,\dots\}$. 
The elements of $\upomega$ are the von Neumann natural numbers,%%%%%
\footnote{\label{fn:Greek}%%%%%
The letter $\upomega$ 
is not the minuscule English letter called \emph{double u,} 
but the minuscule Greek \emph{omega,} 
which is probably in origin a double o.  
Obtained with the control sequence \url{\upomega} 
from the \url{upgreek} package
for \LaTeX, 
the $\upomega$ used here is upright, 
unlike the standard slanted $\omega$ (obtained with \url{\omega};
see Appendix \ref{app:Greek}, p.\ \pageref{app:Greek}).  
The slanted $\omega$ might be used as a variable 
(as for example on page~\pageref{omega}).  
We shall similarly distinguish between the constant $\uppi$ 
(used for the ratio of the circumference to the diameter of a circle,
as well as for the \emph{canonical projection} 
defined on page~\pageref{can-proj}
and the \emph{coordinate projections} 
defined on pages~\pageref{coord-proj} and \pageref{coord-proj-2}) 
and the variable $\pi$ (pages~\pageref{pi} and \pageref{pi-2}).} 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
so that if $n\in\upomega$, then
\begin{equation*}
n=\{0,\dots,n-1\}.
\end{equation*}
In particular, $n$ is itself a set with $n$ elements.  
When $n=0$, this means $n$ is the empty set.  
A cartesian power $A^n$ can be understood as the set of functions from $n$ to $A$.  
Then a typical element of $A^n$ can be written as $(a_0,\dots,a_{n-1})$.  
Most people write $(a_1,\dots,a_n)$ instead; 
and when they want an $n$-element set, they use $\{1,\dots,n\}$.
This is a needless complication, 
since it leaves us with no simple abbreviation for an $n$-element set.

Another purpose of this chapter is to review the construction,
not only of the sets $\N$ and $\upomega$,
but the sets $\Qp$, $\Q$, $\Z$, $\Rp$, and $\R$ derived from them.
We ultimately have certain \emph{structures,} namely:
\begin{compactitem}
\item
the \emph{semigroup} $(\N,+)$;
\item
\emph{monoids} $(\upomega,0,+)$ and $(\N,1,\cdot)$;
\item
\emph{groups} $(\Qp,1,{}\inv,\cdot)$, $(\Q,0,-,+)$, $(\Z,0,-,+)$, $(\Rp,1,{}\inv,\cdot)$, and $(\R,0,-,+)$; 
\item
\emph{rings} $(\Z,0,-,+,1,\cdot)$, $(\Q,0,-,+,1,\cdot)$, and $(\R,0,-,+,1,\cdot)$.
\end{compactitem}





\section{Sets and geometry}\label{sect:sets}

Most objects of mathematical study can be understood as \emph{sets.}
A set is a special kind of \emph{collection.}
A \textbf{collection} is many things, considered as one.  Those many
things are the 
\textbf{members}%
\index{member}
or 
\textbf{elements}%
\index{element}
of the collection.  The members \textbf{compose} the collection, and the collection \textbf{comprises} them.\footnote{Thus the relations named by the verbs \emph{compose} and \emph{comprise} are converses of one another; but native English speakers often confuse these two verbs.}  Each member \textbf{belongs} to the collection and is \textbf{in} the collection, and the collection \textbf{contains} the member.  

Designating certain collections as sets,
we shall identify some properties of them
that will allow us to do the mathematics that we want.
These properties will be expressed by \emph{axioms.} 
We shall use versions of the so-called Zermelo--Fraenkel Axioms
with the Axiom of Choice.  
The collection of these axioms is denoted by $\zfc$.
Most of these axioms were described by Zermelo in 1908 \cite{Zermelo-invest}.

We study sets axiomatically, 
because a na\"\i ve approach can lead to contradictions.
For example, one might think na\"\i vely 
that there was a collection of all collections.
But there can be no such collection,
because if there were, then there would be 
a collection of all collections that did not contain themselves,
and \emph{this} collection would contain itself if and only if it did not.
This result is the \textbf{Russell Paradox,}\label{Russell}
described in a letter \cite{Russell-letter} from Russell to Frege in 1902.

The propositions of Euclid's \emph{Elements} 
concern points and lines\label{line} in a plane and in space.
Some of these lines are \emph{straight} lines,
and some are circles.
Two straight lines that meet at a point make an \emph{angle.}
Unless otherwise stated, straight lines have endpoints.
It is possible to compare two straight lines, or two angles:
if they can be made to coincide, they are \emph{equal} to one another.
This is one of Euclid's so-called \emph{common notions.}
If a straight line has an endpoint on another straight line,
two angles are created.
If they are equal to one another, then they are called \emph{right angles.}
One of Euclid's \emph{postulates} is that all right angles are equal to one another.
The other postulates tell us things that we can do:
Given a center and radius, we can draw a circle.
From any given point to another, we can draw a straight line,
and we can extend an existing straight line beyond its endpoints;
indeed, given \emph{two} straight lines,
with another straight line cutting them 
so as to make the interior angles on the same side together less than two right angles,
we can extend the first two straight lines so far that they will intersect one another.

Using the common notions and the postulates,
Euclid proves propositions: 
the problems and theorems discussed in the Introduction above.
The common notions and the postulates 
do not \emph{create} the plane or the space
in which the propositions are set.
The plane or the space exists already.
The Greek word \gk{gewmetr'ia} has the original meaning of \emph{earth measurement,} 
that is, surveying.
People knew how to measure the earth long before Euclid's \emph{Elements} was written.

Similarly, people were doing mathematics long before set theory was developed.
Accordingly, the set theory presented here
will assume that sets already exist.
Where Euclid has postulates, we shall have axioms.
Where Euclid has definitions and common notions and certain unstated assumptions,
we shall have definitions and certain logical principles.

It is said of the \emph{Elements,}
\begin{quote}
A critical study of Euclid, with, of course, the advantage of present insights,
shows that he uses dozens of assumptions that he never states and undoubtedly did not recognize.\hfill\cite[p.~87]{MR0472307}
\end{quote}
One of these assumptions is that two circles will intersect 
if each of them passes through the center of the other.  
(This assumption is used to construct an equilateral triangle.)
But it is impossible to state \emph{all} of one's assumptions.
We shall assume, for example, that if a formal sentence $\Forall x\phi(x)$ is true,
what this means is that $\phi(a)$ is true for arbitrary $a$.\label{a}
\emph{This} means $\phi(b)$ is true, and $\phi(c)$ is true, and so on.
However, there is nothing at the moment called $a$ or $b$ or $c$ or whatever.
For that matter, we have no actual formula called $\phi$.
There is nothing called $x$,
and moreover there will never be anything called $x$
in the way that there might be something called $a$.
Nonetheless, we assume that everything we have said 
about $\phi$, $x$, $a$, $b$, and $c$ makes sense.

The elements of every set will be sets themselves.  
By definition, two sets will
\emph{equal}\label{equal}%
\index{equal}
if they have the same elements.  
There will be an \emph{empty set,}\label{empty} denoted by
\begin{equation*}
\emptyset;
\end{equation*}
this will have no elements.
If $a$ is a set, then there will be a set denoted by
\begin{equation*}
\{a\},
\end{equation*}
with the unique element $a$.  If $b$ is also a set, then there will be a set denoted by
\begin{equation*}
a\cup b,
\end{equation*}
whose members are precisely the members of $a$ and the members of $b$.  
Thus there will be sets $a\cup\{b\}$ and $\{a\}\cup\{b\}$; 
the latter is usually written as
\begin{equation*}
\{a,b\}.
\end{equation*}
If $c$ is another set, we can form the set $\{a,b\}\cup\{c\}$, 
which we write as
\begin{equation*}
\{a,b,c\},
\end{equation*}
and so forth.  
This will allow us to build up the following infinite sequence:
\begin{align*}
&\emptyset,&
&\{\emptyset\},&
&\bigl\{\emptyset,\{\emptyset\}\bigr\},&
&\Bigl\{\emptyset,\{\emptyset\},\bigl\{\emptyset,\{\emptyset\}\bigr\}\Bigr\},&
&\dots
\end{align*}
By definition,\label{nat} these sets will be the natural numbers $0$, $1$, $2$, $3$, \dots
To be more precise, they are the \textbf{von Neumann natural numbers} \cite{von-Neumann}.

\section{Set theory}

\subsection{Notation}

Our formal axioms for set theory will be written in a certain logic, 
whose symbols are:
\begin{compactenum}[1)]
\item
variables, as $x$, $y$, and $z$;
\item
the symbol $\in$ denoting the membership relation;
\item
the Boolean connectives of propositional logic:
\begin{compactenum}
\item
the singulary connective $\lnot$ (``not''), and
 \item
the binary connectives $\lor$ (``or''), $\land$ (``and''), $\lto$ (``implies''), 
and $\liff$ (``if and only if'');
 \end{compactenum}
\item
parentheses;
\item
quantification symbols $\exists$ (``there exists'') and $\forall$ (``for all'').
\end{compactenum}
We may also introduce constants, as $a$, $b$, and $c$, or $A$, $B$, and $C$, to stand for particular sets.  A variable or a constant is called a \emph{term.}  If $t$ and $u$ are terms, then the expression
\begin{equation*}
t\in u
\end{equation*}
is called an \textbf{atomic formula.}  
It means $t$ is a member of $u$.
From atomic formulas, other formulas are built up \emph{recursively} 
by use of the symbols above, according to certain rules, namely,
\begin{compactenum}[1)]
\item
if $\phi$ is a formula, then so is $\lnot\phi$;
\item
if $\phi$ and $\psi$ are formulas, then so is $(\phi*\psi)$, 
where $*$ is one of the binary Boolean connectives;
\item
if $\phi$ is a formula and $x$ is a variable, 
then $\Exists x\phi$ and $\Forall x\phi$ are formulas.
\end{compactenum}
The formula $\lnot\;t\in u$ says $t$ is \emph{not} a member of $u$.  
We usually abbreviate the formula by
\begin{equation*}
t\notin u.
\end{equation*}

The expression $\Forall z(z\in x\lto z\in y)$ is the formula saying
that every element of $x$ is an element of $y$.  Another way to say
this is that $x$ is a 
\textbf{subset}%
\index{subset}
of $y$, or that $y$ 
\textbf{includes}%
\index{include}
$x$.  We abbreviate this formula by%%%%%
\footnote{The relation $\included$ of being included 
is completely different from the relation $\in$ of being contained.  
However, many mathematicians confuse these relations in words, 
using the word \emph{contained} to describe both.}
\begin{equation*}
x\included y.
\end{equation*}
The expression $x\included y\land y\included x$ is the formula
saying that $x$ and $y$ have the same members, 
so that they are \textbf{equal} by the definition foretold above 
(page~\pageref{empty}); in this case we use the abbreviation
\begin{equation*}
x=y.
\end{equation*}

All occurrences of $x$ 
in the formulas $\Exists x\phi$ and $\Forall x\phi$ are \textbf{bound,}%%%%%
\footnote{The word \emph{bound} here 
is the past participle of the verb \emph{to bind.}
There is another verb, \emph{to bound,} 
which is also used in mathematics, 
but its past participle is \emph{bounded.}
The two verbs \emph{to bind} and \emph{to bound} are apparently unrelated.
The verb \emph{to bind} has been part of English 
since the beginning of that language in the tenth century.
The verb \emph{to bound} is based on the noun \emph{bound,}
which entered Middle English in the 12th century from the Old French noun 
that became the modern \emph{borne.}}  
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
and they remain bound when other formulas are built up from these formulas.  
Occurrences of a variable that are not bound are \textbf{free.}  

\subsection{Classes and equality}

A \textbf{singulary}%%%%%
\footnote{The word
  \textbf{unary}\index{unary} is  
  more common, but less etymologically correct.}
  %%%%%
formula is a formula in which only one variable occurs freely.
If $\phi$ is a singulary formula with free variable $x$, we may write $\phi$ as
\begin{equation*}
\phi(x).
\end{equation*}
If $a$ is a set, then by replacing every free occurrence of $x$ in $\phi$ with $a$, 
we obtain the formula
\begin{equation*}
\phi(a),
\end{equation*}
which is called a \textbf{sentence} because it has no free variables.  
This sentence is true or false, depending on which set $a$ is.  
If the sentence is true, then $a$ can be said to \textbf{satisfy} the formula $\phi$.  
There is a collection of all sets that satisfy $\phi$:
we denote this collection by
\begin{equation*}
\{x\colon\phi(x)\}.
\end{equation*}
Such a collection is called a 
\textbf{class.}%
\index{class}\label{class}
In particular, it is the class \textbf{defined} by the formula $\phi$.
If we give this class the name $\bm C$, then the expression
\begin{equation*}
x\in\bm C
\end{equation*}
means just $\phi(x)$.

A formula in which only two variables occur freely is \textbf{binary.}
If $\psi$ is such a formula, with free variables $x$ and $y$,
then we may write $\psi$ as
\begin{equation*}
\psi(x,y).
\end{equation*}
We shall want this notation for proving Theorem~\ref{thm:=} below.
If needed, we can talk about ternary formulas $\chi(x,y,z)$, and so on.

The definition of equality of sets can be expressed by the sentences
\begin{gather}\label{eqn:=}
\Forall x\Forall y\bigl(x=y\lto(a\in x\liff a\in y)\bigr),\\\label{eqn:>=}
\Forall x\Forall y\Exists z\bigl(\lnot(z\in x\liff z\in y)\lor x=y\bigr),
\end{gather}
where $a$ is an arbitrary set.
The \textbf{Equality Axiom} is that equal sets belong to the same sets:
\begin{equation}\label{eqn:=2}
\Forall x\Forall y\bigl(x=y\lto(x\in a\liff y\in a)\bigr).
\end{equation}
The meaning of the sentences \eqref{eqn:=} and \eqref{eqn:=2} 
is that equal sets satisfy the same atomic formulas $a\in x$ and $x\in a$.

\begin{theorem}\label{thm:=}
Equal sets satisfy the same formulas:
\begin{equation}\label{eqn:=thm}
\Forall x\Forall y\Bigl(x=y\lto\bigl(\phi(x)\liff\phi(y)\bigr)\Bigr).
\end{equation}
\end{theorem}

\begin{proof}
Suppose $a$ and $b$ are equal sets.
By symmetry, it is enough to show
\begin{equation}\label{eqn:ab}
\phi(a)\lto\phi(b)
\end{equation}
for all singulary formulas $\phi(x)$.
As noted, we have \eqref{eqn:ab} 
whenever $\phi(x)$ is an atomic formula $x\in c$ or $c\in x$.
We also have it when $\phi$ is $x\in x$.
If we have \eqref{eqn:ab} when $\phi$ is $\psi$, 
then we have it when $\phi$ is $\lnot\psi$.
If we have \eqref{eqn:ab} when $\phi$ is $\psi$ or $\chi$, 
then we have it when $\phi$ is $(\psi*\chi)$,
where $*$ is one of the binary connectives.
If, for some binary formula $\psi$, 
we have \eqref{eqn:ab} whenever $\phi(x)$ is of the form $\psi(x,c)$,
then we have it when $\phi(x)$ is $\Forall y\psi(x,y)$ or $\Exists y\psi(x,y)$.
Therefore we do have \eqref{eqn:ab} in all cases.
\end{proof}

The foregoing is a proof by \textbf{induction.}
Such a proof is possible because formulas are defined recursively.  
See \S\ref{sect:N} below (page~\pageref{sect:N}).
Actually we have glossed over some details.
This may cause confusion;
but then the details themselves could cause confusion.
What we are really proving is all of the sentences of one of the infinitely many forms
\begin{equation}\label{eqn:''}
\left.
\begin{gathered}
\Forall x\Forall y\Bigl(x=y\lto\bigl(\phi(x)\liff\phi(y)\bigr)\Bigr),\\
\Forall x\Forall y\Forall z\Bigl(x=y\lto\bigl(\phi(x,z)\liff\phi(y,z)\bigr)\Bigr),\\
\Forall x\Forall y\Forall z\Forall{z'}
\Bigl(x=y\lto\bigl(\phi(x,z,z')\liff\phi(y,z,z')\bigr)\Bigr),\\
\makebox[8.8cm]{\dotfill,}
%\Forall x\Forall y\Forall z\Forall{z'}\Forall{z''}\Bigl(x=y\lto\bigl(\phi(x,z,z',z'')\liff\phi(y,z,z',z'')\bigr)\Bigr),
\end{gathered}
\right\}
\end{equation}
%and so on, 
where no constant occurs in any of the formulas $\phi$.  
Assuming $a=b$, it is enough to prove every sentence of one of the forms
\begin{gather*}
\phi(a)=\phi(b),\\
\phi(a,c)=\phi(b,c),\\
\phi(a,c,c')=\phi(b,c,c'),\\
\makebox[4cm]{\dotfill}
%\phi(a,c,c',c'')=\phi(b,c,c',c''),	
\end{gather*}
%and so on.
We have tried to avoid writing all of this out,
by allowing constants to occur implicitly in formulas,
and by understanding $\Forall x\phi(x)$ to mean $\phi(a)$ for arbitrary $a$,
as suggested above (page~\pageref{a}).
We could abbreviate the sentences in \eqref{eqn:''} as
\begin{multline}\label{eqn:...}
\Forall x\Forall y\Forall{z_1} \dots\Forall{z_n}
\Bigl(x=y\lto\\
\bigl(\phi(x,z_1,\dots,z_n)\liff\phi(y,z_1,\dots,z_n)\bigr)\Bigr).
\end{multline}
However, we would have to explain what $n$ was and what the dots of ellipsis meant.
The expression in \eqref{eqn:...} 
means one of the formulas in the infinite list suggested in \eqref{eqn:''},
and there does not seem to be a better way to say it than that.

The sentence \eqref{eqn:=thm} is usually taken as a logical axiom,
like one of Euclid's common notions. 
Then \eqref{eqn:=} and \eqref{eqn:=2} are special cases of this axiom, 
but \eqref{eqn:>=} is no longer true, either by definition or by proof. 
So this too must be taken as an axiom, which is called the \textbf{Extension Axiom.}  
%The idea behind the name is that having the same members 
%means having the same \emph{extension.}  

In any case, all of the sentences \eqref{eqn:=}, \eqref{eqn:>=}, \eqref{eqn:=2}, and \eqref{eqn:=thm} end up being true.  They tell us that equal sets are precisely those sets that are logically indistinguishable.
We customarily treat equality as \emph{identity.}  We consider equal sets to be the \emph{same} set.  If $a=b$, we may say simply that $a$ is $b$.  

Similarly, in ordinary mathematics,
since $1/2=2/4$,\label{1/2} we consider $1/2$ and $2/4$ to be the same.  
In ordinary \emph{life} they are distinct: 
$1/2$ is one thing, namely one half, 
while $2/4$ is two things, namely two quarters.  
In mathematics, we ignore this distinction.

As with sets, so with classes, 
one \textbf{includes} another
if every element of the latter belongs to the former.
Hence if formulas $\phi(x)$ and $\psi(y)$ 
define classes $\class C$ and $\class D$ respectively,
and if
\begin{equation*}
  \Forall x\bigl(\phi(x)\lto\psi(x)\bigr),
\end{equation*}
this means $\class D$ includes $\class C$, and we write
\begin{equation*}
  \class C\included\class D.
\end{equation*}
If also $\class C$ includes $\class D$,
then the two classes are \textbf{equal,}
and we write
\begin{equation*}
  \class C=\class D;
\end{equation*}
this means $\Forall x\bigl(\phi(x)\liff\psi(x)\bigr)$.
Likewise a set and a class can be considered as \textbf{equal} 
if they have the same members.
Thus if again $\bm C$ is defined by $\phi(x)$,
then the expression
\begin{equation*}
a=\bm C
\end{equation*}
means $\Forall x\bigl(x\in a\liff\phi(x)\bigr)$. 

\begin{theorem}
Every set is a class.
\end{theorem}

\begin{proof}
The set $a$ is the class $\{x\colon x\in a\}$.
\end{proof}

However, there is no reason to expect the converse to be true.

\begin{theorem}\label{thm:RP}
Not every class is a set.
\end{theorem} 

\begin{proof}
There are formulas $\phi(x)$ such that
\begin{equation*}
\Forall y\lnot\Forall x\bigl(x\in y\liff\phi(x)\bigr).
\end{equation*}
Indeed, let $\phi(x)$ be the formula $x\notin x$.
Then
\begin{equation*}
\Forall y\lnot\bigl(y\in y\liff\phi(y)\bigr).\qedhere
\end{equation*}
\end{proof}

More informally, the argument is that the class $\{x\colon x\notin x\}$ is not a set,
because if it were a set $a$, then $a\in a\liff a\notin a$,
which is a contradiction.  
This is what was given above as the Russell Paradox (page~\pageref{Russell}). 
Another example of a class that is not a set 
is given by the \emph{Burali-Forti Paradox} on page~\pageref{BF} below.

\subsection{Construction of sets}

We have established what it means for sets to be equal.
We have established that sets are examples, 
but not the only examples,
of the collections called classes.
However, we have not officially exhibited any sets.
We do this now.
The \textbf{Empty Set Axiom} is
\begin{equation*}
\Exists x\Forall yy\notin x.
\end{equation*}
As noted above (page~\pageref{empty}), 
the set whose existence is asserted by this axiom is denoted by $\emptyset$.
This set is the class $\{x\colon x\neq x\}$.

We now obtain the sequence $0$, $1$, $2$, \dots, described above 
(p.\ \pageref{nat}). 
We use the Empty Set Axiom to start the sequence.
We continue by means of the \textbf{Adjunction Axiom:}
if $a$ and $b$ are sets, then the set denoted by $a\cup\{b\}$ exists.
Formally, the axiom is
\begin{equation*}
\Forall x\Forall y\Exists z\Forall w(w\in z\liff w\in x\lor w=y).
\end{equation*}
In writing this sentence, 
we follow the convention whereby 
the connectives $\lor$ and $\land$ are more binding than $\lto$ and $\liff$,
so that, for example, the expression
\begin{equation*}
(w\in z\liff w\in x\lor w=y)  
\end{equation*}
means the formula $\bigl(w\in z\liff (w\in x\lor w=y)\bigr)$.

We can understand the Adjunction Axiom as saying that, for all sets $a$ and $b$,
the class $\{x\colon x\in a\lor x=b\}$ is actually a set.
Adjunction is not one of Zermelo's original axioms of 1908;
but the following is Zermelo's \textbf{Pairing Axiom:}

\begin{theorem}
For any two sets $a$ and $b$, the set $\{a,b\}$ exists:
\begin{equation*}
\Forall x\Forall y\Exists z\Forall w(w\in z\liff w=x\lor w=y).
\end{equation*}
\end{theorem}

\begin{proof}
By Empty Set and Adjunction, $\emptyset\cup\{a\}$ exists, but this is just $\{a\}$.
Then $\{a\}\cup\{b\}$ exists by Adjunction again.
\end{proof}

The theorem is that the class $\{x\colon x=a\lor x=b\}$ is always a set.
Actually Zermelo does not have a Pairing Axiom as such,
but he has an \textbf{Elementary Sets Axiom,} 
which consists of what we have called the Empty Set Axiom and the Pairing Axiom.%%%%%
\footnote{Zermelo also requires that for every set $a$ there be a set $\{a\}$; 
but this can be understood as a special case of pairing.}




Every class $\bm C$ has a \textbf{union,} 
which is the class
\begin{equation*}
 \{x\colon\Exists y(x\in y\land y\in\bm C)\}. 
\end{equation*}
This class is denoted by
\begin{equation*}
\bigcup\bm C.
\end{equation*}
This notation is related as follows
with the notation for the classes involved in the Adjunction Axiom:

\begin{theorem}
For all sets $a$ and $b$, $a\cup\{b\}=\bigcup\bigl\{a,\{b\}\bigr\}$.
\end{theorem}

We can now use the more general notation
\begin{equation*}
a\cup b=\bigcup\{a,b\}.
\end{equation*}
The \textbf{Union Axiom} is that the union of a \emph{set} is always a set:
\begin{equation*}
\Forall x\Exists yy=\bigcup x.
\end{equation*}
The Adjunction Axiom is 
a consequence of the Empty-Set, Pairing, and Union Axioms.  
This is why Zermelo did not need Adjunction as an axiom.
We state it as an axiom,
because we can do a lot of mathematics with it
that does not require the full force of the Union Axiom.
We shall however use the Union Axiom 
when considering unions of chains of structures 
(as on p.\ \pageref{chains} below).

Suppose $A$ is a set and $\bm C$ is the class $\{x\colon\phi(x)\}$.  
Then we can form the class
\begin{equation*}
A\cap\bm C,
\end{equation*}
which is defined by the formula $x\in A\land\phi(x)$.  
The \textbf{Separation Axiom} is that this class is a set.  
Standard notation for this set is
\begin{equation}\label{eqn:xinA}
\{x\in A\colon\phi(x)\}.
\end{equation}
However, this notation is unfortunate.
Normally the formula $x\in A$ is read as a sentence of ordinary language, 
namely ``$x$ belongs to $A$'' or ``$x$ is in $A$.''
However, the expression in \eqref{eqn:xinA} is read 
as ``the set of $x$ in $A$ such that $\phi$ holds of $x$'';
in particular, $x\in A$ here is read as the noun phrase ``$x$ in $A$''
(or ``$x$ belonging to $A$,'' or ``$x$ that are in $A$,''
or something like that).%%%%%
\footnote{Ambiguity of expressions like $x\in A$ (is it a noun or a sentence?)
is common in mathematical writing, as for example in the abbreviation of $\Forall{\varepsilon}(\varepsilon>0\lto\phi)$ as $(\forall{\varepsilon>0})\;\phi$.
Such ambiguity is avoided in these notes.
However, certain ambiguities are tolerated: 
letters like $a$ and $A$ stand sometimes for sets, 
sometimes for \emph{names} for sets.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

Actually Separation is a \emph{scheme} of axioms, one for each singulary formula $\phi$:
\begin{equation*}
\Forall x\Exists y\Forall z\bigl(z\in y\liff z\in x\land\phi(z)\bigr).
\end{equation*}

In most of mathematics, and in particular in the other sections of these notes, 
one need not worry too much about the distinction between sets and classes.  
But it is logically important.  
It turns out that the objects of interest in mathematics can be understood as sets.  
Indeed, we have already defined natural numbers as sets.  
We can talk about sets by means of formulas.  
Formulas define classes of sets, as we have said.  
Some of these classes turn out to be sets themselves; 
but again, there is no reason to expect all of them to be sets,  
and indeed by Theorem~\ref{thm:RP} (p.\ \pageref{thm:RP}) 
some of them are not sets.  
\emph{Sub-classes} of sets are sets, by the Separation Axiom; 
but some classes are too big to be sets.  
The class $\{x\colon x=x\}$ of all sets is not a set, 
since if it were, then the sub-class $\{x\colon x\notin x\}$ would be a set, and it is not.

Every set $a$ has a \emph{power class,} 
namely the class $\{x\colon x\included a\}$ of all subsets of $a$.  
This class is denoted by
\begin{equation*}
\pow a.
\end{equation*}
The \textbf{Power Set Axiom} is that this class is a set:
\begin{equation*}
\Forall x\Exists yy=\pow x.
\end{equation*}
Then $\pow a$ can be called the \textbf{power set} of $a$.
In the main text, after this chapter, 
we shall not explicitly mention power sets until p.\ \pageref{pow}.
However, the Power Set Axiom is of fundamental importance
for allowing us to prove Theorem~\ref{thm:prod-set} on p.\ \pageref{thm:prod-set} below.

We want the \textbf{Axiom of Infinity} to be simply that 
the collection $\{0,1,2,\dots\}$ of natural numbers 
as defined on p.\ \pageref{nat} is a set.  
It is not obvious how to formulate this as a sentence of our logic.  
However, the indicated collection contains $0$, which by definition is the empty set;
also, for each of its elements $n$,
the collection contains also $n\cup\{n\}$.
Let $\It$ be the class of all \emph{sets} with these properties: that is,
\begin{equation*}
\It=\bigl\{x\colon0\in x\land\Forall y(y\in x\lto y\cup\{y\}\in x)\bigr\}.
\end{equation*}
Thus, if it exists, the set of natural numbers will belong to $\It$.
Furthermore, the set of natural numbers 
will be the \emph{smallest} element of $\It$.
But we still must make this precise.
For an arbitrary class $\bm C$, we define
\begin{equation*}
\bigcap\bm C=\{x\colon\Forall y(y\in\bm C\lto x\in y)\}.
\end{equation*}
This class is the \textbf{intersection} of $\bm C$. 

\begin{theorem}
If $a$ and $b$ are two sets, then
\begin{equation*}
a\cap b=\bigcap\{a,b\}.
\end{equation*}
If $a\in\bm C$, then
\begin{equation*}
\bigcap\bm C\included a,
\end{equation*}
so in particular $\bigcap\bm C$ is a set.
However, $\bigcap\emptyset$ is the class of all sets, which is not a set.
\end{theorem} 

We can now define%%%%%
\footnote{Some writers define $\bigcap\bm C$ only when $\bm C$ is a nonempty set.
This would make our definition of $\upomega$ invalid without the Axiom of Infinity.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{equation}\label{eqn:upomega-defn}
\upomega=\bigcap\It.
\end{equation}

\begin{theorem}\label{thm:AI}
The following conditions are equivalent.
\begin{compactenum}
\item
$\It\neq\emptyset$.
\item
$\upomega$ is a set.
\item
$\upomega\in\It$.
\end{compactenum}
\end{theorem}

Any of the equivalent conditions in the theorem can be taken as the Axiom of Infinity.
This does not by itself establish that 
$\upomega$ has the properties we expect of the natural numbers;
we still have to do some work.  
We shall do this in \S\ref{sect:omega} (p.~\pageref{sect:omega}).

The \textbf{Axiom of Choice} can be stated 
in any of several equivalent versions.
One of these versions is that every set can be \textbf{well-ordered:} 
that is, the set can be given a linear ordering 
(as defined on p.\ \pageref{lo} below) 
so that every nonempty subset has a least element 
(as in Theorem~\ref{thm:wo} on p.\ \pageref{thm:wo}).
However, we have not yet got a way to understand an ordering as a set.
An ordering is a kind of binary relation,
and a binary formula can be understood to define a binary relation.
But we cannot yet use our logical symbolism 
to say that such a relation \emph{exists.}
We shall be able to do so in the next section.
We shall use the Axiom of Choice:
\begin{compactitem}
\item
to establish 
that every set has a \emph{cardinality} (p.\ \pageref{cardinality});
\item
to prove Theorem~\ref{thm:PID->UFD},
that every \pid\ is a \ufd\ (p.\ \pageref{thm:PID->UFD});
\item
to prove Zorn's Lemma (p.\ \pageref{thm:ZL});
\item
hence to prove Stone's theorem on representations of Boolean rings 
(p.\ \pageref{thm:Stone}).
\end{compactitem}
The Axiom can also be used to show: 
\begin{compactitem}
\item
that direct sums are not always the same as direct products (p.\ \pageref{ac});
\item
that nonprincipal ultraproducts of fields exist (p.\ \pageref{ac-up}).
\end{compactitem}

For the record, we have now named all of the axioms given by Zermelo in 1908:
\begin{inparaenum}[(I)]
\item
Extension,
\item
Elementary Sets,
\item
Separation,
\item
Power Set,
\item
Union,
\item
Choice, and
\item
Infinity.
\end{inparaenum}
Zermelo assumes that equality is identity: 
but his assumption is our Theorem~\ref{thm:=}.  
In fact Zermelo does not use logical formalism as we have.  
We prefer to define equality with \eqref{eqn:=} and \eqref{eqn:>=} 
and then use the Axioms of 
\begin{inparaenum}[(i)]
\item
the Empty Set,
\item
Equality,
\item
Adjunction,
\item
Separation,
\item
Union,
\item
Power Set,
\item
Infinity, and
\item
Choice.
\end{inparaenum}
But these two collections of definitions and axioms are logically equivalent.  

Apparently Zermelo overlooked one axiom, the \emph{Replacement Axiom,} 
which was supplied in 1922 by Skolem \cite{Skolem-some-remarks} and by Fraenkel.%%%%%
\footnote{I have not been able to consult Fraenkel's original papers.  
However, according to van Heijenoort \cite[p.~291]{MR1890980}, 
Lennes also suggested something like the Replacement Axiom at around the same time (1922) as Skolem and Fraenkel; but Cantor had suggested such an axiom in 1899.}  
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We shall give this axiom in the next section.  

An axiom never needed in ordinary mathematics is the \emph{Foundation Axiom.}  Stated originally by von Neumann \cite{von-Neumann-ax}, it ensures that certain pathological situations, like a set containing itself, are impossible.  It does this by declaring that every nonempty set has an element that is disjoint from it: $\Forall x\Exists y(x\neq\emptyset\lto y\in x\land x\cap y=\emptyset)$.  We shall never use this.

The collection called $\zfc$ is Zermelo's axioms, along with Replacement and Foundation.  If we leave out Choice, we have what is called $\zf$.


\section{Functions and relations}\label{sect:f}

Given two sets $a$ and $b$, we define
\begin{equation*}
(a,b)=\bigl\{\{a\},\{a,b\}\bigr\}.
\end{equation*}
This set is the \textbf{ordered pair} 
whose first entry is $a$ and whose second entry is $b$.
The purpose of the definition is to make the following theorem true.

\begin{theorem}
Two ordered pairs are equal if and only if 
their first entries are equal and their second entries are equal:
\begin{equation*}\label{eqn:op}
(a,b)=(x,y)\liff a=x\land b=y.
\end{equation*}
\end{theorem}

If $A$ and $B$ are sets, then we define
\begin{equation*}
A\times B=\{z\colon\Exists x\Exists y(z=(x,y)\land x\in A\land y\in B)\}.
\end{equation*}
This is the \textbf{cartesian
  product}\index{cartesian product}
  of $A$ and $B$.

\begin{theorem}\label{thm:prod-set}
The cartesian product of two sets is a set.
\end{theorem}

\begin{proof}
If $a\in A$ and $b\in B$, 
then $\{a\}$ and $\{a,b\}$ are elements of $\pow{A\cup B}$,
so $(a,b)\in\pow{\pow{A\cup B}}$, and therefore
\begin{equation*}
A\times B\included\pow{\pow{A\cup B}}.\qedhere
\end{equation*}
\end{proof}

An \textbf{ordered triple}\index{ordered triple} $(x,y,z)$ 
can be defined as $\bigl((x,y),z\bigr)$, and so forth.

A \textbf{function}\index{function} or \textbf{map}\index{map} 
from $A$ to $B$ 
is a subset $f$ of $A\times B$ such that, 
for each $a$ in $A$, 
there is exactly one $b$ in $B$ such that $(a,b)\in f$.  
Then instead of $(a,b)\in f$, we write 
\begin{equation}\label{eqn:f}
  f(a)=b.
\end{equation}
We have then
\begin{equation*}
A=\{x\colon\Exists yf(x)=y\},
\end{equation*}
that is, $A=\{x\colon\Exists y(x,y)\in f\}$.
The set $A$ is called the \textbf{domain} of $f$.
A function is sometimes said to be a function \textbf{on} its domain.
For example, the function $f$ here is a function on $A$.
The \textbf{range} of $f$ is the subset
\begin{equation*}
\{y\colon\Exists xf(x)=y\}
\end{equation*}
of $B$.  
If this range is actually equal to $B$,
then we say that $f$ is \textbf{surjective onto} $B$,
or simply that $f$ is \textbf{onto} $B$.
Strictly speaking, it would not make sense to say $f$ was surjective or onto, simply.

A function $f$ is
\textbf{injective} or \textbf{one-to-one,} if
\begin{equation*}
\Forall x\Forall z(f(x)=f(z)\lto x=z).
\end{equation*}
The expression $f(x)=f(z)$ is an abbreviation of $\Exists y(f(x)=y\land f(z)=y)$,
which is another way of writing $\Exists y\bigl((x,y)\in f\land(z,y)\in f\bigr)$.
An injective function from $A$ \emph{onto} $B$ is a \textbf{bijection} from $A$ to $B$.

If it is not convenient to name a function with a single letter like $f$, 
we may write the function as
\begin{equation*}
x\mapsto f(x),
\end{equation*}
where the expression $f(x)$ would be replaced by some particular expression involving $x$.  
As an abbreviation of the statement that $f$ is a function from $A$ to $B$, 
we may write
\begin{equation}\label{eqn:f:B->A}
f\colon A\to B.
\end{equation}
Thus, while the symbol $f$ can be understood as a \emph{noun,} 
the expression $f\colon A\to B$ is a complete \emph{sentence.}  
If we say, ``Let $f\colon A\to B$,'' we mean
let $f$ be a function from $A$ to $B$.

If $f\colon A\to B$ and $D\included A$, then the subset $\{y\colon\Exists x(x\in D\land y=f(x))\}$ of $B$
can be written as one of%%%%%
\footnote{The notation $f(D)$ is also used, but the ambiguity is dangerous, 
at least in set theory as such.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{align*}
&\{f(x)\colon x\in D\},&
&f[D].
\end{align*}
This set is the \textbf{image} of $D$ under $f$.
Similarly, we can write
\begin{equation*}
A\times B=\{(x,y)\colon x\in A\land y\in B\}.
\end{equation*}
Then variations on this notation are possible.
For example, if $f\colon A\to B$ and $D\included A$, we can define
\begin{equation*}
f\restriction D=\{(x,y)\in f\colon x\in D\}.
\end{equation*}

\begin{theorem}
If $f\colon A\to B$ and $D\included A$, then 
\begin{equation*}
f\restriction D\colon D\to B
\end{equation*}
and, for all $x$ in $D$, $(f\restriction D)(x)=f(x)$.
\end{theorem}

If $f\colon A\to B$ and $g\colon B\to C$, 
then we can define
\begin{equation*}
g\circ f=\{(x,z)\colon\Exists y(f(x)=y\land g(y)=z)\};
\end{equation*}
this is called the \textbf{composite} of $(g,f)$.

\begin{theorem}\label{thm:composite}
If $f\colon A\to B$ and $g\colon B\to C$, then
\begin{equation*}
g\circ f\colon A\to C.
\end{equation*}
If also $h\colon C\to D$, then
\begin{equation*}
h\circ(g\circ f)=(h\circ g)\circ f.
\end{equation*}
\end{theorem}

We define
\begin{equation*}
\id_A=\{(x,x)\colon x\in A\};
\end{equation*}
this is the \textbf{identity} on $A$.

\begin{theorem}\label{thm:id}
$\id_A$ is a bijection from $A$ to itself.
If $f\colon A\to B$, then
\begin{align*}
f\circ\id_A&=f,&
\id_B\circ f&=f.
\end{align*}
\end{theorem}

If $f$ is a bijection from $A$ to $B$, we define
\begin{equation*}
f\inv=\{(y,x)\colon f(x)=y\};
\end{equation*}
this is the \textbf{inverse} of $f$.

\begin{theorem}\label{thm:inverses}
\mbox{}
\begin{compactenum}
\item
The inverse of a bijection from $A$ to $B$ is a bijection from $B$ to $A$.
\item
Suppose $f\colon A\to B$ and $g\colon B\to A$.  Then $f$ is a bijection from $A$ to $B$ whose inverse is $g$ if and only if
\begin{align*}
g\circ f&=\id_A,&f\circ g&=\id_B.
\end{align*}
\end{compactenum}
\end{theorem}


In the definition of the cartesian product $A\times B$ 
and of functions from $A$ to $B$,
we may replace the sets $A$ and $B$ with classes.
For example, we may speak of the function $x\mapsto\{x\}$ on the class of all sets.
If $\bm F$ is a function on some class $\bm C$,
and $A$ is a \emph{subset} of $\bm C$,
then by the \textbf{Replacement Axiom,}
the image $\bm F[A]$ is also a set.
For example, if we are given a function $n\mapsto G_n$ on $\upomega$, 
then by Replacement the class $\{G_n\colon n\in\upomega\}$ is a set.
Then the union of this class is a set, which we denote by
\begin{equation*}
\bigcup_{n\in\upomega}G_n.
\end{equation*}

A \textbf{singulary operation}\index{singulary} on $A$ is a function
from $A$ to itself; a \textbf{binary}\index{binary operation} on $A$
is a function 
from $A\times A$ to $A$.  A \textbf{binary relation} on $A$ is a
subset of $A\times A$; if $R$ is such, and $(a,b)\in R$, we often
write
\begin{equation*}\label{mathrel}
  a\mathrel Rb.
\end{equation*}
A singulary operation on $A$ is a particular kind of binary
relation on $A$; for such a relation, we already have the
special notation in~\eqref{eqn:f}.  
The reader will be familiar
with other kinds of binary relations, such as \emph{orderings.}
We are going to define a particular binary relation on p.\ \pageref{<} below
and prove that it is an ordering.

\section{An axiomatic development of the natural numbers}\label{sect:N}

In the preceding sections, we sketched an axiomatic approach to set theory.  
Now we start over with an axiomatic approach to the natural numbers alone.  
In the section after this,
we shall show that the set $\upomega$ 
does actually provide 
a \emph{model} of the axioms for natural numbers 
developed in the present section.

For the moment though, we forget the definition of $\upomega$.  
We forget about starting the natural numbers with $0$.  
Children learn to count starting with $1$, not $0$.  
Let us understand the natural numbers to compose \emph{some} set called $\N$. 
This set has
a distinguished \textbf{initial element,}\index{initial element}
which we call \textbf{one}\index{one} and denote by
\begin{equation*}
1.
\end{equation*}
On the set $\N$ there is also
a distinguished singulary operation of
\textbf{succession,}\index{succession, successor} 
namely the operation
\begin{equation*}
n\mapsto n+1,
\end{equation*}
where $n+1$ is called the \textbf{successor} of $n$. 
Note that some other expression like $S(n)$ might be used for this successor.
For the moment, we have no binary operation called $+$ on $\N$.

I propose to refer to the ordered triple $(\N,1,n\mapsto n+1)$ as an
\emph{iterative structure.}
In general, by an \textbf{iterative structure,}\index{iterative} 
I mean any set that has a distinguished element and a distinguished singulary operation.  
Here the underlying set can be called 
the \textbf{universe}\index{universe} of the structure.  
For a simple notational distinction between a structure and its universe, 
if the universe is $A$, 
the structure itself might be denoted by a fancier version of this letter,
such as the Fraktur version $\str A$.  
See Appendix~\ref{app:German} (p.~\pageref{app:German}) for Fraktur versions,
and their handwritten forms, for all of the Latin letters.

 The
iterative structure $(\N,1,n\mapsto n+1)$ is
distinguished among iterative structures by satisfying the
following axioms.
\begin{compactenum}
\item\label{ax:0}
$1$ is not a successor: $1\neq n+1$.
\item\label{ax:inj}
Succession is injective: if $m+1=n+1$, then $m=n$.
\item\label{ax:ind}
The structure admits \textbf{proof by induction,}\index{induction} in
the following sense.  
Every subset $A$ of the universe must be the whole universe,
provided $A$ has the following two closure properties:
  \begin{compactenum}
  \item 
$1\in A$, and
\item
for all $n$, if $n\in A$, then $n+1\in A$.
  \end{compactenum}
\end{compactenum}

These axioms were discovered originally by
Dedekind~\cite[II, VI (71), p.~67]{MR0159773}; 
but they were written down also by Peano~\cite{Peano}, 
and they are often known as the \textbf{Peano axioms.}\index{Peano} 
  
Suppose $(A,b,f)$ is an iterative structure.  
If we successively compute $b$, $f(b)$, $f(f(b))$, $f(f(f(b)))$, and so on, 
either we always get a new element of $A$,
or we reach an element that we have already seen.
In the latter case,
if the first repeated element is $b$,
then the first Peano axiom fails.
If it is not $b$, then the second Peano axiom fails.
The last Peano axiom, the Induction Axiom,
would ensure that every element of $A$ was reached by our computations.  
None of the three axioms implies the others, 
although the Induction Axiom implies 
that exactly one of the other two axioms holds \cite{MR0120156}.
  
The following theorem will allow us to define all of the usual operations on $\N$.  
The theorem is difficult to prove.  
Not the least difficulty is seeing that the theorem \emph{needs} to be proved.%%%%%
\footnote{Peano did not see this need, but Dedekind did.  
Landau discusses the matter \cite[pp.~ix--x]{MR12:397m}.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\emph{Homomorphisms} will be defined generally on p.\ \pageref{hom},
but meanwhile we need a special case.
A \textbf{homomorphism} from $(\N,1,n\mapsto n+1)$ to an iterative structure $(A,b,f)$
is a function $h$ from $\N$ to $A$ such that
\begin{compactenum}[1)]
\item 
$h(1)=b$, and
\item
$h(n+1)=f(h(n))$ for all $n$ in $\N$.
\end{compactenum}

\begin{theorem}[Recursion]\label{thm:rec}
For every iterative structure, there is exactly one
homomorphism from
$(\N,1,n\mapsto n+1)$ to this structure.
\end{theorem}

\begin{proof}
Given an iterative structure $(A,b,f)$,
we seek a homomorphism $h$ from $(\N,1,n\mapsto n+1)$ to $(A,b,f)$.
Then $h$ will be a particular subset of $\N\times A$.
Let $B$ be the set whose elements are the subsets $C$ of $\N\times
A$ such that, if $(n,y)\in C$, then either 
\begin{compactenum}[1)]
\item 
$(n,y)=(1,b)$ or else
\item $C$ has an element
$(m,x)$ such that $(n,y)=(m+1,f(x))$.
\end{compactenum}
In particular, $\{(1,b)\}\in B$.
Also, if $C\in B$ and $(m,x)\in C$, then
\begin{equation*}
C\cup\{(m+1,f(x))\}\in B.
\end{equation*}
Let $R=\bigcup B$; so $R$ is a subset of $\N\times A$.  
We may say $R$ is a \emph{relation from $\N$ to $A$}.  
If $(n,y)\in R$, then (as suggested on p.\ \pageref{mathrel} above)
we may write also 
\begin{equation*}
n\mathrel Ry.  
\end{equation*}
Since $\{(1,b)\}\in
B$, we have $1\mathrel Rb$.  
Also, if $m\mathrel Rx$, then $(m,x)\in C$ for some $C$ in $B$, 
so $C\cup\{(m+1,f(x))\}\in B$, 
and therefore $(m+1)\mathrel R f(x)$.  
Thus $R$ is the desired function $h$,
provided $R$ is actually a \emph{function} from $\N$ to $A$.  
Proving that $R$ is a function from $\N$ to $A$ has two stages.
\begin{asparaenum}[1.]
  \item
Let $D$ be the set of all $n$ in $\N$ 
for which there is $y$ in $A$ such that $n\mathrel Ry$.
Then we have just seen that $1\in D$, and if $n\in D$, then $n+1\in D$.  
By induction, $D=\N$.
Thus if $R$ is a function, its domain is $\N$.
\item
Let $E$ be the set of all $n$ in $\N$ such that,
for all $y$ in $A$,
if $n\mathrel Ry$ and $n\mathrel Rz$, then $y=z$.
Suppose $1\mathrel R y$.  
Then $(1,y)\in C$ for some $C$ in $B$.  
Since $1$ is not a successor, we must have $y=b$, by definition of $B$.  
Therefore $1\in E$.  
Suppose $n\in E$, and $(n+1)\mathrel Ry$.  
Then $(n+1,y)\in C$ for some $C$ in $B$.  
Again since $1$ is not a successor, 
we must have
\begin{equation*}
(n+1,y)=(m+1,f(x))
\end{equation*}
for some $(m,x)$ in $C$.  
Since succession is injective, we must have $m=n$.  
Thus, $y=f(x)$ for some $x$ in $A$ such that $n\mathrel Rx$.
Since $n\in E$, we know $x$ is \emph{unique} such that $n\mathrel Rx$.  
Therefore $y$ is unique such that $(n+1)\mathrel Ry$.  
Thus $n+1\in E$.  
By induction, $E=\N$.
\end{asparaenum}

So $R$ is the desired function $h$.
Finally, $h$ is unique by induction.
\end{proof}

Note well that the proof uses all three of the Peano Axioms.
The Recursion Theorem is often used in the following form.

\begin{corollary}\label{cor:rec}
For every set $A$ with a distinguished element $b$, and for every function
$F$ from $\N\times A$ to $A$, there is a unique function $H$ from $\N$ to
$A$ such that
\begin{compactenum}[1)]
\item 
$H(1)=b$, and
\item
$H(n+1)=F(n,H(n))$ for all $n$ in $\N$.
\end{compactenum}
\end{corollary}

\begin{proof}
Let $h$ be the unique homomorphism from $(\N,1,n\mapsto n+1)$ to
$(\N\times A,(1,b),f)$, where $f$ is the operation
$(n,x)\mapsto(n+1,F(n,x))$.  In particular, $h(n)$ is always an
ordered pair.  By induction, the 
first entry of $h(n)$ is always $n$; so there is a function $H$ from
$\N$ to $A$ such that $h(n)=(n,H(n))$.  Then $H$ is as desired.  By
induction, $H$ is unique.
\end{proof}

We can now use recursion to define, on $\N$,
%\begin{compactenum}[1)]
%  \item
the binary operation
\begin{equation*}
(x,y)\mapsto x+y
\end{equation*}
of \textbf{addition,}\index{addition} and 
%\item
the binary operation
\begin{equation*}
(x,y)\mapsto x\cdot y
\end{equation*}
of \textbf{multiplication.}\index{multiplication}
%\end{compactenum}
More precisely, for each $n$ in $\N$,
we recursively define the operations $x\mapsto n+x$ and $x\mapsto n\cdot x$.
The definitions are:
\begin{align}\label{eqn:+.}
&  \begin{gathered}
n+1=n+1,\\
n\cdot1=n,    
  \end{gathered}&
&  \begin{gathered}
    n+(m+1)=(n+m)+1,\\
n\cdot(m+1)=n\cdot m+n.
  \end{gathered}
\end{align}
The definition of addition might also be written as $n+1=S(n)$ and $n+S(m)=S(n+m)$.
In place of $x\cdot y$, we often write $xy$.
 
\begin{lemma}\label{lem:+}
For all $n$ and $m$ in $\N$,
\begin{align*}
  1+n&=n+1,&(m+1)+n&=(m+n)+1.
\end{align*}
\end{lemma}

\begin{proof}
  Induction.
\end{proof}

\begin{theorem}\label{thm:N-comm}
Addition on $\N$ is
  \begin{compactenum}[1)]
  \item 
\textbf{commutative:}\index{commutative} $n+m=m+n$; and
\item
\textbf{associative:}\index{associative} $n+(m+k)=(n+m)+k$.
  \end{compactenum}
\end{theorem}

\begin{proof}
  Induction and the lemma.
\end{proof}

\begin{theorem}\label{thm:cancel}
  Addition on $\N$ allows \textbf{cancellation:}\index{cancellation}
if $n+x=n+y$, then $x=y$.
\end{theorem}

\begin{proof}
  Induction, and injectivity of succession.
\end{proof}

The analogous proposition for multiplication is Corollary~\ref{cor:mulcan} below.

\begin{lemma}\label{lem:.}
For all $n$ and $m$ in $\N$,
\begin{align*}
  1\cdot n&=n,&(m+1)\cdot n&=m\cdot n+n.
\end{align*}
\end{lemma}

\begin{proof}
  Induction.
\end{proof}

\begin{theorem}\label{thm:mult-comm}
Multiplication on $\N$ is
  \begin{compactenum}[1)]
  \item 
commutative: $nm=mn$;
\item
\textbf{distributive}\index{distributive} over addition: $n(m+k)=nm+nk$; and
\item
associative: $n(mk)=(nm)k$.
  \end{compactenum}
\end{theorem}

\begin{proof}
  Induction and the lemma.
\end{proof}

Landau \cite{MR12:397m} proves \emph{using induction alone} 
that $+$ and $\cdot$ exist 
as given by the recursive definitions above.  
However, Theorem~\ref{thm:cancel} needs more than induction.  
So does the existence of the \textbf{factorial}\label{factorial} function
defined by
\begin{align*}
1!&=1,&(n+1)!&=n!\cdot(n+1).
\end{align*}
So does \textbf{exponentiation,}\index{exponentiation} defined by
\begin{align*}
n^1&=n,&n^{m+1}&=n^m\cdot n.
\end{align*}

The usual ordering $<$ of $\N$ is defined recursively as follows.
First note that $m\leq n$ means simply $m<n$ or $m=n$.  
Then the definition of $<$\label{<} is:
\begin{compactenum}[1)]
\item 
$m\not<1$ (that is, $\lnot\;m<1$);% for \emph{no} $m$ in $\N$;
\item
$m<n+1$ if and only if $m\leq n$.
\end{compactenum}
In particular, $n<n+1$.
Really, it is the sets $\{x\in\N\colon x<n\}$ that are defined by
recursion:
\begin{gather*}
\{x\in\N\colon x<1\}=\emptyset,\\
\{x\in\N\colon x<n+1\}=\{x\in\N\colon x<n\}\cup\{n\}=\{x\in\N\colon x\leq n\}.	
\end{gather*}
We now have $<$ as a binary relation on $\N$;
we must \emph{prove} that it is an ordering.

\begin{theorem}\label{thm:<trans}
  The relation $<$ is \textbf{transitive}\index{transitive} on $\N$,
  that is, if $k<m$ 
  and $m<n$, then $k<n$.
\end{theorem}

\begin{proof}
  Induction on $n$.
\end{proof}

\begin{theorem}\label{thm:<irr}
  The relation $<$ is \textbf{irreflexive}\index{irreflexive} on $\N$:
  $m\not<m$. 
\end{theorem}

\begin{proof}
Since every element $k$ of $\N$ is less than some other element (namely $k+1$), 
it is enough to prove
\begin{equation*}
k<n\lto k\not<k.
\end{equation*}
We do this by induction on $n$.
The claim is vacuously true when $n=1$.
Suppose it is true when $n=m$.
If $k<m+1$, then $k<m$ or $k=m$.
If $k<m$, then by inductive hypothesis $k\not<k$.
If $k=m$, but $k<k$, then $k<m$,
so again $k\not<k$.
Thus the claim holds when $n=m+1$.
By induction, it holds for all $n$.
\end{proof}

\begin{lemma}\label{lem:1leqm}
  $1\leq m$.
\end{lemma}

\begin{proof}
  Induction.
\end{proof}

\begin{lemma}\label{lem:k<m}
If $k<m$, then $k+1\leq m$.
\end{lemma}

\begin{proof}
The claim is vacuously true when $m=1$.  
Suppose it is true when $m=n$.  
Say $k<n+1$.  
Then $k\leq n$.  
If $k=n$, then $k+1=n+1$, so $k+1\leq n+1$.  
If $k<n$, then $k+1\leq n$ by inductive hypothesis,
so $k+1<n+1$ by transitivity (Theorem~\ref{thm:<trans}), and therefore $k+1\leq n+1$.  
Thus the claim holds when $m=n+1$.
By induction, the claim holds for all $m$.
\end{proof}

\begin{theorem}\label{thm:<tot}
  The relation $<$ is \textbf{total}\index{total} on $\N$: either
  $k\leq m$ or 
  $m<k$.
\end{theorem}

\begin{proof}
By Lemma~\ref{lem:1leqm}, the claim is true when $k=1$.
Suppose it is true when $k=\ell$.
If $m\not<\ell+1$, then $m\nleq\ell$.
In this case, we have both $m\neq\ell$ and $m\not<\ell$.
Also, by the inductive hypothesis, $\ell\leq m$, so $\ell<m$,
and hence $\ell+1\leq m$ by Lemma~\ref{lem:k<m}.
\end{proof}

Because of Theorems~\ref{thm:<trans},~\ref{thm:<irr},
and~\ref{thm:<tot}, the relation $<$ is a \textbf{linear ordering}\label{lo} of $\N$,
and $\N$ is \textbf{linearly ordered}\index{order}\index{strict} by $<$. 

\begin{theorem}\label{thm:m+x=n}
  For all $m$ and $n$ in $\N$, we have $m<n$ if and only if the
  equation
  \begin{equation}\label{eqn:m+x=n}
    m+x=n
  \end{equation}
is soluble in $\N$.
\end{theorem}

\begin{proof}
  By induction on $k$, if $m+k=n$, then $m<n$.  We prove the converse by induction on $n$.  We never have $m<1$.  Suppose for some $r$ that, for all $m$, if $m<r$, then the equation $m+x=r$ is soluble.  Suppose also $m<r+1$.  Then $m<r$ or $m=r$.  In the former case, by inductive hypothesis, the equation $m+x=r$ has a solution $k$, and therefore $m+(k+1)=r+1$.  If $m=r$, then $m+1=r+1$.  Thus the equation $m+x=r+1$ is soluble whenever $m<r+1$.
By
induction, for all $n$ in $\N$, if $m<n$, then~\eqref{eqn:m+x=n}
is soluble in $\N$. 
\end{proof}

\begin{theorem}\label{thm:N<}
If $k<\ell$, then
\begin{align*}
  k+m&<\ell+m,&
km&<\ell m.
\end{align*}
\end{theorem}

Here the first conclusion is a refinement of Theorem~\ref{thm:cancel}; the second yields the following analogue of Theorem~\ref{thm:cancel} for multiplication.

\begin{corollary}\label{cor:mulcan}
  If $km=\ell m$, then $k=\ell$.
\end{corollary}

\begin{theorem}\label{thm:wo}
  $\N$ is well-ordered\index{well ordered} by $<$: every
  nonempty set of natural 
  numbers has a least element.
\end{theorem}

\begin{proof}
  Suppose $A$ is a set of natural numbers with no least element.  Let
  $B$ be the set of natural numbers $n$ such that, if $m\leq n$, then
  $m\notin A$.  Then
  $1\in B$, since otherwise $1$ would be the least
  element of $A$.  Suppose $m\in B$.  Then $m+1\in B$, since otherwise
  $m+1$ would be the least element of $A$.  By induction, $B=\N$, so
  $A=\emptyset$. 
\end{proof}

The members of $\N$ are the \textbf{positive integers;}\label{no-th}
the full set $\Z$ of \emph{integers} will be defined formally 
in \S\ref{sect:ZQ} below, on p.\ \pageref{Z}.
As presented in Books VII--IX of Euclid's \emph{Elements,}
number theory is a study of the positive integers;
but a consideration of all integers is useful in this study,
and it is the integers that will constitute a motivating example,
first of a group (p.\ \pageref{Z-as-group}), 
and then of a ring (p.\ \pageref{Z-as-ring}).
Fundamental topics of number theory developed in the main text are:
\begin{compactitem}
\item
greatest common divisors, the Euclidean algorithm,
and numbers prime to one another (sub-\S\ref{subsect:gen-sub}, p.\ \pageref{gcd});
\item
prime numbers, Fermat's Theorem, and Euler's generalization of this (\S\ref{sect:Lagrange}, p.\ \pageref{prime-number});
\item
Chinese Remainder Theorem, primitive roots (\S\ref{sect:fgag}, p.\ \pageref{thm:CRT});
\item
Euclid's Lemma (\S\ref{sect:factor}, p.\ \pageref{thm:Euc-Lem});
\item
the Fundamental Theorem of Arithmetic (\S\ref{sect:int-dom}, p.\ \pageref{thm:FTA}).
\end{compactitem}




\section{A construction of the natural numbers}\label{sect:omega}

For an arbitrary set $a$, let
\begin{equation*}
a'=a\cup\{a\}.
\end{equation*}
If $A$ belongs to the class $\It$ defined in \eqref{eqn:upomega-defn} on p.\ \pageref{eqn:upomega-defn},
then $0\in A$, and $A$ is closed under the operation $x\mapsto x'$,
and so $(A,0,{}')$ is an iterative structure.
In particular, by the Axiom of Infinity,
$\upomega$ is a set,
so $(\upomega,0,{}')$ is an iterative structure.

\begin{theorem}
The structure $(\upomega,0,{}')$ satisfies the Peano Axioms.
\end{theorem}

\begin{proof}
There are three things to prove.
\begin{asparaenum}
\item
In $(\upomega,0,{}')$, the initial element $0$ is not a successor,
because for all sets $a$, the set $a'$ contains $a$, so it is nonempty.
\item
$(\upomega,0,{}')$ admits induction, because,
if $A\included\upomega$, and $A$ contains $0$ and is closed under $x\mapsto x'$,
then $A\in\It$, so $\bigcap\It\included A$, that is, $\upomega\included A$.
\item
It remains to establish that $x\mapsto x'$ is injective on $\upomega$.
On p.\ \pageref{<}, we used recursion to define a relation $<$ on $\N$ so that
\begin{align}\label{eqn:mnot1}
m&\not<1,& m<n+1&\liff m<n\lor m=n.
\end{align}
Everything that we proved about this relation required only these properties, and induction.
On $\upomega$, we do not know whether we have recursion,
but we have \eqref{eqn:mnot1} when $<$ is $\in$ and $1$ is $0$: that is, we have
\begin{align*}
m&\notin0,&m\in n'&\liff m\in n\lor m=n.
\end{align*}
Therefore $\in$ must be a linear ordering of $\upomega$, by the proofs in the previous section.
We also have Lemma~\ref{lem:k<m} for $\in$, 
that is, if $n$ is in $\upomega$, and $m\in n$, then either $m'=n$ or $m'\in n$.
In either case, $m'\in n'$.
Thus, if $m\neq n$, then either $m\in n$ or $n\in m$, and so $m'\in n'$ or $n'\in m'$,
and therefore $m'\neq n'$.\qedhere
\end{asparaenum}
\end{proof}

Given sets $A$ and $B$, we define
\begin{equation*}
A\setminus B=\{x\in A\colon x\notin B\}.
\end{equation*}
As a corollary of the foregoing theorem, we have that the iterative structure $(\upomega\setminus\{0\},1,{}')$ also satisfies the Peano Axioms.
We may henceforth assume that $(\N,1,x\mapsto x+1)$ is this structure.
In particular,
\begin{equation*}
\N=\upomega\setminus\{0\}.
\end{equation*}
Thus we no longer need the Peano Axioms as axioms;
they are theorems about $(\N,1,x\mapsto x+1)$ and $(\upomega,0,{}')$.

We extend the definitions of addition and multiplication on $\N$ to allow their arguments to be $0$:
\begin{align*}
n+0&=n=0+n,&n\cdot0&=0=0\cdot n.
\end{align*}

\begin{theorem}
Addition and multiplication are commutative and associative on $\upomega$,
and multiplication distributes over addition.
\end{theorem}

In particular, the equations \eqref{eqn:+.} 
making up the recursive definitions of addition and multiplication on $\N$ 
are still valid on $\upomega$.
The same goes for factorials and exponentiation when we define\label{0factorial}
\begin{align*}
0!&=1,&n^0&=1.
\end{align*}
 

\section{Structures}\label{sect:structures}

For us, the point of using the von-Neumann definition of the natural numbers 
is that, under
this definition, a natural number $n$ 
is a particular set, namely $\{0,\dots,n-1\}$, with $n$ elements.
We denote the set of functions from a set $B$ to a set $A$ by\label{A^B}
\begin{equation*}
  A^B.
\end{equation*}
In particular then, $A^n$ is the set of functions from
$\{0,\dots,n-1\}$ into $A$.  We can denote such a function by one of
\begin{align*}
&(x_0,\dots,x_{n-1}),&
&(x_i\colon i<n),
\end{align*}
so that 
\begin{equation*}
A^n=\{(x_0,\dots,x_{n-1})\colon x_i\in A\}.
\end{equation*}
Thus, $A^2$ can be identified with $A\times A$, and $A^1$ with $A$
itself.  There is exactly one function from $0$ to $A$, namely $0$; so
\begin{equation*}
  A^0=\{0\}=1.
\end{equation*}
An $n$-ary \textbf{relation}\index{relation} on $A$ is a subset of $A^n$;
an \textbf{$n$-ary}\index{n-ary@$n$-ary}
\textbf{operation}\index{operation} on $A$ is a function from $A^n$ to
$A$.  Relations and operations that are $2$-ary, $1$-ary, or $0$-ary
can be called 
\textbf{binary,}\index{binary} \textbf{singulary,}\index{singulary}
or \textbf{nullary,}\index{nullary} respectively; after the appropriate
identifications, this agrees with the terminology used in
\S \ref{sect:f}. 
A nullary operation on $A$ can be identified with an element of $A$.  

Generalizing the terminology used at the beginning of \S \ref{sect:N},
we define a \textbf{structure}\index{structure} as a set
together with some distinguished relations and operations on the set;
as before, the set is the \textbf{universe}\index{universe} of the
structure.  Again, if the 
universe is $A$, then
the whole structure might be denoted by $\str A$; if $B$, then $\str
B$.

The \textbf{signature}\index{signature}\label{signature} of a structure comprises a
symbol for each 
distinguished relation and operation of the structure.  For example,
we have so far obtained $\N$ as a structure in the signature $\{1,+,\cdot,<\}$.
We may then write out this structure as
\begin{equation*}
(\N,1,+,\cdot,<).
\end{equation*}
In this way of writing the structure,
an expression like $+$ stands not for the \emph{symbol} of addition,
but for the actual operation on $\N$.
In general, if $s$ is a symbol of the signature of $\str A$, then the
corresponding relation or operation on $A$ can, for precision, 
be denoted by $s^{\str A}$,
in case there is another structure around with the same signature.
We use this notation in writing the next definition,
and later on p.\ \pageref{interpretation}.

A \textbf{homomorphism}\label{hom} 
from a structure $\str A$ to a structure $\str
B$ of the same signature is a function $h$ from $A$ to $B$ that
\emph{preserves} the distinguished relations and operations: this
means
\begin{gather}\notag
  h(f^{\str A}(x_0,\dots,x_{n-1}))=f^{\str
    B}(h(x_0),\dots,h(x_{n-1})),\\\label{eqn:hom}
(x_0,\dots,x_{n-1})\in R^{\str A}\lto(h(x_0),\dots,h(x_{n-1}))\in
  R^{\str B},
\end{gather}
for all $n$-ary operation-symbols $f$ and relation-symbols $R$ of the
signature, for all $n$ in $\upomega$.  
To indicate that $h$ is a homomorphism from $\str A$ to $\str B$, we may write
\begin{equation*}
h\colon\str A\to\str B
\end{equation*}
(rather than simply $h\colon A\to B$).
We have already seen a special case of a homomorphism in the Recursion Theorem
(Theorem~\ref{thm:rec} on p.\ \pageref{thm:rec} above).

\begin{theorem}\label{thm:hom-comp}
If $h\colon\str A\to\str B$ and $g\colon\str B\to\str C$, then
\begin{equation*}
g\circ h\colon\str A\to\str C.
\end{equation*}
\end{theorem}

A homomorphism is an
\textbf{embedding}\index{embedding} if it is injective and if the converse
of~\eqref{eqn:hom} also holds.  A surjective embedding is an
\textbf{isomorphism.}\index{isomorphism}

\begin{theorem}\label{thm:isom-inv}
The function $\id_A$ is an isomorphism from $\str A$ to itself.
The following are equivalent conditions 
on a bijective homomorphism $h$ from $\str A$ to $\str B$:
\begin{compactenum}[1)]
\item
$h$ is an isomorphism from $\str A$ to $\str B$,
\item
$h\inv$ is a homomorphism from $\str B$ to $\str A$,
\item
$h\inv$ is an isomorphism from $\str B$ to $\str A$.
\end{compactenum}
\end{theorem}

If there is an isomorphism from a structure $\str A$ to a structure $\str B$,
then these two structures are said to be \textbf{isomorphic} to one another,
and we may write
\begin{equation*}
\str A\cong\str B.
\end{equation*}
In this case $\str A$ and $\str B$ are indistinguishable as structures,
and so (out of laziness perhaps) we may \emph{identify}\label{identify} them,
treating them as the \emph{same} structure.
We have already done this, in a sense, 
with $(\N,1,x\mapsto x+1)$ and $(\upomega\setminus\{0\},1,{}')$.
However, we never actually had a set called $\N$,
until we identified it with $\upomega\setminus\{0\}$.
 
A \textbf{substructure}\index{substructure} of a structure $\str B$ 
is a structure $\str A$ of the
same signature such that $A\included B$ 
and the \textbf{inclusion} $x\mapsto x$ of $A$ in
$B$ is an embedding of $\str A$ in $\str B$.
 
\textbf{Model theory} studies structures as such.
\textbf{Universal algebra} studies \textbf{algebras,}\label{algebra}
which are sets with distinguished operations,
but no distinguished relations (except for equality). 
In other words, an algebra is a structure in a signature with no symbols for relations (except equality).

We shall study mainly the algebras called \emph{groups} 
and the algebras called \emph{rings.}
Meanwhile, we have the algebra $(\N,1,+,\cdot)$,
and we shall have more examples in the next section.
 
A \textbf{reduct}\label{reduct} of a structure is obtained by ignoring some of its operations and relations, while the universe remains the same.
The original structure is then an \textbf{expansion} of the reduct.
For example, $(\N,+)$ is a reduct of $(\N,+,\cdot,<)$,
and the latter is an expansion of the former.

\section{Constructions of the integers and rationals}\label{sect:ZQ}
 
The following theorem is an example of something like \emph{localization,} 
which will be the topic of \S\ref{sect:loc} (p.~\pageref{sect:loc}).  
One learns the theorem implicitly in school, 
when one learns about fractions (as on p.\ \pageref{1/2} above).
Perhaps fractions are our first encounter with nontrivial equivalence-classes.

Let $\approx$ be the binary relation on $\N\times\N$ given by%%%%%
\footnote{As a binary relation on $\N\times\N$, 
the relation $\approx$ is a subset of $(\N\times\N)^2$, 
which we identify with $\N^4$.}
\begin{equation}\label{eqn:approx}
  (a,b)\approx(x,y)\liff ay=bx.
\end{equation}

\begin{lemma}\label{lem:approx}
The relation $\approx$ on $\N\times\N$ is an equivalence-relation.   
\end{lemma}

If $(a,b)\in\N\times\N$,
let its equivalence-class with respect to $\approx$
be denoted by $a/b$ or
\begin{equation*}
  \frac ab.
\end{equation*}
Let the set of all such equivalence-classes be denoted by
\begin{equation*}
  \Qp.
\end{equation*}  
This set comprises the \textbf{positive rational numbers.}

\begin{theorem}\label{thm:Qp}
There are well-defined operations $+$, ${}\inv$, and $\cdot$ on $\Qp$
given by the rules
\begin{gather*}
\frac ab+\frac xy=\frac{ay+bx}{by},\\
\left(\frac xy\right)\inv=\frac yx,\\
\frac ab\cdot\frac xy=\frac{ax}{by}.
\end{gather*}
There is a linear ordering $<$ of $\Qp$ given by
\begin{equation*}
\frac ab<\frac xy\liff ay<bx.
\end{equation*}
The structure $(\N,+,\cdot,<)$ embeds in $(\Qp,+,\cdot,<)$ under the
map $x\mapsto x/1$.  
Addition and multiplication are commutative and associative on $\Qp$, 
and multiplication distributes over addition.
Moreover,
\begin{align}\label{eqn:Qp-group}
  \frac11\cdot\frac xy&=\frac xy,&
  \left(\frac xy\right)\inv\cdot\frac xy&=\frac11.
\end{align}
Finally,
\begin{equation}\label{eqn:Qp-ordered}
  \frac11<\frac ab\land\frac11<\frac xy\lto\frac11<\frac ab\cdot\frac xy.
\end{equation}
\end{theorem}

The operations on $\Qp$ in the theorem are said to be \emph{well defined}
because it is not immediately obvious that they exist at all.
It is possible that $a/b=c/d$ although $(a,b)\neq(c,d)$.
In this case one must check that (for example) $(ay+bx)/(by)=(cy+dx)/(dy)$.
See p.\ \pageref{well-defined} below.

Because multiplication is commutative and associative on $\Qp$,
and also \eqref{eqn:Qp-group} holds,
the structure $(\Qp,1/1,{}\inv,\cdot)$ is an \textbf{abelian group.}
Because in addition $\Qp$ is linearly ordered by $<$,
and \eqref{eqn:Qp-ordered} holds,
the structure $(\Qp,1/1,{}\inv,\cdot,<)$ is an \textbf{ordered group.}

In the theorem, the natural number $n$ is \emph{not} a rational number, 
but $n/1$ is a rational number.
However, we henceforth \emph{identify} $n$ and $n/1$:
we treat them as the same thing.
Then we have $\N\included\Qp$.

In the definition \eqref{eqn:approx} of $\approx$, 
if we replace multiplication with addition,
then instead of the positive rational numbers,
we obtain the \emph{integers.}
Probably this construction of the integers is not learned in school.
If it were, possibly students would never think 
that $-x$ is automatically a negative number.
In any case, by applying this construction of the integers
to the positive rational numbers, 
we obtain all of the rational numbers as follows.
Let $\sim$ be the binary relation on
$\Qp\times\Qp$ given by
\begin{equation}\label{eqn:sim}
  (a,b)\sim(x,y)\liff a+y=b+x.
\end{equation}

\begin{lemma}
The relation $\sim$ on $\Qp\times\Qp$ is an equivalence-relation.    
\end{lemma}

If $(a,b)\in\Qp\times\Qp$, 
let its equivalence-class with respect to $\sim$ be denoted by
\begin{equation*}
  a-b.
\end{equation*}
Let the set of such equivalence-classes be denoted by
\begin{equation*}
  \Q.
\end{equation*}
 
\begin{theorem}\label{thm:Q}
There are well-defined operations $-$, $+$, and $\cdot$ on $\Q$ 
given by the rules
\begin{gather*}
-(x-y)=y-x,\\
(a-b)+(x-y)=(a+x)-(b+y),\\
(a-b)\cdot(x-y)=(ax+by)-(ay+bx).
\end{gather*}
There is a dense linear ordering $<$ of $\Q$ given by
\begin{equation*}
  a-b<x-y\liff a+y<b+x.
\end{equation*}
The structure $(\Qp,+,\cdot,<)$ embeds in $(\Q,+,\cdot,<)$ 
under the map $x\mapsto(x+1)-1$.  
The structure $(\Q,1-1,-,+,<)$ is an ordered group.
Moreover, multiplication is also commutative and associative on $\Q$,
and it distributes over addition.
\end{theorem}

We identify $\Qp$ with its image in $\Q$.
Now we can refer to the elements of $\Q$ as the \textbf{rational numbers.}
We denote $1-1$ by
\begin{equation*}
  0.
\end{equation*}
Then $\Qp=\{x\in\Q\colon0<x\}$.  We can now define\label{Z}
\begin{equation*}
  \Z=\{x-y\colon(x,y)\in\N\times\N\};
\end{equation*}
this is the set of \textbf{integers.}

\begin{theorem}\label{thm:Z->Q}
The structure $(\Z,0,-,+,1,\cdot,<)$ is a well-defined substructure of 
$(\Q,0,-,+,1,\cdot,<)$.
The structure $(\Z,0,-,+,<)$ is an ordered group.
\end{theorem}

We can also think of $\Q$ as arising from $\Z$ 
by the same construction that gives us $\Qp$ from $\N$.
This gives us the following.

\begin{theorem}
There is a surjective function $(x,y)\mapsto x/y$ 
from the product $\Z\times(\Z\setminus\{0\})$ to $\Q$ such that
\begin{gather*}
\frac ab+\frac xy=\frac{ay+bx}{by},\\
1=\frac11,\\
\frac ab\cdot\frac xy=\frac{ax}{by}.
\end{gather*}
Then
\begin{equation*}
\frac ab<\frac xy\liff ay<bx.
\end{equation*}
There is an operation $x\mapsto x\inv$ on $\Q\setminus\{0\}$ given by
\begin{equation*}
\left(\frac xy\right)\inv=\frac yx.
\end{equation*}
Then $(\Q\setminus\{0\},1,{}\inv,\cdot)$ is a commutative group.
Finally,
\begin{equation}\label{eqn:Q-ord}
0<x\land 0<y\lto 0<x\cdot y.
\end{equation}
\end{theorem}

Because $(\Q,0,-,1,<)$ is an ordered group,
and $(\Q\setminus\{0\},1,{}\inv,\cdot)$ is a commutative group,
and multiplication distributes over addition in $\Q$,
and \eqref{eqn:Q-ord} holds,
the structure $(\Q,0,-,+,1,\cdot,<)$ is an \textbf{ordered field.}
However, the ordering of $\Q$ is not \textbf{complete,} that is,
there are subsets with upper bounds, 
but no \emph{suprema} (least upper bounds).
An example is the set $\{x\in\Q\colon 0<x\land x^2<2\}$.
 
\section{A construction of the reals}\label{sect:R}
 
There is a technique due to Dedekind for completing $(\Q,<)$
to obtain the completely ordered set $(\R,<)$.
As Dedekind says explicitly \cite[pp.~39--40]{MR0159773},
the original inspiration for the technique
is the definition of \emph{proportion}
found in Book V of Euclid's \emph{Elements.}

In the geometry of Euclid, 
let us refer to the collection of straight lines 
that are equal to a given straight line 
(in the sense of p.\ \pageref{line} above) 
as the \emph{length} of that straight line.
Two lengths of straight lines can be \emph{added} together
by taking two particular lines with those lengths and setting them end to end.
Then lengths of straight lines 
compose the set of positive elements of an ordered group. 
Therefore individual lengths can be \emph{multiplied,}
that is, taken several times.
Indeed, if $A$ is a length, and $n\in\N$, 
we can define the multiple $nA$ of $A$ recursively:
\begin{align*}
  1A&=A,&(n+1)A&=nA+A.
\end{align*}
It is assumed that, for any two lengths $A$ and $B$,
some multiple of $A$ is greater than $B$:
this is the \textbf{archimedean property.}
If $C$ and $D$ are two more lengths,
then $A$ has to $B$ the \emph{same ratio} that $C$ has to $D$,
provided that, for all $k$ and $m$ in $\N$,
\begin{equation*}
  kA>mB\liff kC>mD.
\end{equation*}
In this case, the four lengths $A$, $B$, $C$, and $D$ are \emph{proportional,} 
and we may write
\begin{equation*}
  A:B:\;:C:D.
\end{equation*}
We can write the condition for this proportionality as
\begin{equation*}
  \left\{\frac xy\in\Qp\colon xB<yA\right\}
=\left\{\frac xy\in\Qp\colon xD<yC\right\}.
\end{equation*}
Dedekind's observation is that such sets can be defined
independently of all geometrical considerations.
Indeed, we may define a \textbf{positive real number}
as a nonempty, proper subset $C$ of $\Qp$ such that
\begin{compactenum}[1)]
\item
if $a\in C$ and $b\in\Qp$ and $b<a$, then $b\in C$, and
\item
if $C$ has a supremum in $\Qp$, this supremum does not belong to $C$.  
\end{compactenum}
Let the set of all positive real numbers be denoted by
\begin{equation*}
  \Rp.
\end{equation*}

\begin{theorem}\label{thm:R-complete}
The set $\Rp$ is completely ordered by proper inclusion.
There are well-defined operations $+$, ${}\inv$, and $\cdot$ on $\Rp$
given by the rules
\begin{gather*}
	C+D=\{x+y\colon x\in C\land y\in D\},\\
	C\inv=\{x\inv\colon x\in\Qp\land\Exists y(y\in\Qp\setminus C\land y<x)\},\\
	C\cdot D=\{x\cdot y\colon x\in C\land y\in D\}.
\end{gather*}
Then $(\Qp,+,{}\inv,\cdot)$ embeds in $(\Rp,+,{}\inv,\cdot)$
under the map $y\mapsto\{x\in\Qp\colon x<y\}$.
\end{theorem}

Let us identify $\Qp$ with its image in $\Rp$.
We may also write $\pincluded$ on $\Rp$ as $<$.

For every $n$ in $\upomega$, 
an $n$-ary operation $f$ on $\Rp$ is \textbf{continuous}
if, for every $(A_i\colon i<n)$ in $(\Rp)^n$,
for every $\epsilon$ in $\Qp$, 
there is $(\delta_i\colon i<n)$ in $(\Qp)^n$ such that, 
for all $(X_i\colon i<n)$ in $(\Rp)^n$, if
\begin{equation*}
\bigwedge_{i<n}A_i-\delta_i<X_i<A_i+\delta_i,
\end{equation*}
then
\begin{equation*}
f(A_i\colon i<n)-\epsilon<f(X_i\colon i<n)<f(A_i\colon i<n)+\epsilon.
\end{equation*}

\begin{theorem}
The operations $+$, ${}\inv$, and $\cdot$ on $\Rp$ are continuous.
Every composite of continuous functions on $\Rp$ is continuous.
\end{theorem}

\begin{lemma}
The only continuous singulary operation on $\Rp$ that is $1$ on $\Qp$ 
is $1$ everywhere.
\end{lemma}

\begin{theorem}%\label{thm:R-of}
The structure $(\Rp,1,{}\inv,\cdot,<)$ 
is an ordered group,
and addition is commutative and associative on $\Rp$,
and multiplication distributes over addition on $\Rp$.
\end{theorem}

Now define $\sim$ on $\Rp\times\Rp$ as in~\eqref{eqn:sim}.
Just as before, this is an equivalence relation.
The set of its equivalence-classes is denoted by
\begin{equation*}
  \R.
\end{equation*}
Just as before, we obtain the ordered field $(\R,0,-,+,{}\inv,\cdot,<)$.
But now, the ordering is complete.
We identify $\Rp$ with its image in $\R$.
The elements of $\R$ are the \textbf{real numbers.}

\begin{lemma}
  For every $n$ in $\N$, 
for every element $A$ of a completely and densely ordered group,
the equation
\begin{equation*}
  nX=A
\end{equation*}
is soluble in the group.
\end{lemma}

\begin{theorem}
Suppose $(G,0,-,+,<)$ is a completely and densely ordered group,
and $u$ is a positive element of $G$, 
and $b$ is an element of $\Rp$ such that $1<b$.
Then there is an isomorphism from $(G,0,-,+,<)$ to $(\Rp,1,{}\inv,\cdot,<)$
taking $u$ to $b$. 
\end{theorem}

By this theorem, 
the completely ordered groups $(\R,0,-,+,<)$ and $(\Rp,1,{}\inv,\cdot,<)$ 
are isomorphic,
and indeed for every $b$ in $\Rp$ such that $b>1$,
there is an isomorphism taking $1$ to $b$.
This isomorphism is denoted by
\begin{equation*}
  x\mapsto b^x,
\end{equation*}
and its inverse is
\begin{equation*}
  x\mapsto\log_bx.
\end{equation*}

\begin{theorem}[Intermediate Value Theorem]
If $f$ is a continuous singulary operation on $\R$, and $f(a)\cdot f(b)<0$, 
then $f$ has a zero between $a$ and $b$.
\end{theorem}

Hence for example the function $x\mapsto x^2-2$ 
must have a zero in $\R$ between $1$ and $2$.
More generally,
if $A\included\R$, then the set of \emph{polynomial functions over $A$} 
is obtained from the set of constant functions taking values in $A$, 
along with $-$, $+$, $\cdot$, and the projections $(x_0,\dots,x_{n-1})\mapsto x_i$, 
by closing under taking composites.  
Then all polynomial functions over $\R$ are continuous, 
and so the Intermediate Value Theorem applies to the singulary polynomial functions.
Therefore the ordered field $\R$ is said to be \textbf{real-closed.}
However, there are smaller real-closed ordered fields:
we establish this in the next section.




\section{Countability}\label{sect:count}

A set is \textbf{countable} if it embeds in $\upomega$; 
otherwise the set is \textbf{uncountable.}

\begin{theorem}
The sets $\N$, $\Z$, and $\Q$ are all countable.
\end{theorem}

\begin{theorem}\label{thm:pow-un}
$\pow{\upomega}$ is uncountable.
\end{theorem}

\begin{proof}
Suppose $f$ is an injection from $\upomega$ to $\pow{\upomega}$.
Then the subset $\{x\colon x\notin f(x)\}$ of $\upomega$ is not in the range of $f$,
by a variant of the Russell Paradox:
if $\{x\colon x\notin f(x)\}=f(a)$, then $a\in f(a)\liff a\notin f(a)$.
\end{proof}

\begin{theorem}
The set $\R$ is uncountable.
\end{theorem}

\begin{proof}
We shall use the notation whose properties 
will be established in sub-\S\ref{subsect:PS} (p.~\pageref{subsect:PS}).
For every subset $A$ of $\upomega$, 
let $g(A)$ be the set of rational numbers $x$ such that, 
for some $n$ in $\upomega$,
\begin{equation*}
x<\sum_{k\in A\cap n}\frac2{3^k}.
\end{equation*}
Then $g(A)$ is a real number by the original definition.
The function $A\mapsto g(A)$ from $\pow{\upomega}$ to $\R$ is injective.
\end{proof}

However, suppose we let $\rc A$ be the smallest field 
that contains all zeros from $\R$ of singulary polynomial functions over $A$.  
If we define $A_0=\Q$ and $A_{n+1}=\rc{{A_n}}$, 
then $\bigcup_{n\in\upomega}A_n$ will contain 
all zeros from $\R$ of singulary polynomial functions over itself.
In fact $\bigcup_{n\in\upomega}A_n$ will be $\rc{\Q}$.
But this field is countable.

We can say more about a set than whether it is countable or uncountable.
The main reason for doing this here is that it provides a good example of a \emph{classification}: see \S\ref{sect:fin} on p.\ \pageref{sect:fin} below.
A class is \textbf{transitive} if it properly includes all of its elements.
A transitive \emph{set} is an \textbf{ordinal} 
if it is well-ordered by the relation of membership.
Then all of the elements of $\upomega$ are ordinals, and so is $\upomega$ itself.
The class of all ordinals can be denoted by
\begin{equation*}
\on.
\end{equation*}

\begin{theorem}
The class $\on$ is transitive and is well-ordered by membership.
\end{theorem}

In particular, $\on$ cannot contain itself;
so it is not a set.
This result is the \textbf{Burali-Forti Paradox}\label{BF}~\cite{Burali-Forti}.

\begin{theorem}
Every well-ordered set $(A,<)$ is isomorphic to a unique ordinal.
The isomorphism is a certain function $f$ on $A$, 
and this function is determined by the rule
\begin{equation*}
f(b)=\{f(x)\colon x<b\}.
\end{equation*}
\end{theorem}
 
There are three classes of ordinals.
\begin{compactenum}
\item 
A \textbf{successor} is an ordinal $\alpha'$ for some ordinal $\alpha$.
\item
The least ordinal, $0$, is in a class by itself.
\item
A \textbf{limit} is an ordinal that is neither $0$ nor a successor.
\end{compactenum}
Then $\upomega$ is the least limit ordinal.

Two sets are \textbf{equipollent} if there is a bijection between them.
An ordinal is a \textbf{cardinal} 
if it is the least ordinal that is equipollent with it.

\begin{theorem}%\label{thm:fin-ord-card}
Every element of $\upomega$ is a cardinal.  So is $\upomega$ itself.
\end{theorem}

The class of cardinals can be denoted by
\begin{equation*}
\cn.
\end{equation*}
By the Axiom of Choice, every set is equipollent with some unique cardinal.
This is the \textbf{cardinality}\label{cardinality} or \textbf{size} of that set.
The cardinality of an arbitrary set $A$ is denoted by
\begin{equation*}
\size A.
\end{equation*}
A countable set has cardinality $\upomega$ or less;
uncountable sets have cardinality greater than $\upomega$.
The \textbf{finite} sets are those whose cardinalities are less than $\upomega$;
other sets are \textbf{infinite.}

\begin{theorem}\label{thm:Ded}
A set is infinite if and only if it is in bijection with a proper subset of itself.
\end{theorem}
 
\begin{theorem}
There is a bijection from $\on$ to $\cn\setminus\upomega$ (the class of infinite cardinals).
\end{theorem} 
 
The bijection of the theorem is denoted by
\begin{equation*}
\alpha\mapsto\aleph_{\alpha}.
\end{equation*}
Thus $\upomega=\aleph_0$, 
and $\size{\R}=\aleph_{\alpha}$ for some ordinal $\alpha$ that is greater than $0$.
The \emph{Continuum Hypothesis} is that $\size{\R}=\aleph_1$,
but we shall make no use of this.
 
%\newpage

\part{Groups}\label{part:groups}

\chapter{Basic properties of groups and rings}\label{ch:gr}

We define both groups and rings in this chapter.  
We define rings (in \S\ref{sect:rings}, p.\ \pageref{sect:rings}), 
because at the beginning of the next chapter 
(\S\ref{sect:gl}, p.\ \pageref{sect:gl}) 
we shall define certain groups%
---namely \emph{general linear groups}---%
in terms of rings.

\section{Groups}\label{sect:groups}


Given a set $A$, we may refer to a bijection from $A$ to itself as a
\textbf{symmetry}\index{symmetry} or \textbf{permutation}\index{permutation} of $A$.  Let us denote the set
of these symmetries by
\begin{equation*}
  \Sym A.
\end{equation*}
This set can be equipped with:
\begin{compactenum}[1)]
  \item
the element 
%(or nullary operation\footnote{It is a nullary operation on $\Sym A$, but a singulary operation on $A$.}) 
  $\id_{A}$, which is the
  \textbf{identity}\index{identity} on $A$; 
\item
the singulary operation $f\mapsto f\inv$, which is \textbf{inversion;}\index{inversion}
\item
the binary operation $(f,g)\mapsto f\circ g$, which is \textbf{composition.}\index{composition}
\end{compactenum}
(The functions $\id_A$, $f\inv$, and $f\circ g$ are defined in \S\ref{sect:f}, p.\ \pageref{sect:f}).
The structure or algebra denoted by
\begin{equation*}
(\Sym A,\id_A,{}\inv,\circ)
\end{equation*}
is the \textbf{complete
  group of symmetries}\index{complete group of symmetries} of $A$.  A
substructure of this can be called 
simply a 
\textbf{group of symmetries}\index{group of symmetries} of $A$.  
(Structures, substructures, and algebras are defined in \S\ref{sect:structures}, p.\ \pageref{sect:structures}.)
  
We may use the expression $\Sym A$ 
to denote the whole structure 
%\linebreak
$(\Sym A,\id_A,{}\inv,\circ)$.  
When we speak of a \textbf{subgroup}\label{subgroup} of $\Sym A$, 
we mean a subset that contains the identity 
and is closed under inversion and composition.
  
\begin{theorem}\label{thm:sym}
For all sets $A$, for all elements $f$, $g$, and $h$ of a group of symmetries of $A$,
\begin{gather*}
f\circ\id_A=f,\\
\id_A\circ f=f,\\
f\circ f\inv=\id_A,\\
f\inv\circ f=\id_A,\\
(f\circ g)\circ h=f\circ(g\circ h).	
\end{gather*}
\end{theorem}  

\begin{proof}
Theorems \ref{thm:id}, \ref{thm:inverses}, and \ref{thm:composite} 
in \S\ref{sect:f} (p.\ \pageref{sect:f}).
\end{proof}

A \textbf{group}\index{group} is a structure with the properties of a group of symmetries given by the last theorem, Theorem~\ref{thm:sym}.  That is, a group is a structure $(G,\gid,{}\inv,\cdot)$ in which the following equations are \emph{identities} 
(are true for all values of the variables):
\begin{gather*}
x\cdot\gid=x,\\
\gid\cdot x=x,\\
x\cdot x\inv=\gid,\\
x\inv\cdot x=\gid,\\
(x\cdot y)\cdot z=x\cdot(y\cdot z).
\end{gather*}
We may say also that these equations are the \emph{axioms} of groups: 
this means that their \emph{generalizations}
($\Forall xx\cdot\gid=x$ and so forth)
are true in every group, by definition.
According to these axioms, in every group $(G,\gid,{}\inv,\cdot)$,
\begin{compactenum}[1)]
\item
the binary operation $\cdot$ is associative,
\item
the element $\gid$ is an identity with respect to $\cdot$,
\item
the singulary operation ${}\inv$ is inversion with respect to $\cdot$ and $\gid$.
\end{compactenum}
The identity and the inversion 
will turn out to be uniquely determined by the binary operation, 
by Theorem~\ref{thm:u} on p.\ \pageref{thm:u}.

A group is called \textbf{abelian}\label{abelian} if its binary operation is commutative.
If $A$ has at least three elements, then $\Sym A$ is not abelian.
However, every one-element set $\{a\}$ becomes an abelian group when we define
\begin{align*}
\gid&=a,&a\inv&=a,&a\cdot a&=a.
\end{align*}
This group is a \textbf{trivial group.}  
All trivial groups are isomorphic to one another.
Therefore, as suggested on p.\ \pageref{identify},
we tend to identify them with one another,
referring to each of them as \emph{the} trivial group.

Besides symmetry groups and the trivial group, 
we have four examples of groups from \S\ref{sect:ZQ} (p.\ \pageref{sect:ZQ}), 
namely\label{Z-as-group}
\begin{align*}
&(\Qp,1,{}\inv,\cdot),&
&(\Q,0,-,+),&
&(\Z,0,-,+),&
&(\Q\setminus\{0\},1,{}\inv,\cdot),
\end{align*}
and three examples from \S\ref{sect:R} (p.\ \pageref{sect:R}):
\begin{align*}
(\Rp,1,{}\inv,\cdot),&
&(\R,0,-,+),&
&(\R\setminus\{0\},1,{}\inv,\cdot).
\end{align*}
These seven examples are all abelian.
Four of them
are the origin of a terminological convention.
In an arbitrary group $(G,\gid,{}\inv,\cdot)$,
the operation $\cdot$ is usually called \textbf{multiplication.} 
We usually write $g\cdot h$ as $gh$.  
The element $g\inv$ is the \textbf{inverse} of $g$.  
The element $\gid$ is the \textbf{identity,} and 
it is sometimes denoted by $1$ rather than $\gid$. 

Evidently the groups of rational numbers, of integers, and of real numbers 
use different notation.
These groups are said to be written \textbf{additively.}\label{additive}
Additive notation is often used for abelian groups,
but almost never for other groups.
It will be useful to have one more example of an abelian group.
Actually there will be one example for each positive integer.
If $a$ and $b$ are arbitrary integers for which the equation
\begin{equation*}
ax=b
\end{equation*}
has a solution in $\Z$, then we say that $a$ \textbf{divides} $b$,\label{divides}
or $a$ is a \textbf{divisor} or \textbf{factor} of $b$,
or $b$ is a \textbf{multiple} of $a$,
and we may write
\begin{equation*}
a\divides b.
\end{equation*}
Using the notation due to Gauss \cite[p.~1]{Gauss}, 
for a positive integer $n$ and arbitrary integers $a$ and $b$ we write
\begin{equation*}
a\equiv b\pmod n
\end{equation*}
if $n\divides a-b$.
In this case we say $a$ and $b$ are \textbf{congruent} 
with respect to the \textbf{modulus} $n$.
This manner of speaking is abbreviated 
by putting the Latin word \emph{modulus} into the ablative case:
$a$ and $b$ are congruent \textbf{\emph{modulo}} $n$.%%%%%
\footnote{The ablative case of Latin
corresponds roughly to the \emph{-den hali} of Turkish.
Gauss writes in Latin; however, instead of \emph{modulo} $n$, 
he says \emph{secundum modulum} $n$, ``according to the modulus $n$'' \cite[p.~2]{Gauss-Latin}.}
%%%%%
Still following Gauss, 
we may say too that $a$ is a \textbf{residue} of $b$ 
with respect to the modulus $n$.


\begin{theorem}\label{thm:mod-n}
Let $n\in\N$.  
\begin{compactenum}
\item
Congruence \emph{modulo} $n$ is an equivalence-relation on $\Z$.
\item
If $a\equiv x$ and $b\equiv y\pmod n$, then
\begin{equation*}
-a\equiv-x\And a+b\equiv x+y\And ab\equiv xy\pmod n.
\end{equation*}
\end{compactenum}
\end{theorem}

Thus congruence \emph{modulo} $n$ is an example of a \emph{congruence} in the sense to be defined on p.\ \pageref{congruence}.
The set of congruence-classes of integers \emph{modulo} $n$ can be denoted by
\begin{equation*}%\label{mod}
\Zmod n.
\end{equation*}
If $a$ is some integer, 
we can denote its congruence-class \emph{modulo} $n$ by something like $[a]$ or $\bar a$, or more precisely by
\begin{equation*}
a+n\Z.
\end{equation*}
(This is a \emph{coset} in the sense to be defined in \S\ref{sect:cosets}, p.\ \pageref{sect:cosets}.)

\begin{theorem}\label{thm:res}
For every positive integer $n$,
the function
\begin{equation*}
x\mapsto x+n\Z
\end{equation*}
from $\{0,\dots,n-1\}$ to $\Zmod n$ is a bijection.
\end{theorem}

\begin{proof}
If $0\leq i<j<n$, then $1\leq j-i<n$, and so $nx>j-i$ for all $x$ in $\N$.
By Theorem~\ref{thm:N<} (p.\ \pageref{thm:N<}),
\begin{equation*}
i\not\equiv j\pmod n.
\end{equation*}
Thus the given map is injective.
If $k\in\Z$, let $a$ be its least nonnegative residue 
(which exists by Theorem~\ref{thm:wo}).
Then $a<n$ (since otherwise $0\leq a-n<a$, and $a-n$ is also a residue of $k$).
Thus
\begin{equation*}
a+n\Z=k+n\Z.
\end{equation*}
So the given map is surjective.
\end{proof}

Again given a positive integer $n$,
we may treat an arbitrary integer 
as a name for its own congruence-class \emph{modulo} $n$.
In particular, by the last theorem,
we may consider $\Zmod n$ as being the set $\{0,\dots,n-1\}$,
where these $n$ elements are understood to be distinct.
By Theorem~\ref{thm:mod-n}, 
we have a well-defined structure $(\Zmod n,0,-,+,1,\cdot)$,
where $0$ and $1$ stand for 
their respective congruence-classes $n\Z$ and $1+n\Z$.
The following theorem is then easy to prove.
In fact the formal verification will be made even easier
by Theorem~\ref{thm:cong} on p.\ \pageref{thm:cong}.

\begin{theorem}\label{thm:Zmod-group}
For each $n$ in $\N$, the structure $(\Zmod n,0,-,+)$ is an abelian group.
\end{theorem}

The (multiplicative) groups of positive rational numbers, 
of non\-zero rational numbers, of positive real numbers, 
and of nonzero real numbers, and the (additive) groups of integers, rational numbers, real numbers, and integers with respect to some modulus, 
are not obviously symmetry groups.
But they can be \emph{embedded} in symmetry groups,
in the sense of \S\ref{sect:structures} (p.\ \pageref{sect:structures}).
Indeed, every element $g$ of a group $G$ (written multiplicatively) determines a
singulary operation $\uplambda_g$ on $G$, given by
\begin{equation*}
  \uplambda_g(x)=gx.
\end{equation*}
Then we have the following.

\begin{theorem}[Cayley]\label{thm:Cay}%
\index{theorem!Cayley's Th---}
For every group $(G,\gid,{}\inv,\cdot)$,
the function
\begin{equation*}
x\mapsto\uplambda_x
\end{equation*}
embeds $(G,\gid,{}\inv,\cdot)$ in the group $(\Sym G,\id_G,{}\inv,\circ)$ of symmetries. 
\end{theorem}

\begin{proof}
We first observe that
\begin{align*}
\uplambda_{\gid}&=\id_G,&\uplambda_{g\cdot h}&=\uplambda_g\circ\uplambda_h,
\end{align*}
because
\begin{gather*}
	\uplambda_{\gid}(x)=\gid\cdot x=x=\id_G(x),\\
	\uplambda_{g\cdot h}(x)=(g\cdot h)\cdot x
	=g\cdot(h\cdot x)=\uplambda_g(\uplambda_h(x))=(\uplambda_g\circ\uplambda_h)(x).
\end{gather*}
Consequently, by Theorem~\ref{thm:inverses} (p.\ \pageref{thm:inverses}), 
each $\uplambda_g$ has an inverse, and
\begin{equation*}
(\uplambda_g)\inv=\uplambda_{g\inv}.
\end{equation*}
This establishes $x\mapsto\uplambda_x\colon G\to\Sym G$ 
and in fact
\begin{equation*}
x\mapsto\uplambda_x\colon (G,\gid,{}\inv,\cdot)\to(\Sym G,\id_G,{}\inv,\circ)
\end{equation*}
---that is, by the notational convention established on p.\ \pageref{hom}, $x\mapsto\uplambda_x$ is a \emph{homomorphism} from the one group to the other.
It is an embedding, since if $\uplambda_g=\uplambda_h$, then in particular
\begin{equation*}
g=g\gid=\uplambda_g(\gid)=\uplambda_h(\gid)=h\gid=h.\qedhere
\end{equation*}
\end{proof}

By Cayley's Theorem, every group can be considered as a symmetry group.

\section{Symmetry groups}\label{sect:sym}

In case $n\in\upomega$, then in place of $\Sym n$ the notation
\begin{equation*}
\mathrm S_n
\end{equation*}
is also used.  
However, most people probably understand $\mathrm S_n$ 
as the complete group of symmetries of the set $\{1,\dots,n\}$.  
It does not really matter 
whether $\{0,\dots,n-1\}$ or $\{1,\dots,n\}$ is used; 
we just need a set with $n$ elements,
and we are using $\{0,\dots,n-1\}$, which is $n$, as this set.

In the following, the \emph{factorial} of a natural number 
was defined on pages \pageref{factorial} and~\pageref{0factorial}, 
and the \emph{cardinality} of a set was defined on p.\ \pageref{cardinality}.

\begin{theorem}\label{thm:Sym-ord}
For each $n$ in $\upomega$,
\begin{equation*}
\size{\Sym n}=n!
\end{equation*}
\end{theorem}

%We shall consider the groups $\Sym n$ at greater length in \S\ref{sect:fin} (p.~\pageref{sect:fin}).  But it will be worth our while to have a look at them now too.  
The group $\Sym 0$ has a unique element, 
$\id_0$, which is itself $0$, that is, $\emptyset$.
The group $\Sym 1$ has the unique element $\id_1$, which is $\{(0,0)\}$.
Thus
\begin{align*}
\Sym0&=1,&
\Sym1&=\bigl\{\{(0,0)\}\bigr\}.
\end{align*}  
As groups, they are both trivial.
We can think of the next symmetry groups---$\Sym2$, $\Sym3$, and so on---%
in terms of the following notion.

\subsection{Automorphism groups}

An \textbf{automorphism} of a structure is an isomorphism from the structure to itself.
The set of automorphisms of a structure $\str A$ can be denoted by
\begin{equation*}
\Aut{\str A}.
\end{equation*}
We have $\Aut{\str A}\included\Sym A$,
where as usual $A$ is the universe of $\str A$; and we have more:  

\begin{theorem}\label{thm:Aut<Sym}
For every structure $\str A$, the set $\Aut{\str A}$ is the universe of a substructure of the group of symmetries of $A$.
\end{theorem}

\begin{proof}
$\Aut{\str A}$ contains $\id_A$ and is closed under inversion and composition.
\end{proof}

Thus we may speak of $\Aut{\str A}$ 
as the \textbf{automorphism group} of $\str A$.


\subsection{Automorphism groups of graphs}

It will be especially useful 
to consider automorphism groups of \emph{graphs.}
As a structure, 
a \textbf{graph on} a set $A$ is an ordered pair $(A,E)$,
where $E$ is an irreflexive, symmetric binary relation on $A$.
This means
\begin{align*}
\lnot\;x&\mathrel Ex,&
x\mathrel Ey&\liff y\mathrel Ex.
\end{align*}
The elements of $A$ are called \textbf{vertices} of the graph.
If $b\mathrel Ec$, then the set $\{b,c\}$ is called an \textbf{edge} of the graph.
An edge is an example of an \textbf{(unordered) pair,} 
that is, a set with exactly two elements.
The set of unordered pairs of elements of a set $A$ can be denoted by
\begin{equation*}
  \unordered2A.
\end{equation*}
Every graph on a given set is determined by its edges, 
and moreover every subset of $\unordered2A$ determines a graph on $A$.
This result can be stated as follows.

\begin{theorem}
For every set $A$, there is a bijection
\begin{equation*}
E\mapsto\bigl\{\{x,y\}\colon(x,y)\in E\bigr\}
\end{equation*}
from the set of irreflexive, symmetric binary relations on $A$
to $\pow{\unordered2A}$.
\end{theorem}

For our purposes, 
the \textbf{triangle} is the graph on $3$ with edge set $\unordered23$.
In a word, it is the \textbf{complete graph on} $3$.
Therefore every permutation of $3$ is an automorphism of the triangle.
The vertices of this triangle 
can be envisioned 
as the points $(1,0,0)$, $(0,1,0)$, and $(0,0,1)$ in the space $\R^3$.
An automorphism of this triangle then 
induces a permutation of the coordinate axes of $\R^3$.

Similarly, the \textbf{tetrahedron} is the complete graph on $4$,
and so each permutation of $4$ is an automorphism of the tetrahedron.
The tetrahedron can be envisioned 
as having vertices $(1,0,0,0)$, $(0,1,0,0)$, $(0,0,1,0)$, and $(0,0,0,1)$ in $\R^4$.

In general,\label{coord}  
$\Sym n$ can be understood as comprising the permutations of the coordinate axes of $\R^n$.
In this way, an element $\sigma$ of $\Sym n$ determines the permutation
\begin{equation*}
(x_i\colon i<n)\mapsto(x_{\sigma\inv(i)}\colon i<n)
\end{equation*}
of $\R^n$.  
The reason why we use $\sigma\inv$ in this rule is the following.
Suppose we denote by $f_{\sigma}$ the permutation of $\R^n$ given by this rule.
Then
\begin{align*}
f_{\tau}(f_{\sigma}(x_i\colon i<n))
&=f_{\tau}(x_{\sigma\inv(i)}\colon i<n)\\
&=(x_{\sigma\inv(\tau\inv(i))}\colon i<n)\\
&=(x_{(\tau\sigma)\inv(i)}\colon i<n)\\
&=f_{\tau\sigma}(x_i\colon i<n).
\end{align*}
Thus $\sigma\mapsto f_{\sigma}$ is a homomorphism from $\Sym n$ to $\Sym{\R^n}$.
Another way to see this is to recall 
that an element $(x_i\colon i<n)$ of $\R^n$ 
is just a function $i\mapsto x_i$ from $n$ to $\R$.  
Denoting this function simply by $x$, we have
\begin{gather*}
f_{\sigma}(x)=x\circ\sigma\inv,\\
f_{\tau}(f_{\sigma}(x))
=x\circ\sigma\inv\circ\tau\inv=x\circ(\tau\circ\sigma)\inv=f_{\tau\sigma}(x).
\end{gather*}
This idea will come back in \S\ref{sect:gl} (p.~\pageref{sect:gl}).
Meanwhile,
we are going to develop a way to distinguish 
the \emph{orientation-preserving} permutations of the axes,
namely the permutations that can be achieved by rotation without reflection.

If $n\geq3$, we may consider the \textbf{$n$-gon}
to be the graph on $n$ with the $n$ edges
\begin{align*}
&\{0,1\},&
&\{1,2\},&
&\{2,3\},&
&\dots,&
&\{n-2,n-1\},&
&\{n-1,0\}.
\end{align*}
Considering $n$ as $\Zmod n$, 
we can also write these edges more symmetrically as
\begin{equation*}
\{i,i+1\},
\end{equation*}
where $i\in\Zmod n$.
The $3$-gon is the triangle.
The \textbf{square} is the $4$-gon.
The \textbf{$n$th dihedral group,}
denoted by one of
\begin{align*}
&\Dih n,&
&\mathrm D_n,
\end{align*}
is the automorphism group of the $n$-gon; it is a subgroup of $\Sym n$.

\begin{theorem}\label{thm:Dih}
If $n\geq3$, then
every element $\sigma$ of $\Dih n$ is determined by $(\sigma(0),\sigma(1))$.
Moreover, $\sigma(0)$ can have any value in $n$,
and then $\sigma(1)$ can and must be $\sigma(0)\pm1$.
Thus
\begin{equation*}
\size{\Dih n}=2n.
\end{equation*}
\end{theorem}

Theorem~\ref{thm:Dih-2} on p.\ \pageref{thm:Dih-2} will build on this theorem.

\subsection{A homomorphism}%\label{subsect:43}

Every permutation of $4$ is an automorphism of the tetrahedron.
It can also be understood as a permutation of a certain set of three elements as follows.

\begin{theorem}\label{thm:43}
There is a surjective homomorphism from $\Sym4$ onto $\Sym3$.
\end{theorem}

\begin{proof}
Let $A$ be the set consisting of the three partitions
\begin{align*}
&\bigl\{\{0,1\},\{2,3\}\bigr\},&
&\bigl\{\{0,2\},\{1,3\}\bigr\},&
&\bigl\{\{0,3\},\{1,2\}\bigr\}
\end{align*}
of $4$ into two pairs.
If $\sigma\in\Sym4$, there is an element $\tilde{\sigma}$ in $\Sym A$ given by
\begin{equation*}
\tilde{\sigma}\Bigl(\bigl\{\{i,j\},\{k,\ell\}\bigr\}\Bigr)
=\bigl\{\{\sigma(i),\sigma(j)\},\{\sigma(k),\sigma(\ell)\}\bigr\}.
\end{equation*}
Then $\sigma\mapsto\tilde{\sigma}$ is a surjective homomorphism from $\Sym4$ to $\Sym A$.
\end{proof}

This homomorphism will be of use later: in an example on p.\ \pageref{43-1},
and then in the proof of Theorem~\ref{thm:A4} on p.\ \pageref{thm:A4},
which will be used on p.\ \pageref{43-2}.


\subsection{Cycles}

We now consider symmetry groups of arbitrary sets.
We shall be interested in the results mainly for finite sets;
but obtaining the results for infinite sets also will take no more work.
For any set $A$, for any $\sigma$ in $\Sym A$, 
we make the recursive definition\label{sym-pow}
\begin{align*}
\sigma^0&=\id_A,&\sigma^{n+1}&=\sigma\circ\sigma^n.
\end{align*}
If $n\in\N$, we also define
\begin{equation*}
\sigma^{-n}=(\sigma^n)\inv.
\end{equation*}
Thus we have a function $n\mapsto\sigma^n$ from $\Z$ to $\Sym A$.

\begin{theorem}\label{thm:s^n}
For every set $A$, for every $\sigma$ in $\Sym A$, the function $n\mapsto\sigma^n$ from $\Z$ to $\Sym A$ is a homomorphism of groups.
\end{theorem}

\begin{proof}
Since $\sigma^0=\id_A$ and $\sigma^{-n}=(\sigma^n)\inv$ for \emph{all} $n$ in $\Z$, it remains to show
\begin{equation}\label{eqn:ZS}
\sigma^{n+m}=\sigma^n\circ\sigma^m
\end{equation}
for all $m$ and $n$ in $\Z$.  
We start with the case where $m$ and $n$ are in $\upomega$.
Here we use induction on $n$.
The claim holds easily if $n=0$.  Suppose it holds when $n=k$.  Then
\begin{align*}
\sigma^{(k+1)+m}
&=\sigma^{(k+m)+1}\\
&=\sigma\circ\sigma^{k+m}\\
&=\sigma\circ(\sigma^k\circ\sigma^m)\\
&=(\sigma\circ\sigma^k)\circ\sigma^m\\
&=\sigma^{k+1}\circ\sigma^m,
\end{align*}
and so \eqref{eqn:ZS} holds when $n=k+1$.
By induction, it holds for all $n$ in $\upomega$, for all $m$ in $\upomega$.
Hence in this case also we have
\begin{equation*}
\sigma^{-n-m}=(\sigma^{m+n})\inv=(\sigma^m\circ\sigma^n)\inv=\sigma^{-n}\circ\sigma^{-m}.
\end{equation*}
Finally, if also $m\leq n$, then we have $\sigma^{n-m}\circ\sigma^m=\sigma^n$, so
\begin{gather*}
	\sigma^{n-m}=\sigma^n\circ(\sigma^m)\inv
	=\sigma^n\circ\sigma^{-m},\\
	\sigma^{m-n}
	=(\sigma^{n-m})\inv
	=(\sigma^n\circ\sigma^{-m})\inv
	=\sigma^m\circ\sigma^{-n}.
\end{gather*}
This completes all cases of \eqref{eqn:ZS}.
\end{proof}

If $b\in A$ and $\sigma\in\Sym A$, 
then the set $\{\sigma^n(b)\colon n\in\Z\}$ 
is called the \textbf{orbit of $b$ under $\sigma$}.  
A subset of $A$ is an \textbf{orbit under $\sigma$} 
if it is the orbit under $\sigma$ of some element of $A$.  
So for example if we think of the tetrahedron
as a pyramid with an equilateral triangular base,
and we let $\sigma$ be the automorphism
that rotates the base clockwise by $120^{\circ}$,
then the orbit under $\sigma$ of any vertex of the base
is the set of vertices of the base.

An orbit is \textbf{trivial} if it has size $1$;
if it is larger, it is \textbf{nontrivial.}
Then a permutation is a \textbf{cycle} if, under it,
there is exactly one nontrivial orbit.
Cycles are like prime numbers,
by Theorem~\ref{thm:cycles} below.
Under the identity, there are no nontrivial orbits.
As we do not consider $1$ to be a prime number,
so we do not consider the identity to be a cycle.

If the nontrivial orbits under some cycles are disjoint from one another,
then the cycles themselves are said to be \textbf{disjoint} from one another.
If $\sigma$ and $\tau$ are disjoint cycles, then $\sigma\tau=\tau\sigma$,
and so on for larger numbers of disjoint cycles:
the order of multiplying them makes no difference to the product.
It even makes sense to talk about the product of an infinite set of disjoint cycles:

\begin{theorem}
Suppose $\Sigma$ is a set of disjoint cycles in $\Sym A$,
where the nontrivial orbit under each $\sigma$ in $\Sigma$ is $A_{\sigma}$.
Then there is a unique element $\pi$\label{pi} of $\Sym A$ given by
\begin{equation*}
\pi(x)=
\begin{cases}
	\sigma(x),&\text{ if }x\in A_{\sigma},\\
	x,&\text{ if }x\in A\setminus\bigcup_{\sigma\in\Sigma}A_{\sigma}.
\end{cases}
\end{equation*}
\end{theorem}

\begin{proof}
The rule gives us \emph{at least} one value of $\pi(x)$ for each $x$ in $A$;
and this value is itself in $A$.
But there is \emph{at most} one value, 
because the sets $A_{\sigma}$ are known to be disjoint from one another,
so that if $x\in A_{\sigma}$, and $\sigma\neq\tau$, then $x\notin A_{\tau}$.
Thus $\pi$ is unique.  Also $\pi\colon A\to A$.
Moreover, each $\sigma$ in $\Sigma$, restricted to $A_{\sigma}$, is a permutation of $A_{\sigma}$.
Thus, replacing each $\sigma$ with $\sigma\inv$,
we obtain $\pi\inv$ by the given rule.
Therefore $\pi\in\Sym A$.
\end{proof}

The permutation $\pi$ found in the theorem 
is the \textbf{product} of the cycles in $\Sigma$.  
We may denote this product by
\begin{equation*}
\prod\Sigma.
\end{equation*}
In the notation of the theorem, 
if $i\mapsto\sigma_i$ is a bijection from some set $I$ to $\Sigma$,
then we can write
\begin{equation*}
\prod_{i\in I}\sigma_i=\prod\Sigma.
\end{equation*}
This function $i\mapsto\sigma_i$ can be called an \textbf{indexing} of $\Sigma$ by $I$.
The product given by the theorem is independent of any indexing.
If $j\mapsto\tau_j$ is an indexing of $\Sigma$ by some set $J$,
then there must be a bijection $f$ from $I$ to $J$ such that $\tau_{f(i)}=\sigma_i$
for each $i$ in $I$,
and so by the theorem,
\begin{equation*}
\prod_{j\in J}\tau_j=
\prod_{i\in I}\sigma_i
=\prod_{i\in I}\tau_{f(i)}.
\end{equation*}

Next, instead of disjoint cycles, we consider disjoint orbits under some one permutation.

\begin{theorem}\label{thm:orbits}
Any two distinct orbits under the same permutation are disjoint.
In particular, if $a$ belongs to an orbit under $\sigma$,
then that orbit is $\{\sigma^k(a)\colon k\in\Z\}$.
If this orbit has size $n$ for some $n$ in $\N$, 
then the orbit is $\{\sigma^k(a)\colon k\in n\}$.
\end{theorem}

\begin{proof}
We prove the contrapositive of the first claim.
Suppose $a$ and $b$ have intersecting orbits under $\sigma$.
Then for some $m$ and $n$ in $\Z$ we have $\sigma^m(a)=\sigma^n(b)$.
In this case, for all $k$ in $\upomega$,
\begin{equation*}
\sigma^k(a)=\sigma^{n+k-m}(b).
\end{equation*}
Thus the orbit of $a$ is included in the orbit of $b$.
By symmetry, the two orbits are the same.

For the final claim, 
suppose the orbit of $a$ is finite.
Then for some $i$ in $\Z$ and $n$ in $\N$, we must have
\begin{equation}\label{eqn:sia}
\sigma^i(a)=\sigma^{i+n}(a).
\end{equation}
Then $a=\sigma^{\pm n}(a)$, 
and so, by induction, for all $k$ in $\Z$ we have $a=\sigma^{kn}(a)$,
and more generally
\begin{equation*}
i\equiv j\pmod n\lto \sigma^i(a)=\sigma^j(a).
\end{equation*}
Therefore, by Theorem~\ref{thm:res}, the orbit of $a$ is $\{\sigma^i(a)\colon i\in n\}$.
If $n$ is minimal such that \eqref{eqn:sia} holds for some $i$, then $n$ is the size of the orbit of $a$.
\end{proof}

\begin{theorem}\label{thm:cycles}
For every set $A$, every element of $\Sym A$ 
is the product of disjoint cycles in a unique way.
\end{theorem}

\begin{proof}
Supposing $\sigma\in\Sym A$, 
let $I$ be the set of nontrivial orbits under $\sigma$.  
These are all disjoint from one another, by Theorem~\ref{thm:orbits}.
For each $i$ in $I$,
we can define a unique cycle $\sigma_i$ that agrees with $\sigma$ on $i$, 
but otherwise is the identity.  
Then $\sigma=\prod_{i\in I}\sigma_i$.
Suppose $\sigma=\prod\Sigma$ for some set $\Sigma$ of disjoint cycles.
Then for each $i$ in $I$, we must have $\sigma_i\in\Sigma$.
Moreover, $i\mapsto\sigma_i$ must be a bijection from $I$ to $\Sigma$.
\end{proof}

The cardinality of the unique nontrivial orbit under a cycle
is the \textbf{order} of the cycle.
We may say that the identity has order $1$.  Then orders come from the set $\N\cup\{\aleph_0\}$, which is $\upomega'\setminus\{0\}$.

\subsection{Notation}

Suppose $\sigma\in\Sym n$ for some $n$.  Then
\begin{equation*}
  \sigma
=\bigl\{\bigl(0,\sigma(0)\bigr),\dots,\bigl(n-1,\sigma(n-1)\bigr)\bigr\}.
\end{equation*}
We might write this equation a bit more simply in the form
\begin{equation}\label{eqn:braces}
  \sigma=\left\{
\begin{matrix}
  0&\dots&n-1\\\sigma(0)&\dots&\sigma(n-1)
\end{matrix}
\right\}.
\end{equation}
This is a set with $n$ elements, 
and each of those elements is an ordered pair, here written vertically.  
The braces in \eqref{eqn:braces} might be replaced with parentheses, as in
\begin{equation*}
  \begin{pmatrix}
       0  & \cdots &        n-1\\
\sigma(0) & \cdots & \sigma(n-1)
  \end{pmatrix}.
\end{equation*}
However, this notation is potentially misleading, 
because it does not stand for a \emph{matrix} 
such as we shall define in \S\ref{sect:gl} (p.~\pageref{sect:gl}).  
In a matrix, the order of the columns (as well as the rows) matters;
but in \eqref{eqn:braces}, the order of the columns does not matter.
The order of the rows \emph{does} matter.
Indeed, we have
\begin{equation*}
  \left\{
\begin{matrix}
\sigma(0)&\dots&\sigma(n-1)\\  0&\dots&n-1
\end{matrix}
\right\}=\sigma\inv.
\end{equation*}

Suppose $\sigma$ is a cycle, and $k$ belongs to the nontrivial orbit under it.
Then we may use for $\sigma$ the notation
\begin{equation}\label{eqn:cycle}
\bigl(k\cdiv \sigma(k)\cdiv \cdots\cdiv \sigma^{m-1}(k)\bigr),
\end{equation}
where $m$ is the order of $\sigma$.
By Theorem~\ref{thm:orbits}, we can replace $k$ with any member of the same orbit.
So the expression in \eqref{eqn:cycle} should be understood,
not as a matrix, 
but rather as a ring or a circle,\footnote{The English word ``circle'' comes from the Latin \emph{circulus} (which is a diminutive form of \emph{circus}); ``cycle'' comes ultimately from the Greek \gk{k'uklos}.  Both \emph{circulus} and \gk{k'uklos} mean something round; and \gk{k'uklos} is cognate with ``wheel.''}
as in Figure~\ref{fig:6} where $m=6$.
\begin{figure}[ht]
\centering
\psset{unit=7mm}
\begin{pspicture}(-2,-2)(2,2)
\rput(0,2){$k$}
\rput(1.73,1){$\sigma(k)$}
\rput(1.73,-1){$\sigma^2(k)$}
\rput(0,-2){$\sigma^3(k)$}
\rput(-1.73,-1){$\sigma^4(k)$}
\rput(-1.73,1){$\sigma^5(k)$}
\end{pspicture}
\caption{A cycle.}\label{fig:6}
\end{figure}
In general, 
the circle can be broken and written in one line in $m$ different ways, as
\begin{equation*}
\bigl(\sigma^i(k) \cdiv  \cdots \cdiv  \sigma^{m-1}(k) \cdiv  k \cdiv  
\sigma(k) \cdiv  \cdots \cdiv  \sigma^{i-1}(k)\bigr)
\end{equation*}
for any $i$ in $m$.  
The identity $\id_n$ might be denoted by $(0)$, 
or even by $(i)$ for any $i$ in $n$.

When $n$ is small, we can just list the elements of $\Sym n$,
according to their factorizations into disjoint cycles.
For example, $\Sym3$ consists of
\begin{gather*}
	(0),\\
	 (0\cdiv 1),\ (0\cdiv 2),\ (1\cdiv 2),\\
	  (0\cdiv 1\cdiv 2),\ (0\cdiv 2\cdiv 1),
\end{gather*}
where no nontrivial factorizations are possible,
while $\Sym4$ consists of
\begin{gather*}
	(0),\\
	 (0\cdiv 1),\ (0\cdiv 2),\ (0\cdiv 3),\ (1\cdiv 2),\ (1\cdiv 3),\
(2\cdiv 3),\\
 (0\cdiv 1\cdiv 2),\ (0\cdiv 1\cdiv 3),\ (0\cdiv 2\cdiv 3),\ (1\cdiv 2\cdiv 3),\\
(0\cdiv 1)(2\cdiv 3),\ (0\cdiv 2)(1\cdiv 3),\ (0\cdiv 3)(1\cdiv 2),\\
 (0\cdiv 1\cdiv 2\cdiv 3),\
(0\cdiv 1\cdiv 3\cdiv 2),\ (0\cdiv 2\cdiv 1\cdiv 3),\ (0\cdiv 2\cdiv 3\cdiv 1),\ (0\cdiv 3\cdiv 1\cdiv 2),\
(0\cdiv 3\cdiv 2\cdiv 1).
\end{gather*}
For larger $n$, one might like to have some additional principle of organization.  
But then the whole study of groups might be understood as a search for such principles (for organizing the elements of a group, or organizing all groups).

If $m<n$, the map $\sigma\mapsto\sigma\cup\id_{n\setminus m}$ 
is an embedding of the group $\Sym m$ in $\Sym n$.
Similarly each $\Sym n$ embeds in $\Sym{\upomega}$;\label{sym-omega}
but the latter has
many elements that are not in the image of any $\Sym n$.
Indeed, we have the following, which can be obtained as a corollary of Theorem~\ref{thm:pow-un}.

\begin{theorem}\label{thm:sym-o-un}
$\Sym{\upomega}$ is uncountable.
\end{theorem}

\subsection{Even and odd permutations}

An element of $\Sym n$ is said to be \textbf{even}
if, in its factorization as a product of disjoint cycles, 
there is an even number of cycles of even order.
Otherwise the permutation is \textbf{odd.}
Thus cycles of even order are odd; cycles of odd order are even.
The reason for this peculiar situation is suggested by Theorem~\ref{thm:prod-trans} below.

Meanwhile, if $m<n$, then, 
under the embedding $\sigma\mapsto\sigma\cup\id_{n\setminus m}$ 
just discussed of $\Sym m$ in $\Sym n$,
evenness and oddness are preserved.
That is, $\sigma$ in $\Sym m$ is even if and only if $\sigma\cup\id_{n\setminus m}$ is even.

We define the \textbf{signum} function $\sgn$ from $\Sym n$ to $\{\pm1\}$ by
\begin{equation*}
\sgn(\sigma)=\begin{cases}
	1,&\text{ if $\sigma$ is even,}\\
	-1,&\text{ if $\sigma$ is odd.}
\end{cases}
\end{equation*}
Theorem~\ref{thm:sgn} on p.\ \pageref{thm:sgn} below
is that this function is a homomorphism.

A cycle of order $n$ can be called an \textbf{$n$-cycle.}
It is consistent with this terminology to consider the identity as a $1$-cycle.
A $2$-cycle is also called a \textbf{transposition.}\index{transposition}

\begin{theorem}\label{thm:prod-trans}
Every finite permutation is a product of transpositions.
A cycle of order $m$ is a product of $m-1$ transpositions.
\end{theorem}

\begin{proof}
$(0 \cdiv  1 \cdiv  \cdots \cdiv  m-1)
=(0 \cdiv  m-1)
\dotsm
(0 \cdiv  2)
(0 \cdiv  1)$.
\end{proof}

Thus an even permutation is the product of an even number of transpositions,
and an odd permutation is the product of an odd number of transpositions.
If the converse is true,
then the signum function must be a homomorphism.

However, proving that converse is not especially easy.
The neatest approach might seem to be as follows.
A \textbf{tournament} on set $A$ 
is an irreflexive, antisymmetric, total binary relation on $A$.
This means, if $i$ and $j$ are distinct elements of $A$,
then exactly one of $(i,j)$ and $(j,i)$ belongs to a given tournament on $A$,
but $(i,i)$ never belongs.
If $(i,j)$ belongs to a given tournament, 
we can think of $i$ as the winner of a match between $i$ and $j$;
this is the reason for the name \emph{tournament.}
If $T$ is a tournament on $n$,
and $\sigma\in\Sym n$, we can define
\begin{equation*}
\tilde{\sigma}(T)=\{(\sigma(i),\sigma(j))\colon(i,j)\in T\}.
\end{equation*}
This is another (or possibly the same) tournament on $n$.
Fixing a particular tournament $U$ on $n$, such as $\{(i,j)\colon i<j<n\}$, 
we let
\begin{equation*}
A=\{\tilde{\sigma}(U)\colon\sigma\in\Sym n\}.
\end{equation*}
Then every $\tilde{\sigma}$, restricted to $A$, is a permutation of $A$,
and indeed the map $\sigma\mapsto\tilde{\sigma}\restriction A$ is a homomorphism
from $\Sym n$ to $\Sym A$.
Let
\begin{align*}
A_0&=\{T\in A\colon\size{T\setminus U}\text{ is even}\},&
A_1&=A\setminus A_0.
\end{align*}
We should like to show that, for every $\sigma$ in $\Sym n$, for each $i$ in $2$, 
the set $\{\tilde{\sigma}(T)\colon T\in A_i\}$ 
is $A_i$ again, if $\sigma$ is even, and $A_{1-i}$ if $\sigma$ is odd.
Thus we should obtain a homomorphism from $\Sym n$ to $\Sym{\{A_0,A_1\}}$, 
and the signum function would be a homomorphism.
However, proving all of these things seems to be no easier
than just proving directly Theorem~\ref{thm:sgn} on p.\ \pageref{thm:sgn} below.

\section{Monoids and semigroups}\label{sect:ms}

\subsection{Definitions}

The structure $(\N,1,\cdot)$ cannot \emph{expand} to a group,
that is, it cannot be given an operation of inversion so that the structure becomes a group.
(See p.\ \pageref{reduct}.)
The structure is however a \emph{monoid.}
A \textbf{monoid}%
\index{monoid} is a structure $(M,{\gid},\cdot)$
satisfying the axioms
\begin{gather*}
	x\gid=x,\\
	\gid x=x,\\
	(xy)z=x(yz).
\end{gather*}
In particular, if $(G,\gid,\inv,\cdot)$ is a group, then the \emph{reduct}
$(G,\gid,\cdot)$ is a monoid.  

Not every monoid is the reduct of a group: the example of $(\N,1,\cdot)$ shows this.  So does the example of a set $M$ with an element $\gid$ and at least one other element, if we define $xy$ to be $\gid$ for all $x$ and $y$ in $M$.

For another example, given an arbitrary set $A$, 
we have the monoid $(A^A,\id_A,\circ)$.  (See p.\ \pageref{A^B}.)  
However, if $A$ has at least two elements, then $A^A$ has elements (for example, constant functions) that are not injective and are therefore not invertible.

If
$(M,\gid,\cdot)$ is a monoid, 
then by the proof of
Cayley's Theorem on p.\ \pageref{thm:Cay}, 
the map $x\mapsto\uplambda_x$ is a homomorphism 
from $(M,\gid,\cdot)$ to $(M^M,\id_M,\circ)$.
However, this homomorphism might not be an embedding.

Even though the monoid $(\N,1,\cdot)$ does not expand to a group, 
it embeds in the monoid $(\Qp,1,\cdot)$, 
which expands to the group $(\Qp,1,{}\inv,\cdot)$, 
by the method of fractions learned in school
and reviewed as Theorem~\ref{thm:Qp} on p.\ \pageref{thm:Qp} above.
There is no such embedding 
if we replace the monoid $(\N,1,\cdot)$ 
with the monoid $(A^A,\id_A,\circ)$ for a set $A$ with at least two elements.  
For, in this case, Lemma~\ref{lem:approx} on p.\ \pageref{lem:approx} is false,
because multiplication on $A^A$ does not allow cancellation in the sense of Theorem~\ref{thm:cancel} on p.\ \pageref{thm:cancel}.

However, Theorem~\ref{thm:Qp} does not actually require 
the identity $1$ in the monoid $(\N,1,\cdot)$.
After appropriate modifications,
the method of the theorem allows us to obtain the group $(\Q,0,-,+)$
such that $(\Qp,+)$ embeds in the reduct $(\Q,+)$.
This is shown in Theorem~\ref{thm:Q} on p.\ \pageref{thm:Q}.
The proof goes through, even though $(\Qp,+)$ does not expand to a monoid.
By the same method, $(\Z,0,-,+)$ can be obtained directly from $(\N,+)$.

The structures $(\N,+)$ and $(\Qp,+)$ are \emph{semigroups.}  In general, a \textbf{semigroup}\index{semigroup} is a structure $(S,\cdot)$
satisfying the identity
\begin{equation*}
(xy)z=x(yz).
\end{equation*}
If $(M,\gid,\cdot)$ is a monoid, then the reduct $(M,\cdot)$ is a semigroup.  But
not every semigroup is the reduct of a monoid: for example $(\N,+)$ and
$(\Qp,+)$ are not reducts of monoids. 
Or let
$O$ be the set of all operations $f$ on $\upomega$ such that,
for all $n$ in $\upomega$, $f(n)>n$: then $O$ is closed under
composition, so $(O,\circ)$ is a semigroup; but it has no identity.

The structure $(\Q,0,-,+,1,\cdot)$ 
is an example of a \emph{ring} (or more precisely associative ring); 
in fact it is a \emph{field,} 
and it embeds in the field $(\R,0,-,+,1,\cdot)$ of real numbers,
as follows from Theorem~\ref{thm:R-complete} on p.\ \pageref{thm:R-complete}.
Rings and fields as such will be defined formally 
in \S\ref{sect:rings}, beginning on p.\ \pageref{sect:rings}.

\subsection{Some homomorphisms}\label{subsect:homs}

We defined powers of symmetries on p.\ \pageref{sym-pow}.
By the same definition, we obtain at least the \emph{positive} powers
of elements of semigroups:
\begin{align*}
a^1&=a,&a^{n+1}&=a\cdot a^n.
\end{align*}

\begin{theorem}
Suppose $(S,\cdot)$ is a semigroup, and $m$ and $n$ range over~$\N$.
\begin{compactenum}
\item 
For all $a$ in $S$,
\begin{equation*}
a^{m+n}=a^ma^n.
\end{equation*}
That is, if $a\in S$, then
\begin{equation*}
n\mapsto a^n\colon(\N,+)\to(S,\cdot).
\end{equation*}
%$x\mapsto a^x$ is a homomorphism from $(\N,+)$ to $(S,\cdot)$.
\item
For all $a$ in $S$,
\begin{equation}\label{eqn:xmn}
a^{mn}=(a^m)^n.
\end{equation}
That is,
\begin{equation}\label{eqn:fmn}
n\mapsto(a\mapsto a^n)\colon(\N,1,\cdot)\to(S^S,\id_S,\circ).
\end{equation}
\end{compactenum}
\end{theorem}

\begin{proof}
We use induction.
The first part is proved like Theorem~\ref{thm:s^n}.
For the second part, we have
$a^{n\cdot 1}=a^n=(a^n)^1$, and if $a^{nm}=(a^n)^m$, then
\begin{equation*}
  a^{n(m+1)}=a^{nm+n}=a^{nm}a^n=(a^n)^ma^n=(a^n)^{m+1}.
\end{equation*}
This establishes \eqref{eqn:xmn}.  If we write $f_x(y)$ for $y^x$, then \eqref{eqn:xmn} becomes
\begin{equation*}
f_{mn}=f_n\circ f_m.
\end{equation*}
Since $mn=nm$, we get \eqref{eqn:fmn}.
\end{proof}

In a monoid, we define
\begin{equation*}
a^0=\gid.
\end{equation*}

\begin{theorem}
Suppose $(M,\gid,\cdot)$ is a monoid.
\begin{compactenum}
\item 
If $a\in M$, then $x\mapsto a^x\colon(\upomega,0,+)\to(M,\gid,\cdot)$.
\item
$x\mapsto(y\mapsto y^x)\colon(\upomega,1,\cdot)\to(M^M,\id_M,\circ)$. 
\end{compactenum}
\end{theorem}

In a group, we define
\begin{equation*}
  a^{-n}=(a^n)\inv.
\end{equation*}

\begin{theorem}\label{thm:exp-in-groups}
Suppose $(G,\gid,{}\inv,\cdot)$ is a group.
\begin{compactenum}
\item 
If $a\in G$, then $x\mapsto a^x\colon(\Z,0,-,+)\to(G,\gid,\inv,\cdot)$.
\item
$x\mapsto(y\mapsto y^x)\colon(\Z,1,\cdot)\to(G^G,\id_G,\circ)$. 
\end{compactenum}
\end{theorem}

We shall use the following in Theorem~\ref{thm:8} on p.\ \pageref{thm:8}.

\begin{theorem}\label{thm:x2e}
If $x^2=\gid$ for all $x$ in some group, then that group is abelian.
\end{theorem}

\subsection{Pi and Sigma notation}\label{subsect:PS}

We can generalize the taking of powers in a semigroup as follows.
Given elements $a_i$ of a semigroup, where $i$ ranges over $\upomega$, 
we define certain \textbf{iterated products} recursively by
\begin{align*}
\prod_{i<0}a_i&=1,&
\prod_{i<n+1}a_i&=\Bigl(\prod_{i<n}a_i\Bigr)\cdot a_n.
\end{align*}
We may also write $\prod_{i<n}a_i$ as
\begin{equation*}
a_0\cdots a_{n-1}.
\end{equation*}
This product depends not just on the set $\{a_i\colon i<n\}$,
but on the function $i\mapsto a_i$ on $n$.
As on p.\ \pageref{A^B}, we may denote this function by one of
\begin{align*}
&(a_0,\dots,a_{n-1}),&
&(a_i\colon i<n).
\end{align*}
Then the product $\prod_{i<n}a_i$ could also be written as
\begin{equation*}
\prod(a_i\colon i<n).
\end{equation*}
By associativity of multiplication in semigroups, we obtain the following.

\begin{theorem}\label{thm:prod-s}
In a semigroup,
\begin{equation*}
\prod_{i<n+m}a_i=\prod_{i<n}a_i\cdot\prod_{j<m}a_{n+j}.
\end{equation*}
\end{theorem}

If the operation on a semigroup is commutative, we usually write it additively,
and then we may define
\begin{align*}
\sum_{i<0}a_i&=0,&
\sum_{i<n+1}a_i&=\sum_{i<n}a_i+a_n.
\end{align*}
We may also write $\sum_{i<n}a_i$ as
\begin{equation*}
a_0+\dots+a_{n-1}.
\end{equation*}
However, we use multiplicative notation for the following.

\begin{theorem}\label{thm:Pi}
In a commutative semigroup, for all $n$ in $\N$, for all $\sigma$ in $\Sym n$,
\begin{equation*}
\prod_{i<n}a_{\sigma(i)}=\prod_{i<n}a_i.
\end{equation*}
\end{theorem}

\begin{proof}
Suppose first that $\sigma$ is the transposition $(k\cdiv \ell)$, 
where $k<\ell$.
Let
\begin{align*}
b&=\prod_{i<k}a_i,&
c&=\prod_{i<\ell-k-1}a_{k+i+1},&
d&=\prod_{i<n-\ell-1}a_{\ell+i+1}.
\end{align*}
By Theorem~\ref{thm:prod-s} and commutativity,
\begin{align*}
\prod_{i<n}a_{\sigma(i)}
&=b\cdot a_{\ell}\cdot c\cdot a_k\cdot d\\
&=b\cdot a_{\ell}\cdot a_k\cdot c\cdot d\\
&=b\cdot a_k\cdot a_{\ell}\cdot c\cdot d\\
&=b\cdot a_k\cdot c\cdot a_{\ell}\cdot d
=\prod_{i<n}a_i.
\end{align*}
So the claim holds when $\sigma$ is a transposition.
In this case we have
\begin{equation*}
\prod_{i<n}a_{\tau\sigma(i)}=\prod_{i<n}a_{\tau(i)}
\end{equation*}
for all $\tau$ in $\Sym n$.
Since every finite permutation is a product of transpositions by Theorem~\ref{thm:prod-trans},
we obtain the claim in general.
\end{proof}


By this theorem,\label{unord-prod}
if we have a function $i\mapsto a_i$ 
from some finite set $I$ into a commutative semigroup,
then the notation
\begin{equation*}
\prod_{i\in I}a_i
\end{equation*}
makes sense.
We use such notation in the next theorem, Theorem~\ref{thm:sgn}.
We may denote the function $i\mapsto a_i$ on $I$ by
\begin{equation*}
(a_i\colon i\in I),
\end{equation*}
and we may refer to it as an \textbf{indexed set,}\label{indexed}
specifically as an indexed subset of the commutative semigroup in question.
The set $I$ is the \textbf{index set} for this indexed set.
 


\subsection{Alternating groups}

\begin{theorem}\label{thm:sgn}
  The function $\sgn$ is a homomorphism
from  $\Sym n$ to $\{\pm1\}$. 
\end{theorem}

\begin{proof}
If $\sigma\in\Sym n$,
then there is a well-defined
function $X\mapsto\sq X$ from $\unordered 2n$ to $\{\pm1\}$ given by
\begin{equation*}
  \sq{\{i,j\}}=\frac{\sigma(i)-\sigma(j)}{i-j}.
\end{equation*}
Since multiplication in $\{\pm1\}$ is commutative, we can define
\begin{equation*}
  f(\sigma)=
\prod_{X\in \unordered{2}{n}}
\sq X.
\end{equation*}
If $\sigma=
(k\cdiv \ell)$, then
\begin{align*}
  f(\sigma)
  &=\sq{\{k,\ell\}}\cdot
  \prod_{i\in n\setminus\{k,\ell\}}\bigl(\sq{\{i,\ell\}}\cdot\sq{\{k,i\}}\bigr) \\
&=\frac{\ell-k}{k-\ell}\cdot
\prod_{i\in n\setminus\{k,\ell\}}\Bigl(\frac{i-k}{i-\ell}\cdot\frac{\ell-i}{k-i}\Bigr)\\
&=-1.
\end{align*}

If $\tau\in\Sym n$, we can define an element $\hat{\tau}$ of $\Sym{\unordered2n}$ by
\begin{equation*}
\hat{\tau}(\{i,j\})=\{\tau(i),\tau(j)\}.
\end{equation*}
By Theorem~\ref{thm:Pi},
\begin{equation*}
  f(\sigma)=
\prod_{X\in \unordered{2}{n}}
\sq{\hat{\tau}(X)},
\end{equation*}
so
\begin{align*}
  f(\sigma\tau)
&=\prod_{\{i,j\}\in \unordered{2}{n}}
\frac{\sigma(\tau(i))-\sigma(\tau(j))}{i-j}\\
&=\prod_{\{i,j\}\in \unordered{2}{n}}
\left(\frac{\sigma(\tau(i))-\sigma(\tau(j))}{\tau(i)-\tau(j)}\cdot
\frac{\tau(i)-\tau(j)}{i-j}\right)\\
&=\prod_{X\in\unordered{2}{n}}
\bigl(\sq{\hat{\tau}(X)}\cdot\sq[\tau]X\bigr)\\
&=\prod_{X\in\unordered{2}{n}}\sq{\hat{\tau}(X)}\cdot
\prod_{X\in \unordered{2}{n}}\sq[\tau]X\\
%&=\prod_{X\in\unordered{2}{n}}\sq{\hat{\tau}(X)}\cdot\sgn(\tau)\\
&=f(\sigma)\cdot f(\tau).
\end{align*}
Thus $f(\tau)=1$ if and only if $\tau$ is the product of an even number of transpositions,
and otherwise $f(\tau)=-1$.
Therefore $f$ must agree with $\sgn$ on $\Sym n$,
and so $\sgn$ must be a homomorphism.
\end{proof}

We have as a corollary that the even permutations of $n$ compose a subgroup of $\Sym n$.
This subgroup is the \textbf{alternating group}%%%%%
\index{alternating}\index{group!alternating ---} 
of degree $n$ and is denoted by\label{Alt}
\begin{equation*}
  \Alt n.
\end{equation*}
If $n>1$, there is a permutation $\sigma\mapsto\sigma\circ(0\cdiv 1)$ of $\Sym n$ itself 
that takes even elements to odd.  
In this case, $\Alt n$ is half the size of $\Sym n$.
However, $\Alt1=\Sym 1$.
For this reason, one may wish to say that $\Alt n$ is defined only when $n\geq2$.
This makes Theorem~\ref{thm:SA2} (p.\ \pageref{thm:SA2} below) simpler to state.


\section{Simplifications}\label{sect:simp}

If a semigroup $(G,\cdot)$ expands to a group $(G,\gid,{}\inv,\cdot)$, 
then the semigroup $(G,\cdot)$ itself is often called a group.  
But this usage must be justified.

\begin{theorem}\label{thm:u}
A semigroup can expand to a group in only one way.
\end{theorem}

\begin{proof}
Let $(G,\gid,\inv,\cdot)$ be a group.
If $\gid'$ were a second identity, then
\begin{align*}
\gid'x&=\gid x,& \gid'xx\inv&=\gid xx\inv,& \gid'&=\gid.
\end{align*}
If $a'$ were a second inverse of $a$, then
\begin{align*}
a'a&=a\inv a,& a'aa\inv&=a\inv aa\inv,&a'&=a\inv.\qedhere
\end{align*}
\end{proof}

Establishing that a particular structure is a group is made easier by the following.

\begin{theorem}\label{thm:left}
Any structure satisfying the identities
\begin{gather*}
	{\gid}x=x,\\
	x\inv x=\gid,\\
	x(yz)=(xy)z
\end{gather*}
is a group.
In other words, 
  any semigroup with a left-identity and with left-inverses is a group.  
\end{theorem}

\begin{proof}
We need to show $x\gid=x$ and $xx\inv=\gid$.  To establish the latter,
using the given identities we have
\begin{equation*}
(xx\inv)(xx\inv)=x(x\inv x)x\inv=x{\gid}x\inv=xx\inv,
\end{equation*}
and so
\begin{equation*}
xx\inv={\gid}xx\inv=(xx\inv)\inv(xx\inv)(xx\inv)=(xx\inv)\inv(xx\inv)={\gid}.
\end{equation*}
Hence also
\begin{equation*}
x{\gid}=x(x\inv x)=(xx\inv)x={\gid}x=x.\qedhere
\end{equation*}
\end{proof}

The theorem has an obvious ``dual'' involving right-identities and right-inverses.  By the theorem, the semigroups that expand to groups are precisely the semigroups that satisfy the axiom
\begin{gather*}
\Exists z(\Forall xzx=x\land\Forall x\Exists y yx=z),
\end{gather*}
which is logically equivalent to
\begin{equation}\label{eqn:sg-ax}
\Exists z\Forall x\Forall y\Exists u(zx=x\land uy=z).
\end{equation}
We shall show that this sentence is more complex than need be.

Thanks to Theorem~\ref{thm:u}, if a semigroup $(G,\cdot)$ does expand to a group, then we may unambiguously refer to $(G,\cdot)$ itself as a group.  Furthermore, we may refer to $G$ as a group: this is commonly done, although, theoretically, it may lead to ambiguity.

\begin{theorem}\label{thm:solutions}
Let $G$ be a nonempty semigroup.  The following are equivalent.
\begin{compactenum}
\item\label{item:exp}
$G$ expands to a group.
%\item\label{item:exp-u}
%$G$ expands uniquely to a group.
\item\label{item:sol}
Each equation $ax=b$ and $ya=b$ with parameters from $G$ has a
solution in $G$.
\item\label{item:sol-u}
Each equation $ax=b$ and $ya=b$ with parameters from $G$ has a
unique solution in $G$.
\end{compactenum}
\end{theorem}

\begin{proof}
Immediately \eqref{item:sol-u}$\lto$\eqref{item:sol}.  Almost as easily, \eqref{item:exp}$\lto$\eqref{item:sol-u}.  For, if $a$ and $b$ belong to some semigroup that expands to a group, we have $ax=b\liff x=a\inv b$; and we know by Theorem~\ref{thm:u} that $a\inv$ is uniquely determined.  Likewise for $ya=b$.

Finally we show \eqref{item:sol}$\lto$\eqref{item:exp}.
Suppose $G$ is a nonempty semigroup in which all equations $ax=b$ and $ya=b$ have solutions.  If $c\in G$, let $\gid$ be
a solution to $yc=c$.  If $b\in G$, let $d$ be a
solution to 
$cx=b$.  Then
\begin{equation*}
  {\gid}b={\gid}(cd)=({\gid}c)d=cd=b.
\end{equation*}
Since $b$ was chosen arbitrarily, $\gid$ is a left identity.  Since the equation $yc={\gid}$ has a solution, $c$ has a left inverse.  But $c$ is an arbitrary element of $G$.  By Theorem~\ref{thm:left}, we are done.
\end{proof}

Now we have that the semigroups that expand to groups 
are just the semigroups that satisfy the axiom
\begin{equation*}
  \Forall x\Forall y(\Exists zxz=y\land\Exists wwx=y).
\end{equation*}
This may not look simpler than \eqref{eqn:sg-ax}, but it is.  
It should be understood as 
\begin{equation*}
\Forall x\Forall y\Exists z\Exists w(xz=y\land wx=y), 
\end{equation*}
which is a sentence of the general form $\forall\exists$;
whereas \eqref{eqn:sg-ax} is of the form $\exists\forall\exists$.  

\begin{theorem}\label{thm:gp-hom}
  A map $f$ from one group to another is a homomorphism, provided it is a
  homomorphism of semigroups, that is, $f(xy)=f(x)f(y)$.
\end{theorem}

\begin{proof}
In a group, if $a$ is an element, then the identity is the unique
solution of $xa=a$, and $a\inv$ is the unique solution of $yaa=a$.  A
semigroup homomorphism $f$ takes solutions of
these equations to solutions of $xb=b$ and $ybb=b$, where $b=f(a)$. 
\end{proof}

\emph{Inclusion} of a substructure in a larger structure is a homomorphism. 
In particular, if $(G,\gid,{}\inv,\cdot)$ and $(H,\gid,{}\inv,\cdot)$ 
are groups, we have
\begin{equation*}
(G,\cdot)\included(H,\cdot)
\implies(G,\gid,{}\inv,\cdot)\included(H,\gid,{}\inv,\cdot).
\end{equation*}

If an arbitrary class of structures is axiomatized 
by $\forall\exists$ sentences, 
then the class is ``closed under unions of chains\label{chains}'' 
in the sense that, 
if $\str A_0\included\str A_1\included\str A_2\included\dotsb$, 
where each $\str A_k$ belongs to the class, 
then the union of all of these structures also belongs to the class.  
In fact the converse is also true, 
by the so-called Chang--\L o\'s--Suszko Theorem \cite{MR0103812,MR0089813}.  
With this theorem, 
and with Theorem~\ref{thm:gp-hom} in place of~\ref{thm:solutions}, 
we can still conclude 
that the theory of groups in the signature $\{\cdot\}$ 
has $\forall\exists$ axioms,
although we may not know what they are.

Theorem~\ref{thm:gp-hom} fails with monoids in place of groups.  
For example, $(\Z,1,\cdot)$ and $(\Z\times\Z,(1,1),\cdot)$ are monoids 
(the latter being the product of the former with itself 
as defined in \S\ref{sect:new}), 
and $x\mapsto(x,0)$ is an embedding 
of the semigroup $(\Z,\cdot)$ in $(\Z\times\Z,\cdot)$, 
but it is not an embedding of the monoids.

\section{Associative rings}\label{sect:rings}

A homomorphism from a structure to itself is an
\textbf{endomorphism.}\index{endomorphism}
Recall from p.\ \pageref{abelian} 
that a group in which the multiplication is commutative 
is said to be an \textbf{abelian group,}
and (p.\ \pageref{additive}) its operation is usually written additively.
The set of endomorphisms of an abelian group can be made into an
abelian group in which: 
\begin{compactenum}[1)]
\item 
the identity is the constant function $x\mapsto\gid$;
\item
additive inversion converts $f$ to $x\mapsto-f(x)$;
\item
addition converts $(f,g)$ to $x\mapsto f(x)+g(x)$.
\end{compactenum}
If $E$ is an abelian group, let the abelian group of its endomorphisms
be denoted by
\begin{equation*}
  \End E.
\end{equation*}
The set of endomorphisms of $E$ can also be made into
a monoid in which 
the identity is the identity function $\id_E$, and multiplication
is functional composition.
This multiplication
distributes in both senses over addition: 
\begin{align*}
  f\circ(g+h)&=f\circ g+f\circ h,& (f+g)\circ h&=f\circ h+g\circ h.
\end{align*}
We may denote the two combined structures---abelian group and
monoid together---by
\begin{equation*}
  (\End E,\id_E,\circ);
\end{equation*}
this is the \textbf{complete ring of
  endomorphisms of}\index{complete ring of endomorphisms} $E$.  
A substructure of $(\End E,\id_E,\circ)$ can be called
simply a \textbf{ring of endomorphisms}\index{ring of endomorphisms of} $E$.  

An \textbf{associative ring} is a structure $(R,0,-,+,1,\cdot)$ such that
\begin{compactenum}[1)]
\item
$(R,0,-,+)$ is an abelian group,
\item
$(R,1,\cdot)$ is a monoid,
\item
the multiplication distributes in both senses over addition.
\end{compactenum}
Then rings of endomorphisms are associative rings.%%%%%
\footnote{See note~\ref{note:ring} on p.\ \pageref{note:ring}
for the origin of the term \emph{ring.}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
It may be convenient to write an associative ring as $(R,1,\cdot)$,
where $R$ is implicitly an abelian group.
We might even say simply that $R$ is an associative ring.

An associative ring is usually just called a ring;
however, we shall consider some rings that are not associative rings in \S\ref{sect:non-assoc} (p.\ \pageref{sect:non-assoc}).
Some authors might not require
an associative ring to have a multiplicative identity.%%%%%
\footnote{For Lang \cite[ch.~II, \S1, p.~83]{Lang-alg},
a ring is what we have defined as an associative ring.
For Hungerford \cite[ch.~III, \S1, p.~115]{MR600654},
what we call an associative ring is a \emph{ring with identity.}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
We require it, so that the next theorem holds.
As with a group, so with an associative ring, 
an element $a$ determines a singulary operation $\uplambda_a$ on the structure, 
the operation being given by
\begin{equation*}
  \uplambda_a(x)=ax.
\end{equation*}
Then we have an analogue of Cayley's Theorem (p.\ \pageref{thm:Cay}):

\begin{theorem}\label{thm:x-lambda_x}
For every associative ring $(R,1,\cdot)$,
the function
\begin{equation*}
x\mapsto\uplambda_x
\end{equation*}
embeds $(R,1,\cdot)$ in $(\End R,\id_R,\circ)$.
\end{theorem}

In an associative ring, if the multiplication commutes,
then the ring is a \textbf{commutative ring.}\index{commutative ring}
For example, $(\Z,0,-,+,1,\cdot)$\label{Z-as-ring}
and $(\Q,0,-,+,1,\cdot)$
are commutative rings.
The following is easy to check,
but can be seen
as a consequence of Theorem \ref{thm:ring-q} on p.\ \pageref{thm:ring-q} below, 
which is itself easy to prove, especially given Theorem~\ref{thm:cong}.

\begin{theorem}\label{thm:Zmod-ring}
$(\Zmod n,0,-,+,1,\cdot)$ is a commutative ring.
\end{theorem}



In an associative ring, 
an element with both a left and a right multiplicative inverse can be
called simply \textbf{invertible;}\index{invertible} it is also called
a \textbf{unit.}\index{unit}  

\begin{theorem}\label{thm:units}
In an associative ring, the units compose a group with respect to
multiplication.  In particular, a unit has a unique
left inverse, which is also a right inverse.
\end{theorem}

The group of units of an associative ring $R$ is denoted by
\begin{equation*}
  \units R.
\end{equation*}
For example, $\units{\Z}=\{1,-1\}$.  
Evidently all two-element groups
are isomorphic to this one.

By the theorem, if an element of an associative ring
has both a left inverse and a right inverse,
then they are equal.
However, possibly an element can have a right inverse,
but not a left inverse.
We can construct an example by means of the following.

\begin{theorem}\label{thm:power}
If $I$ is a set and $G$ is a group,
then the set $G^I$ of functions from $I$ to $G$
is a group with multiplication given by
\begin{equation*}
(x_i\colon i\in I)\cdot(y_i\colon i\in I)=(x_i\cdot y_i\colon i\in I).
\end{equation*}
\end{theorem}

Let $G$ be any nontrivial group.\label{exa:no-unit}
An arbitrary element $(x_n\colon n\in\upomega)$ of $G^{\upomega}$ 
can be written also as
\begin{equation*}
(x_0,x_1,\dots).
\end{equation*}
Then $\End{G^{\upomega}}$ contains elements $f$ and $g$ given by
\begin{gather*}
f(x_0,x_1,\dots)=(x_1,x_2,x_3,x_4,\dots),\\
g(x_0,x_1,\dots)=(x_0,x_0,x_1,x_2,\dots),
\end{gather*}
so that
\begin{gather*}
fg(x_0,x_1,\dots)=(x_0,x_1,x_2,\dots),\\
gf(x_0,x_1,\dots)=(x_1,x_1,x_2,\dots).
\end{gather*}
In particular, $g$ is a right inverse of $f$, but not a left inverse.
The construction in Theorem~\ref{thm:power} will be generalized on p.\ \pageref{dprod}.

If $R$ is a commutative ring, and
$\units R=R\setminus\{0\}$, then $R$ is called a \textbf{field.}\index{field}
For example, $\Q$ and $\R$ are fields.  
The field $\C$ can be defined as $\R\times\R$ with the appropriate operations:
see p.\ \pageref{C}.  

The trivial group $\{0\}$ becomes the trivial associative ring 
when we define $1=0$ and $0\cdot0=0$.
This ring is not a field, because its only element $0$ is a unit.



\chapter{Groups}

\section{*General linear groups}\label{sect:gl}

The purpose of this section is to define some families of examples of groups,
besides the finite symmetry groups $\Sym n$.

By Cayley's Theorem, p.\ \pageref{thm:Cay}, 
we know that every finite group embeds, for some $n$ in $\upomega$, in $\Sym n$.
We know in turn (from p.\ \pageref{sym-omega}) 
that each $\Sym n$ embeds in $\Sym{\upomega}$,
which however is uncountable by Theorem~\ref{thm:sym-o-un}. 
For every commutative ring $R$,
for every $n$ in $\upomega$,
we shall define the group $\GLR$ 
of \emph{invertible $n\times n$ matrices over} $R$.
Both $\Sym n$ and $\units R$ embed in $\GLR$.
If $R$ is countable, then so is $\GLR$.
If $R$ is finite, then so is $\GLR$.
In any case, $\GLR$ can be understood as the automorphism group of $R^n$,
when this is considered as an \emph{$R$-module.}

We shall use invertible matrices over $\Z$ in classifying 
the \emph{finitely generated} abelian groups, 
in \S\ref{sect:fgag} (p.\ \pageref{sect:fgag}).

\subsection{Additive groups of matrices}

For any commutative ring $R$,
for any two elements $m$ and $n$ of $\upomega$,
a function $(i,j)\mapsto a^i_j$ from $m\times n$ to $R$ 
can be called an $m\times n$ \textbf{matrix over} $R$ 
and denoted by the expression
\begin{equation*}
  \begin{pmatrix}
    a^0_0&\cdots&a^0_{n-1}\\
\vdots&\ddots&\vdots\\
a^{m-1}_0&\cdots&a^{m-1}_{n-1}
  \end{pmatrix},
\end{equation*}
which has $m$ rows and $n$ columns.
We may abbreviate this matrix to
\begin{equation*}
(a^i_j)^{i< m}_{j< n},
\end{equation*}
or simply
\begin{equation*}
 (a^i_j)^{i}_{j} 
\end{equation*}
if the sets over which $i$ and $j$ range are clear.  
The \textbf{entries} $a^i_j$ are from $R$.
The set of all $m\times n$ matrices over $R$ can be denoted by 
\begin{equation*}
\MatR[m\times n].
\end{equation*}
This is an abelian group in the obvious way, with addition defined by
\begin{equation*}
  (a^i_j)^{i<m}_{j<n}+(b^i_j)^{i<m}_{j<n}
= (a^i_j+b^i_j)^{i<m}_{j<n}.
\end{equation*}


\subsection{Multiplication of matrices}

Given any three elements $m$, $s$, and $n$ of $\upomega$,
we define \textbf{multiplication} as a function
from the product $\MatR[m\times s]\times\MatR[s\times n]$ 
to $\MatR[m\times n]$ by
\begin{equation*}
(a^i_j)^{i<m}_{j<s}\cdot(b^j_k)^{j<s}_{k<n}
=\Bigl(\sum_{j\in s}a^i_jb^j_k\Bigr)^{i<m}_{k<n}.
\end{equation*}
Then in particular multiplication 
is a binary operation 
on each group $\MatR[n\times n]$ of \emph{square} matrices.
One particular element of this group is
\begin{equation*}
  \begin{pmatrix}
    1&&0\\
&\ddots&\\
0&&1
  \end{pmatrix},
\end{equation*}
which can be denoted by
\begin{equation*}
  \IM_n.
\end{equation*}
This matrix can also be written as $(\updelta^i_j)^{i<n}_{j<n}$, where
\begin{equation*}
\updelta^i_j=\begin{cases}
	1,&\text{ if } i=j,\\
	0,&\text{ otherwise.}
\end{cases}
\end{equation*}

\begin{theorem}\label{thm:M}
For all commutative rings $R$,
multiplication of matrices over $R$ is associative
and distributes over addition.
Also $\MatR[n\times n]$ is an associative ring
with multiplicative identity $\IM_n$.
\end{theorem}

The group $\units{\MatR[n\times n]}$ is called the \textbf{general linear
  group}\index{general linear group}
of degree $n$ over $R$; it is also denoted by
\begin{equation*}
\GLR.
\end{equation*}

Some elements of $\GLR$ are picked out by the following.

\begin{theorem}\label{thm:Sym-GL}
For each $n$ in $\upomega$,
there is an embedding of $\Sym n$ in $\GLR$, namely
  \begin{equation*}
\sigma\mapsto(\updelta^{\sigma\inv(i)}_j)^{i<n}_{j<n}.
\end{equation*}
\end{theorem}

\begin{proof}
  The given function is evidently injective.
It is a homomorphism since
\begin{equation*}
  (\updelta^{\sigma\inv(i)}_j)^i_j\cdot(\updelta^{\tau\inv(i)}_j)^i_j
=\Bigl(\sum_{k<n}\updelta^{\sigma\inv(i)}_k\cdot\updelta^{\tau\inv(k)}_j\Bigr)^i_j
=(\updelta^{\tau\inv(\sigma\inv(i))}_j)^i_j.\qedhere
\end{equation*}
\end{proof}

If $R$ is a field, 
there is an algorithm called \textbf{Gauss--Jordan elimination,}%\label{GJE}
learned in linear algebra classes, 
for determining whether a given element $A$ of $\MatR[n\times n]$ is invertible.
One systematically performs certain invertible operations on the rows of $A$,
attempting to transform it into $\IM_n$.
These operations are called \textbf{elementary row operations,}\label{ero}
and they are:
\begin{compactenum}[1)]
\item
interchanging two rows,
\item
adding a multiple of one row by an element of $R$ to another, and
\item
multiplying a row by an element of $\units R$.
\end{compactenum}
One works through the matrix from left to right,
first converting a nonzero element of the first column to $1$,
and using this to eliminate the other nonzero entries;
then continuing with the second column, and so on.
One will be successful in transforming $A$ to $\IM_n$
if and only if $A$ is indeed invertible.
In this case, the same elementary row operations,
performed on the rows of $\IM_n$,
will produce $A\inv$.
The reason is that performing each of these operations
is the same as multiplying from the left 
by the result of performing the same operation on $\IM_n$.

When $R$ is $\Z$, one can instead use the Euclidean algorithm
to make one entry in each column of $A$
equal to the \emph{greatest common divisor} of all of the entries in that column.
(See p.\ \pageref{gcd}.)
Then $A$ is invertible if and only if each of these greatest common divisors is $1$.

We now develop a method for determining 
whether a matrix over an arbitrary ring is invertible.

\subsection{Determinants of matrices}

Given a commutative ring $R$, 
we define the function $X\mapsto\det(X)$ from $\MatR$ to $R$ by
\begin{equation*}
  \det((a^i_j)^{i<n}_{j<n})=\sum_{\sigma\in\Sym
    n}\sgn(\sigma)\prod_{i<n}a^i_{\sigma(i)}.
\end{equation*}
Here $\det(A)$ is the \textbf{determinant} of $A$.

\begin{theorem}
  The function $X\mapsto\det(X)$ is a multiplicative homomorphism,
  that is,
  \begin{equation*}
    \det(XY)=\det(X)\cdot\det(Y).
  \end{equation*}
\end{theorem}

\begin{proof}
  We shall use the identity
  \begin{equation*}
    \prod_{i<k}\sum_{j<n}f(i,j)=\sum_{\phi\colon k\to n}\prod_{i<k}f(i,\phi(i)).
  \end{equation*}
Let $A=(a^i_j)^{i<n}_{j<n}$ and $B=(b^i_j)^{i<n}_{j<n}$.  Then
\begin{align*}
  \det(AB)
&=\det\biggl(\Bigl(\sum_{j<n}a^i_jb^j_k\Bigr)^{i<n}_{k<n}\biggr)\\
&=\sum_{\sigma\in\Sym n}\sgn(\sigma)\prod_{i<n}\sum_{j<n}a^i_jb^j_{\sigma(i)}\\
&=\sum_{\sigma\in\Sym n}\sgn(\sigma)\sum_{\phi\colon n\to
    n}\prod_{i<n}(a^i_{\phi(i)}b^{\phi(i)}_{\sigma(i)})\\
&=\sum_{\phi\colon n\to n}
\prod_{i<n}a^i_{\phi(i)}
 \sum_{\sigma\in\Sym n}\sgn(\sigma)\prod_{i<n}b^{\phi(i)}_{\sigma(i)}. 
\end{align*}
We shall eliminate from the sum those terms involving any $\phi$ that is not
injective. 
Suppose $k<\ell<n$, but
$\phi(k)=\phi(\ell)$.  The function 
$\sigma\mapsto\sigma\circ
\begin{cycle}
  k\cdiv \ell
\end{cycle}$ is a bijection between $\Alt n$ and $\Sym
n\setminus\Alt n$.  Writing $\sigma'$ for $\sigma\circ
\begin{cycle}
  k\cdiv \ell
\end{cycle}$, we have
\begin{equation*}
\sum_{\sigma\in\Sym n}\sgn(\sigma)\prod_{i<n}b^{\phi(i)}_{\sigma(i)}
=\sum_{\sigma\in\Alt n}\sgn(\sigma)
\Bigl(\prod_{i<n}b^{\phi(i)}_{\sigma(i)}-\prod_{i<n}b^{\phi(i)}_{\sigma'(i)}\Bigr).
\end{equation*}
Each term of the last sum is $0$,
since $\sigma$ and $\sigma'$ agree on $n\setminus\{k,\ell\}$, while
\begin{equation*}
b^{\phi(k)}_{\sigma(k)}b^{\phi(\ell)}_{\sigma(\ell)}
= b^{\phi(\ell)}_{\sigma'(\ell)}b^{\phi(k)}_{\sigma'(k)}
= b^{\phi(k)}_{\sigma'(k)}b^{\phi(\ell)}_{\sigma'(\ell)}.
\end{equation*}
Therefore, continuing with the computation above, we have
\begin{equation*}
\det(AB)
=\sum_{\tau\in\Sym n}\prod_{i<n}a^i_{\tau(i)}
 \sum_{\sigma\in\Sym n}\sgn(\sigma)\prod_{i<n}b^{\tau(i)}_{\sigma(i)}.
\end{equation*}
Since each $\tau$ in $\Sym n$ permutes $n$, we have also
\begin{align*}
\prod_{i<n}b^{\tau(i)}_{\sigma(i)}
&=\prod_{i<n}b^i_{\sigma\tau\inv(i)},&
\sgn(\sigma)&=\sgn(\tau)\cdot\sgn(\sigma\tau\inv).
\end{align*}
Putting this all together, we have
\begin{align*}
\det(AB)
&=\sum_{\tau\in\Sym n}\prod_{i<n}a^i_{\tau(i)}
 \sum_{\sigma\in\Sym n}\sgn(\tau)\sgn(\sigma\tau\inv)\prod_{i<n}b^i_{\sigma\tau\inv(i)}\\
&=\sum_{\tau\in\Sym n}\sgn(\tau)\prod_{i<n}a^i_{\tau(i)}
 \sum_{\sigma\in\Sym n}\sgn(\sigma\tau\inv)\prod_{i<n}b^i_{\sigma\tau\inv(i)}\\
&=\sum_{\tau\in\Sym n}\sgn(\tau)\prod_{i<n}a^i_{\tau(i)}
 \sum_{\sigma\in\Sym n}\sgn(\sigma)\prod_{i<n}b^i_{\sigma(i)}\\
 &=\det(A)\cdot\det(B),
\end{align*}
since $\sigma\mapsto\sigma\tau\inv$ is a permutation of $\Sym n$.
\end{proof}

\begin{corollary}\label{cor:mat-inv}
  An element of $\MatR$ has an inverse only if its determinant is in $\units R$.
\end{corollary}

\subsection{Inversion of matrices}

Given the commutative ring $R$, 
we can now characterize the elements of $\GLR$ among elements of $\MatR$
by establishing the converse of Corollary~\ref{cor:mat-inv}.

\begin{theorem}
  An element of $\MatR$ has an inverse if its determinant is in $\units R$.
\end{theorem}

\begin{proof}
Let $A=(a^i_j)^{i<n}_{j<n}$.  If $i<n$, then
\begin{align*}
  \det(A)
&=\sum_{\sigma\in\Sym
    n}\sgn(\sigma)\cdot\prod_{\ell<n}a^{\ell}_{\sigma(\ell)}\\
&=\sum_{\sigma\in\Sym
    n}\sgn(\sigma)\cdot a^i_{\sigma(i)}\prod_{\ell\in
    n\setminus\{i\}}a^{\ell}_{\sigma(\ell)}\\
&=\sum_{j<n}a^i_j
\sum_{\substack{\sigma\in\Sym n\\\sigma(i)=j}}\sgn(\sigma)\cdot\prod_{\ell\in
    n\setminus\{i\}}a^{\ell}_{\sigma(\ell)}\\
&=\sum_{j<n}a^i_jb^j_i,
\end{align*}
where in general
\begin{equation*}
  b^j_k=
\sum_{\substack{\sigma\in\Sym n\\\sigma(k)=j}}\sgn(\sigma)\cdot\prod_{\ell\in
    n\setminus\{k\}}a^{\ell}_{\sigma(\ell)}.
\end{equation*}
If $i\neq k$, then
\begin{align*}
  \sum_{j<n}a^i_jb^j_k
&=\sum_{j<n}a^i_j\sum_{\substack{\sigma\in\Sym
      n\\\sigma(k)=j}}\sgn(\sigma)\cdot\prod_{\ell\in 
    n\setminus\{k\}}a^{\ell}_{\sigma(\ell)}\\
&=\sum_{\sigma\in\Sym n}\sgn(\sigma)\cdot a^i_{\sigma(k)}\prod_{\ell\in 
    n\setminus\{k\}}a^{\ell}_{\sigma(\ell)}\\
&=\sum_{\sigma\in\Sym n}\sgn(\sigma)\cdot a^i_{\sigma(k)}a^i_{\sigma(i)}\prod_{\ell\in 
    n\setminus\{i,k\}}a^{\ell}_{\sigma(\ell)}=0,
\end{align*}
since the map $\sigma\mapsto\sigma\circ
\begin{cycle}
  i\cdiv k
\end{cycle}$ is a bijection between $\Alt n$ and $\Sym
n\setminus\Alt n$.
Thus
\begin{equation*}
  A\cdot(b^j_k)^{j<n}_{k<n}=(\det(A)\cdot\updelta^i_k)^{i<n}_{k<n}.
\end{equation*}
Finally,
\begin{align*}
  \sum_{j<n}b^i_ja^j_k
&=\sum_{j<n}\sum_{\substack{\sigma\in\Sym
      n\\\sigma(j)=i}}\sgn(\sigma)\cdot\prod_{\ell\in
    n\setminus\{j\}}a^{\ell}_{\sigma(\ell)}a^j_k\\
&=\sum_{\sigma\in\Sym n}\sgn(\sigma)\cdot\prod_{\ell\in
    n\setminus\{\sigma\inv(i)\}}a^{\ell}_{\sigma(\ell)}a^{\sigma\inv(i)}_k\\
&=\sum_{\sigma\in\Sym n}\sgn(\sigma)\cdot\prod_{\ell\in
    n\setminus\{i\}}a^{\sigma\inv(\ell)}_{\ell}a^{\sigma\inv(i)}_k,
\end{align*}
which is $\det(A)$ if $i=k$, but is otherwise $0$, so
\begin{equation*}
  (b^i_j)^{i<n}_{j<n}A=(\det(A)\updelta^i_k)^{i<n}_{k<n}.
\end{equation*}
In particular, if $\det(A)$ is invertible, then so is $A$, and
\begin{equation*}
  A\inv=(\det(A)\inv b^j_k)^{j<n}_{k<n}. \qedhere
\end{equation*}
\end{proof}

Thus
\begin{equation*}
 \GLR=\{X\in\MatR[n\times n]\colon\det(X)\in\units R\}. 
\end{equation*}
In the $2\times 2$ case, if $ad-bc=1$, we have
\begin{equation*}
  \begin{pmatrix}
    a&b\\c&d
  \end{pmatrix}\inv
=
  \begin{pmatrix}
    d&-b\\-c&a
  \end{pmatrix}.
 \end{equation*}

\subsection{Modules and vector-spaces}\label{subsect:mod}

A \textbf{module} is a kind of structure with two universes.
One of these is the universe of a commutative ring $R$,
and the other is the universe of an abelian group $M$.
Furthermore, there is a function $(x,\bm m)\mapsto x\cdot\bm m$ 
from $R\times M$ to $M$
such that the function $x\mapsto(\bm m\mapsto x\cdot\bm m)$ 
is a homomorphism from $R$ to $(\End M,\id_M,\circ)$.
Then we can understand $M$ 
as a group equipped with a certain additional operation 
for each element of $R$.
In this sense, $M$ is a \textbf{module over} $R$, or an \textbf{$R$-module.}

For example, $R$ is a module over itself.
A module over a \emph{field} is called a \textbf{vector space.}
In this case, the associated homomorphism 
from $R$ to $(\End M,\id_M,\circ)$ is an embedding,
unless $M$ is the trivial group.

The foregoing definition of modules makes sense, even if $R$ is not commutative;
but in that case what we have defined is a \textbf{left} module.
We restrict our attention to the commutative case.

We further restrict our attention to the case 
where $M$ is the group $\MatR[n\times1]$ for some $n$ in $\upomega$.
A typical element of this group can be written as either of
\begin{align*}
&\bm x,&&(x^i\colon i<n);
\end{align*}
thus it can be identified with an element of $R^n$.  
The group becomes an $R$-module when we make the obvious definition
\begin{equation*}
r\cdot\bm x=(r\cdot x^i\colon i<n).
\end{equation*}

\begin{theorem}
  For every commutative ring $R$, for every $n$ in $\upomega$,
there is an isomorphism from $\GLR$ to $\Aut{R^n}$, namely
\begin{equation}\label{eqn:GL-Aut}
  A\mapsto(\bm x\mapsto A\cdot\bm x).
\end{equation}
\end{theorem}

\begin{proof}
  By Theorem \ref{thm:M}, if $A\in\GLR$, 
then the operation $\bm x\mapsto A\cdot\bm x$ is a group endomorphism.
Being invertible, it is a group automorphism.
By commutativity of $R$ (and the definition of matrix multiplication),
for all $r$ in $R$,
\begin{equation*}
  A\cdot(r\cdot\bm x)=r\cdot(A\cdot\bm x).
\end{equation*}
Hence the function in \eqref{eqn:GL-Aut} is indeed a homomorphism $h$
from $\GLR$ to $\Aut{R^n}$.
To show that it is a bijection onto $\Aut{R^n}$, we use the notation
\begin{equation*}
\bv_j=(\updelta^i_j\colon i<n),
\end{equation*}
so that
\begin{equation*}
\bm x=\sum_{i<n}x^i\cdot\bv_i.
\end{equation*}
If $A=(a^i_j)^{i<n}_{j<n}$, then
\begin{equation*}
  A\cdot\bv_j=(a^i_j\colon i<n),
\end{equation*}
which is the number-$j$ column of $A$.
Thus, if $h(A)$ is the identity, then each column of $A$ is the corresponding $\bv_j$,
so that $A=\IM_n$; this shows $\ker(h)$ is trivial.
To show that $h$ is surjective onto $\Aut{R^n}$, 
suppose $f\in\Aut{R^n}$ and $f(\bv_i)=(a^j_i\colon j<n)$.
Then
\begin{align*}
  f(\bm x)
&=f\Bigl(\sum_{i<n}x^i\cdot\bv_i\Bigr)\\
&=\sum_{i<n}x^i\cdot f(\bv_i)\\
&=\sum_{i<n}x^i\cdot(a^j_i\colon j<n)\\
&=\Bigl(\sum_{i<n}x^i\cdot a^j_i\colon j<n\Bigr)\\
&=A\cdot\bm x,
\end{align*}
where $A=(a^i_j)^{i<n}_{j<n}$.  Thus $f=h(A)$.
\end{proof}

By composing the isomorphism in the theorem
with the embedding of $\Sym n$ in $\GLR$ given by Theorem~\ref{thm:Sym-GL},
we obtain the embedding of $\Sym n$ in $\Aut{R^n}$ 
discussed (in case $R=\R$) on p.\ \pageref{coord} above.




\section{New groups from old}\label{sect:new}

\subsection{Products}

If $\str A$ and $\str B$ are two algebras with the same signature,
then their \textbf{direct product,}\label{dp} denoted by
\begin{equation*}
\str A\times\str B,
\end{equation*}
is defined in the obvious way: 
the universe is $A\times B$, and
for every $n$ in $\upomega$, 
for every $n$-ary operation-symbol $f$ of the signature of $\str A$ and $\str B$,
\begin{equation*}
f^{\str A\times\str B}\bigl((x_i,y_i)\colon i<n\bigr)
=\bigl(f^{\str A}(x_i\colon i<n),f^{\str B}(y_i\colon i<n)\bigr).
\end{equation*}
In the special case where $\str A$ and $\str B$ are groups, we have
\begin{equation*}\label{interpretation}
(x_0,y_0)\cdot^{\str A\times\str B}(x_1,y_1)
=(x_0\cdot^{\str A}x_1,y_0\cdot^{\str B}y_1),
\end{equation*}
or more simply
\begin{equation*}
(x_0,y_0)(x_1,y_1)=(x_0x_1,y_0y_1).
\end{equation*}

\begin{theorem}\label{thm:2dp}
The direct product of two
\begin{compactenum}[(a)]
\item
groups is a group,
\item
associative rings is an associative ring,
\item
commutative rings is a commutative ring.
\end{compactenum}
\end{theorem}

If $G$ and $H$ are abelian, written additively, 
then their direct product is usually called a \textbf{direct sum,}\index{direct sum} 
denoted by
\begin{equation*}
G\oplus H.
\end{equation*}

The direct sum $\Zmod 2\oplus\Zmod 2$ is the \textbf{Klein four
  group,}\index{Klein four group} denoted by 
\begin{equation*}
\Kfg
\end{equation*}
(for \emph{Vierergruppe}%%%%%
\footnote{According to Wikipedia, 
Klein gave this name to the group in 1884, 
but the name was later applied to four-person anti-Nazi resistance groups.}%
%%%%%%%%%%%%%%%%%%%%%%%%%
).  
This is the smallest group containing two elements
neither of which is a power of the other.

\begin{theorem}\label{thm:coord-proj}
If $\str A$ and $\str B$ are two algebras with the same signature,
then the functions
\begin{align*}
(x,y)&\mapsto x,&(x,y)\mapsto y
\end{align*}
are homomorphisms from $\str A\times\str B$ to $\str A$ and $\str B$ respectively.
\end{theorem}

\begin{theorem}\label{thm:can-inj}
If $\str A$ and $\str B$ are two groups or two associative rings,
then the functions
\begin{align*}
x&\mapsto(x,\gid),&y&\mapsto(\gid,y)
\end{align*}
are homomorphisms from $\str A$ and $\str B$ respectively to $\str A\times\str B$.
\end{theorem}

\subsection{Quotients}

The groups $(\Zmod n,0,-,+)$
and the rings $(\Zmod n,0,-,+,1,\cdot)$ 
are instances of a general construction.

Suppose $\sim$ is an equivalence-relation
on a set $A$, so that it partitions $A$ into equivalence-classes
\begin{equation*}
  \{x\in A\colon x\sim a\};
\end{equation*}
each such class can be denoted by an expression like one of the following:
\begin{align*}
&a/\mathord{\sim},&
&\eqc a,&
&\overline a.
\end{align*}
Each element of an equivalence-class is a \textbf{representative} of that class.  
The \textbf{quotient}\index{quotient} of $A$ by $\sim$
is the set of equivalence-classes of $A$ with respect to $\sim$; 
this set can be denoted by 
\begin{equation*}
A\modsim.
\end{equation*}
Suppose for some $n$ in $\upomega$ and some set $B$, we have $f\colon A^n\to B$.
Then there may or may not be a function $\tilde f$ from $(A\modsim)^n$ to $B$ such that the equation
\begin{equation}\label{eqn:wd}
\tilde f([x_0],\dots,[x_{n-1}])=f(x_0,\dots,x_{n-1})
\end{equation}
is an identity.
If there is such a function $\tilde f$, then it is unique.
In this case, the function $\tilde f$ is said to be 
\textbf{well-defined}\label{well-defined} by the given identity \eqref{eqn:wd}.
Note however that there are no ``ill-defined'' functions.
An ill-defined function would be a nonexistent function.
The point is that choosing a function $f$ 
and writing down the equation \eqref{eqn:wd} 
does not automatically give us a function $\tilde f$.
To know that there is such a function, we must check that
\begin{equation*}
a_0\sim x_0\land\dots\land a_{n-1}\sim x_{n-1}\lto f(a_0,\dots,a_{n-1})=f(x_0,\dots,x_{n-1}).
\end{equation*}
When this does hold (for all $a_i$), so that $\tilde f$ exists as in \eqref{eqn:wd}, then
\begin{equation}\label{eqn:tilde}
\tilde f\circ\mathrm p=f,
\end{equation}
where $\mathrm p$ is the function $(x_0,\dots,x_{n-1})\mapsto([x_0],\dots,[x_{n-1}])$ from $A^n$ to $(A\modsim)^n$.  
Another way to express the equation~\eqref{eqn:tilde} is to say that the following diagram \textbf{commutes:}\label{commutes}
\begin{equation*}
\xymatrix{
A^n\ar^f[r]\ar_{\mathrm p}[d]&B\\
(A/\mathord{\sim})^n\ar_{\tilde f}[ur]&
}
\end{equation*}
Suppose now $\str A$ is an algebra with universe $A$.
If for all $n$ in $\upomega$, 
for every distinguished $n$-ary operation $f$ of $\str A$,
there is an $n$-ary operation $\tilde f$ on $A\modsim$
as given by \eqref{eqn:wd}, then $\sim$ is a \textbf{congruence-relation} or \textbf{congruence}\label{congruence} on $\str A$.
In this case, the $\tilde f$ 
are the distinguished operations of a structure with universe $A\modsim$.  
This new structure is the \textbf{quotient} of $\str A$ by $\sim$ 
and can be denoted by
\begin{equation*}
\str A\modsim.
\end{equation*}
For example, by Theorem~\ref{thm:mod-n} on p.\ \pageref{thm:mod-n}, 
for each $n$ in $\N$, 
congruence \emph{modulo} $n$ is a congruence on $(\Z,0,-,+,1,\cdot)$.
Then the structure $(\Zmod n,0,-,+)$ 
can be understood as the quotient $(\Z,0,-,+)\modsim$,
and $(\Zmod n,0,-,+,1,\cdot)$ as $(\Z,0,-,+,1,\cdot)\modsim$.
The former quotient is an abelian group by Theorem~\ref{thm:Zmod-group},
and the latter quotient is a commutative ring by Theorem~\ref{thm:Zmod-ring} 
on p.\ \pageref{thm:Zmod-ring}.
These theorems are special cases of the next two theorems.
In fact the first of these makes verification of Theorem~\ref{thm:Zmod-group} easier.

\begin{theorem}\label{thm:cong}
Suppose $\sim$ is a congruence-relation on a semigroup $(G,\cdot)$.
\begin{compactenum}
\item
$(G,\cdot)\modsim$ is a semigroup.
\item
If $(G,\cdot)$ expands to a group $(G,{}\inv,\cdot)$, 
then $\sim$ is a congruence-relation on this,
and $(G,{}\inv,\cdot)\modsim$ is a group.
%and the quotient of the group by $\sim$ is a group.  
\item
If this group $(G,{}\inv,\cdot)$ is abelian, 
then so is $(G,{}\inv,\cdot)\modsim$.
\end{compactenum}
\end{theorem}

\begin{theorem}\label{thm:ring-q}
Suppose $(R,0,-,+,1,\cdot)$ is an associative ring,
and $\sim$ is a congruence-relation on the reduct $(R,+,\cdot)$.
\begin{compactenum}
  \item
$\sim$ is a congruence-relation on $(R,0,-,+,1,\cdot)$.
\item
The quotient $(R,0,-,+,1,\cdot)\modsim$ is also an associative ring.
\item
If the original ring is commutative, so is the quotient.
\end{compactenum}
\end{theorem}

For another example, there is a congruence-relation on $(\R,+)$ given by
  \begin{equation*}
  a\sim b\liff a-b\in\Z.
  \end{equation*}
There is a well-defined embedding of $(\R,0,-,+)\modsim$ in
$(\units{\C},1,{}\inv,\cdot)$ denoted by
$[a]\mapsto \exp(2\uppi\mi a)$.

\subsection{Subgroups}

We defined subgroups of symmetry groups on p.\ \pageref{subgroup},
and of course subgroups of arbitrary groups are defined the same way.
A \textbf{subgroup}\index{subgroup} of a group is just a substructure of the group, 
when this group is considered as having the full signature $\{\gid,{}\inv,\cdot\}$.  
More informally, a subgroup of a group is a subset containing the identity 
that is closed under multiplication and inversion.  

The subset $\N$ of $\Qp$ contains the identity and is closed under multiplication, 
but is not closed under inversion, and so it is not a subgroup of $\Qp$.
The subset $\upomega$ of $\Z$ contains the additive identity and is closed under addition,
but is not closed under additive inversion, and so it is not a subgroup of $\Z$.

\begin{theorem}\label{thm:subgp}
  A subset of a group is a subgroup if and only if it is non-empty and
  closed under the binary operation $(x,y)\mapsto xy\inv$.
\end{theorem}

If $ H$ is a subgroup of $G$, we write
\begin{equation*}
H\subgp G.  
\end{equation*}
One could write $H\leq G$ instead, 
if one wanted to reserve the expression $H<G$ for the case 
where $H$ is a \emph{proper} subgroup of $G$.  
We shall not do this.%%%%%
\footnote{I do think it is useful to reserve the notation $A\pincluded B$ 
for the case where $A$ is a proper subset of $B$, 
writing $A\included B$ when $A$ is allowed to be equal to $B$.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
However, starting on p.\ \pageref{psubgp},
we shall want an expression for this case:
then we shall just have to write
\begin{equation*}
H\psubgp G.
\end{equation*}
Meanwhile, we have the following examples.

\begin{theorem}\label{thm:subgp-ex}
%\mbox{}
\begin{compactenum}
	\item 
	For all groups $G$,
\begin{align*}
\trivgp&\subgp G,&G&\subgp G.
\end{align*}
\item
For all groups $G_0$ and $G_1$,
if $H_0\subgp G_0$ and $H_1\subgp G_1$, 
then
\begin{equation*}
H_0\times H_1\subgp G_0\times G_1.
\end{equation*}
\item
In particular, for all groups $G$ and $H$,
\begin{align*}
G\times\trivgp&\subgp G\times H,&
\trivgp\times H&\subgp G\times H.
\end{align*}
\item
For all groups $G$,
\begin{equation*}
\{(x,x)\colon x\in G\}\subgp G\times G.
\end{equation*}
\item
The subset
\begin{equation*}
\{\gid,(0\cdiv 1),(2\cdiv 3),(0\cdiv 1)(2\cdiv 3)\}
\end{equation*}
of $\Sym4$
is a subgroup isomorphic to $\Kfg$.
\item
If $\sim$ is a congruence-relation on a group $G$, then 
%the $\sim$-class of $\gid$, namely
\begin{equation*}
\{x\in G\colon x\sim\gid\}\subgp G.
\end{equation*}
%is a subgroup of~$G$.
\end{compactenum}
\end{theorem}

It is important to note that the converse of
the last part of the theorem is false in
general: there are groups $G$ with subgroups $H$ such that for no congruence-relation on $G$ is $H$ the congruence-class of the identity.  For example,\label{ex:32} let $G$ be $\Sym 3$, and let $H$ be the image of $\Sym2$ in $G$ under the obvious embedding mentioned in \S\ref{sect:sym}.  Then $H$ contains just the identity and $(0\cdiv 1)$.  If $\sim$ is a congruence-relation on $G$ such that $(0\cdiv 1)\sim\gid$, then
\begin{equation*}
(1\cdiv 2)(0\cdiv 1)(1\cdiv 2)\sim(1\cdiv 2)\gid(1\cdiv 2)\sim\gid;
\end{equation*}
but $(1\cdiv 2)(0\cdiv 1)(1\cdiv 2)=(0\cdiv 2)$, which is not in $H$.  See \S\ref{sect:normal} (p.~\pageref{sect:normal}) for the full story.

If $f$ is a homomorphism from $G$ to $H$, then the 
\textbf{kernel}%
\index{kernel} of $f$ is the
set
\begin{equation*}
  \{x\in G\colon f(x)=\gid\},
\end{equation*}
which can be denoted by $\Ker f$.  The \textbf{image}\index{image} of $f$ is
\begin{equation*}
  \{y\in H\colon y=f(x)\text{ for some $x$ in }G\},
\end{equation*}
that is, $\{f(x)\colon x\in G\}$; this can be denoted by $\im f$.
For example, considering $\sgn$ as a homomorphism from $\Sym n$ to $\units{\Q}$, we have
\begin{align*}
\ker(\sgn)&=\Alt n,&\im{\sgn}&=\{\pm1\}.
\end{align*}
If $g$ is $(x,y)\mapsto x$ from $G\times H$ to $G$ as in Theorem~\ref{thm:coord-proj},
and $h$ is $x\mapsto(x,\gid)$ from $G$ to $G\times H$ as in Theorem~\ref{thm:can-inj},
then
\begin{align*}
&\begin{gathered}
	\ker(g)=\{\gid\}\times H,\\
	\im g=G,
\end{gathered}&
&\begin{gathered}
	\ker(h)=\{\gid\},\\
	\im h=G\times\{\gid\}.
\end{gathered}
\end{align*}
An embedding (that is, an injective homomorphism) is also called a 
\textbf{monomorphism.}%
\index{monomorphism}
A surjective homomorphism is called
an \textbf{epimorphism.}%
\index{epimorphism}
In the last example, $g$ is an epimorphism,
and $h$ is a monomorphism.

\begin{theorem}\label{thm:ker-im}
  Let $f$ be a homomorphism from $G$ to $H$.
  \begin{compactenum}
    \item
$\Ker f\subgp G$.
\item
$f$ is a monomorphism if and only if $\Ker f=\trivgp$.
\item
$\im f\subgp H$.
  \end{compactenum}
\end{theorem}

There is a monomorphism
 from $\R\oplus\R$ into $\Mat[2\times2]{\R}$, namely
 \begin{equation*}
(x,y)\mapsto
\begin{pmatrix}
  x&y\\-y&x
\end{pmatrix}.
 \end{equation*}
One can define $\C$\label{C} to be the image of this monomorphism.  
One shows that $\C$ then is a sub-ring of $\Mat[2\times2]{\R}$ and is a field.  
The elements of $\C$ usually denoted by $1$ and $\mi$ are given by
\begin{align*}
1
&=  
\begin{pmatrix}
    1&0\\0&1
  \end{pmatrix},&
  \mi
&=
  \begin{pmatrix}
    0&1\\-1&0
  \end{pmatrix}.
\end{align*}
Then every element of $\C$ is $x+y\mi$ for some unique $x$ and $y$ in $\R$.  
The function $z\mapsto\bar z$ is an automorphism of $\C$, where
\begin{equation*}
  \overline{x+y\mi}=x-y\mi.
\end{equation*}
There is then a monomorphism from $\C\oplus\C$ into $\Mat[2\times2]{\C}$,
namely
 \begin{equation*}
(x,y)\mapsto
\begin{pmatrix}
  x&y\\-\bar y&\bar x
\end{pmatrix};
 \end{equation*}
its image is denoted by\label{Ham}
\begin{equation*}
  \Ham
\end{equation*}
in honor of its discoverer Hamilton: 
it consists of the \textbf{quaternions.}\index{quaternion}  
One shows that $\Ham$ is a 
sub-ring of $\Mat[2\times2]{\C}$ 
and that all non-zero elements of $\Ham$ are invertible, 
although $\Ham$ is not commutative.
The element of $\Ham$ usually denoted by $\mj$ is given by
\begin{equation*}
  \mj=
  \begin{pmatrix}
    0&1\\-1&0
  \end{pmatrix}.
\end{equation*}

\begin{theorem}\label{thm:subgroups}
  An arbitrary intersection of subgroups is a subgroup.
\end{theorem}

\begin{proof}
This is an instance of the general observation that an arbitrary intersection of substructures is a substructure.
\end{proof}

\subsection{Generated subgroups}\label{subsect:gen-sub}

Given a subset $A$ of (the universe of) a group $G$, we can \emph{close}
under the three group-operations, 
obtaining a subgroup, $\gpgen A$.\label{gpgen}
For a formal definition, we let
\begin{equation*}
  \gpgen A=\bigcap\family S,
\end{equation*}
where $\family S$ is the set of all subgroups of $G$ that
include $A$.  Note that
\begin{equation*}
\gpgen{\emptyset}=\trivgp.
\end{equation*}
The subgroup $\gpgen A$ of $G$ 
is said to be \textbf{generated}\index{generated} by $A$,
and the elements of $A$ are said to be, collectively, \textbf{generators} of $\gpgen A$.  
If $A=\{a_0,\dotsc,a_{n-1}\}$, then for $\gpgen A$ we may write
\begin{equation*}
  \gpgen{a_0,\dotsc,a_{n-1}}.
\end{equation*}
In this case, $\gpgen A$ is said to be \textbf{finitely generated.}\index{finitely generated} 
If also $n=1$, then $\gpgen A$ is said to be \textbf{cyclic.}
It is easy to describe cyclic groups as sets,
and almost as easy to describe finitely generated \emph{abelian} groups:

\begin{theorem}\label{thm:cyc}
Let $G$ be a group.
\begin{compactenum}
\item
If $a\in G$, then
\begin{equation*}
\gpgen a=\{a^n\colon n\in\Z\}.
\end{equation*}
\item
If $\{a_0,\dots,a_{n-1}\}\included G$, and $G$ is abelian, then
\begin{multline*}
\gpgen{a_0,\dots,a_{n-1}}\\
=\{x_0a_0+\dots+x_{n-1}a_{n-1}\colon(x_0,\dots,x_{n-1})\in\Z^n\}.
\end{multline*}
\end{compactenum}
\end{theorem}

\begin{proof}
\begin{asparaenum}
\item
Let $f$ be the homomorphism $x\mapsto a^x$ from $\Z$ to $G$ 
as in Theorem~\ref{thm:exp-in-groups} (p.~\pageref{thm:exp-in-groups}).
We have to show $\gpgen a=\im f$.  
Since $a\in\im f$, it is now enough, by Theorem~\ref{thm:ker-im},
to show $\im f\included H$ for all subgroups $H$ of $G$ that contain $a$.
But for such $H$ we have
$a^0\in H$, and 
if $a^n\in H$, 
then $a^{n\pm 1}\in H$,
so by induction, $\im f\included H$.  
\item
The indicated set is a subgroup of $G$ by Theorem~\ref{thm:subgp}, 
and it contains the $a_i$.
It remains to note that the indicated set 
is included in every subgroup of $G$ that contains the $a_i$.\qedhere
\end{asparaenum}
\end{proof}

As examples of cyclic groups, we have $\Z$ and the $\Zmod n$.
Indeed,
\begin{align*}
\Z&=\gpgen1,&\Zmod n&=\gpgen{[1]}.
\end{align*}

\begin{theorem}\label{thm:Z-subg}
All subgroups of $\Z$ are cyclic.  
All nontrivial subgroups of $\Z$ are isomorphic to $\Z$.
\end{theorem}

\begin{proof}
Suppose $G$ is a nontrivial subgroup of $\Z$.
Then $G$ has positive elements,
so it has a least positive element, $n$.
If $a\in G$, then all residues of $a$ \emph{modulo} $n$ belong to $G$.
By Theorem~\ref{thm:res} (p.\ \pageref{thm:res}),
$a$ has a residue in $n$ (that is, $\{0,\dots,n-1\}$),
and so this residue must be $0$.
Thus $n\divides a$, so $a\in\gpgen n$.
Therefore $G=\gpgen n$.
The function $x\mapsto nx$ from $\Z$ to $\gpgen n$ is a surjective homomorphism;
that it is injective can be derived from Corollary~\ref{cor:mulcan} (p.\ \pageref{cor:mulcan}).
\end{proof}

\begin{theorem}\label{thm:mZnx}
If $n$ is a positive integer and $m$ is an arbitrary integer, then
\begin{equation*}
\gpgen{[m]}=\Zmod n\iff[m]\in\Zmodu n.
\end{equation*}
\end{theorem}

\begin{proof}
Each condition means the congruence
\begin{equation*}
mx\equiv1\pmod n
\end{equation*}
is soluble.
\end{proof}

The language of generated subgroups
is useful for establishing a basic theorem of number theory.
In $\Z$, the relation of dividing is transitive:
\begin{equation*}
a\divides b\And b\divides c\implies a\divides c.
\end{equation*}
This is just because $ax=b$ and $by=c$ imply $axy=c$.
A \textbf{common divisor} of two integers is just a divisor of each of them.
Equivalently, a common divisor of $a$ and $b$ is some $c$ such that
\begin{equation*}
\gpgen{a,b}\included\gpgen c.
\end{equation*}
Hence it makes sense to speak of 
a \textbf{greatest common divisor}\label{gcd} of two integers:  
it is a common divisor that is divisible by each common divisor.
Since $0$ divides only itself, 
it is not a common divisor of two \emph{different} integers.
If $a\neq0$, then $a$ is a greatest common divisor of $a$ and $0$.
Defining\label{abs}
\begin{equation*}
\abs a=
\begin{cases}
a,&\text{ if }a\geq0,\\
-a,&\text{ if }a<0,	
\end{cases}
\end{equation*}
we have
\begin{gather*}
c\divides d\And d\neq0\implies\abs c\leq\abs d,\\
c\divides d\And d\divides c\iff\abs c=\abs d,	
\end{gather*}
so if $d$ is a greatest common divisor of $a$ and $b$,
then so is $-d$, but nothing else.
In this case we denote $\abs d$ by
\begin{equation*}
\gcd(a,b);
\end{equation*}
this is greater (in the usual sense) than all other common divisors of $a$ and $b$.

\begin{theorem}\label{thm:ax+by=d}
Any two integers $a$ and $b$ have a greatest common divisor, and
\begin{equation*}
\gpgen{a,b}=\gpgen{\gcd(a,b)},
\end{equation*}
so that the equation
\begin{equation*}
ax+by=\gcd(a,b)
\end{equation*}
is soluble.
\end{theorem}
 
\begin{proof}
By Theorem~\ref{thm:Z-subg}, there is $d$ such that
$\gpgen{a,b}=\gpgen d$.
Since we have
\begin{equation*}
c\divides d\iff\gpgen d\included\gpgen c,
\end{equation*}
it follows that $d$ is a greatest common divisor of $a$ and $b$.
Then $\gcd(a,b)=\abs d$, so $\gpgen{\gcd(a,b)}=\gpgen d$.
\end{proof} 
 
A common divisor of $a$ and $b$ is a common divisor of $\abs a$ and $\abs b$.
The proof of Theorem~\ref{thm:Z-subg} suggests a way to find greatest common divisors,
which is the \textbf{Euclidean algorithm,}
established in Propositions VII.1 and 2 of the \emph{Elements.}
Suppose $a_0$ and $a_1$ are positive integers.
We define a sequence $(a_0,a_1,\dots)$ of positive integers 
by letting $a_{k+2}$ be the residue in $a_{k+1}$ of $a_k$ \emph{modulo} $a_{k+1}$,
if this residue is positive; otherwise $a_{k+2}$ is undefined.
Then
\begin{equation*}
a_{k+1}>a_{k+2},
\end{equation*}
so the sequence must have a last term; this is $\gcd(a_0,a_1)$.
When this is $1$, then $a_0$ and $a_1$ are said to be
\textbf{prime to} one another, or \textbf{relatively prime.}
In this case,
by Theorem~\ref{thm:ax+by=d},
the equation
\begin{equation*}
a_0x+a_1y=1
\end{equation*}
is soluble in $\Z$.

If $a\equiv b\pmod n$, then $\gcd(a,n)=\gcd(b,n)$.
Hence the following makes sense:

\begin{theorem}\label{thm:Znx}
For all positive integers $n$,
\begin{equation*}
\Zmodu n=\{x\in\Zmod n\colon\gcd(x,n)=1\}.
\end{equation*}
\end{theorem}

\begin{proof}
By the proof of Theorem~\ref{thm:mZnx},
$\Zmodu n$ consists of those $m$ in $\Zmod n$ such that the congruence
\begin{equation*}
mx\equiv1\pmod n
\end{equation*}
is soluble, that is, the equation $mx+ny=1$ is soluble,
so that $\gcd(m,n)$ must be $1$.
Conversely, if $\gcd(m,n)=1$, then the equation $mx+ny=1$ is soluble by Theorem~\ref{thm:ax+by=d}.
\end{proof}

For an arbitrary subset $A$ of an arbitrary group,
it is not so easy\label{gpgen-hard} 
to give a description of the elements of $\gpgen A$.
We shall do it 
by means of Theorem~\ref{thm:free-gp} on p.\ \pageref{thm:free-gp}.
Meanwhile, we may note some more specific examples:

The subgroup $\gpgen{(0\cdiv 1),(2\cdiv 3)}$ of $\Sym4$ 
is the subgroup given above in Theorem~\ref{thm:subgp-ex}
as being isomorphic to $\Kfg$.

The subgroup $\gpgen{\mi,\mj}$ of $\units{\Ham}$
is the \textbf{quaternion group,}\index{quaternion group} 
denoted by
\begin{equation*}\label{quat}
\quat;
\end{equation*}
it has eight elements: 
$\pm1$, $\pm\mi$, $\pm\mj$, and $\pm\mk$, where $\mk=\mi\mj$.
We consider this group further in the next section (\S\ref{sect:cyclic}) and later.

\begin{theorem}\label{thm:Dih-2}
If $n\geq3$, let 
\begin{gather*}
\sigma_n=
\begin{cycle}
      0\cdiv 1\cdiv \dots\cdiv n-1
    \end{cycle},\\
    \beta=
\begin{cycle}
      1\cdiv n-1
    \end{cycle}
\begin{cycle}
      2\cdiv n-2
    \end{cycle}
\cdots
\begin{cycle}
      m\cdiv n-m
    \end{cycle}	
\end{gather*}
in $\Sym n$,
where $m$ is the greatest integer that is less than $n/2$.
Then
\begin{equation*}
\Dih n=\gpgen{\sigma_n,\beta}=\gpgen{\beta,\beta\sigma_n}.
\end{equation*}
\end{theorem}

\begin{proof}
The subset $\{\sigma_n{}^i\beta^j\colon(i,j)\in n\times 2\}$ of $\Sym n$
is a subset of $\Dih n$ and has $2n$ distinct elements,
so by Theorem~\ref{thm:Dih} (p.~\pageref{thm:Dih}) it must be all of $\Dih n$.
Moreover
$\gpgen{\beta,\beta\sigma_n}\subgp\gpgen{\sigma_n,\beta}$, but also $\gpgen{\sigma_n,\beta}\subgp\gpgen{\beta,\beta\sigma_n}$ 
since $\sigma_n=\beta\cdot\beta\sigma_n$. 
\end{proof}

Our analysis of $\Dih n$ is continued in Theorem~\ref{thm:Dn} below.

In case $n=0$, the group $\gpgen{a_0,\dotsc,a_{n-1}}$ 
should logically be denoted by $\gpgen{\ }$.  
Probably most people write $\gpgen{\gid}$ instead.  
This is not wrong, but is redundant, 
since every group contains an identity, 
and the angle brackets indicate that a group is being given.
The practice of these notes will be to write $\trivgp$.

\section{Order}\label{sect:cyclic}

The \textbf{order}\index{order!--- of a group} of a group is its cardinality.  
The order of a group $G$ is therefore denoted by
\begin{equation*}
  \order G.
\end{equation*}
We have examples in Theorems~\ref{thm:Sym-ord} and \ref{thm:Dih} (pp.~\pageref{thm:Sym-ord}--\pageref{thm:Dih}).
If $a\in G$, then the order of the cyclic subgroup $\gpgen a$ of $G$
is said to be the \textbf{order}\index{order!--- of an element} of $a$ simply
and is denoted by
\begin{equation*}
  \order a.
\end{equation*}
For example, in the quaternion group $\quat$ (p.~\pageref{quat} above), we have
\begin{align*}
\gpgen{\mi}&=\{1,\mi,-1,-\mi\},&\order{\mi}&=4.
\end{align*}
In the notation of Theorem~\ref{thm:Dih-2} above,
\begin{align*}
\order{\sigma_n}&=n,&
\order{\beta}&=2=\order{\beta\sigma_n}.
\end{align*}
For another example, we have the following.

\begin{theorem}
The order of a finite permutation 
is the least common multiple of the orders of its disjoint cyclic factors.
\end{theorem}

\begin{theorem}\label{thm:el-ord}
In a group, if $a$ is an element of finite order $n$, then
\begin{equation*}
	\gpgen a=\{a^i\colon i\in n\},
\end{equation*}
and $x\mapsto a^x$ is a well-defined isomorphism from $\Zmod n$ to $\gpgen a$, 
so in particular
\begin{equation*}
a^n=\gid.
\end{equation*}
\end{theorem}

\begin{proof}
Since $\gpgen a$ does not have $n+1$ distinct elements,
for some $i$ and $j$ we have $0\leq i<j\leq n$, but $a^i=a^j$.
Therefore $\gid=a^{j-i}$, 
and hence $a^k=a^{\ell}$ whenever $k\equiv\ell\pmod{j-i}$.  
Consequently $\gpgen a$ has at most $j-i$ elements, that is, $n\leq j-i$.  
Since also $j-i\leq n$, we have $n=j-i$, and in particular $a^n=a^{j-i}=\gid$.
\end{proof}

For integers $a$ and $b$, 
the notation $a\divides b$ was defined on p.\ \pageref{divides}.

\begin{theorem}\label{thm:Znm}
The following conditions on positive integers $m$ and $n$ are equivalent.
\begin{compactenum}
\item
$\Zmod n$ has a subgroup of order $m$.
\item
$\Zmod n$ has a unique subgroup of order $m$.
\item
$m\divides n$.
\end{compactenum}
Under these conditions, the subgroup is $\gpgen{n/m}$.
\end{theorem}

The orders of certain generators of a group may determine the group up to isomorphism.
We work out a couple of examples in the next two theorems.

\begin{theorem}\label{thm:Dn}
If $n>2$, and $G=\gpgen{a,b}$, where
\begin{align*}
\order a&=n,&
\order b&=2,&
\order{ab}&=2,
\end{align*}
then
\begin{equation*}
G\cong\Dih n.
\end{equation*}
\end{theorem}

\begin{proof}
Assume $n>2$.
Since $abab=\gid$ and $b\inv=b$, we have
\begin{align*}
  ba&=a\inv b,&
ba\inv&=ab.
\end{align*}
Therefore $ba^k=a^{-k}b$ for all integers
  $k$.  This shows 
\begin{equation*}
G=\{a^ib^j\colon(i,j)\in n\times 2\}.
\end{equation*}
It remains to show $\order G=2n$.
  Suppose 
  \begin{equation*}
  a^ib^j=a^kb^{\ell},
  \end{equation*}
  where $(i,j)$ and $(k,\ell)$ are in $n\times 2$.  Then 
  \begin{equation*}
  a^{i-k}=b^{\ell-j}.
  \end{equation*}
If $b^{\ell-j}=\gid$, then $\ell=j$ and $i=k$.  The alternative is that
 $b^{\ell-j}=b$.  In this case,
\begin{equation*}
n\divides2(i-k).  
\end{equation*}
If $n\divides i-k$, then $i=k$ and hence $j=\ell$.  The only other
possibility is that $n=2m$ for some $m$, and $i-k=\pm m$, so that $a^m=b$.  
But then $aa^maa^m=a^2$, while $abab=\gid$, so $n=2$.
\end{proof}

According to this theorem,
if a group with certain abstract properties of $\Dih n$ exists,
then that group is isomorphic to $\Dih n$.
In \S\ref{sect:pres}, we shall develop a way to create a group $G$ with those properties,
regardless of whether we know about $\Dih n$.
Then, using Theorem~\ref{thm:Dn}, we shall be able to conclude
that $G$ is isomorphic to $\Dih n$.
This result is Theorem~\ref{thm:Dn-pres} (p.~\pageref{thm:Dn-pres}).

\begin{theorem}\label{thm:quat}
If $G=\gpgen{a,b}$, where 
\begin{align*}
\order a&=4,&
b^2&=a^2,&
ba&=a^3b,
\end{align*}
then, under an isomorphism taking $a$ to $\mi$ and $b$ to $\mj$,
\begin{equation*}
G\cong\quat.
\end{equation*}
\end{theorem}

\begin{proof}
Since $ba=a^3b$ and $\order a=4$, we have also
\begin{equation*}
ba\inv=ba^3=a^9b=ab,
\end{equation*}
so we can write every element of $G$ 
as a product $a^ib^j$ for some $i$ and $j$ in $\Z$.
By Theorem~\ref{thm:el-ord}, since $\order a=4$, we can require $i\in4$.
Similarly, since $b^2=a^2$, we can require $j\in 2$.
In $\quat$, the elements $\mi$ and $\mj$ have the given properties of $a$ and $b$.
Moreover $\size{\quat}=8$, 
so that if $(i,j)$ and $(k,\ell)$ are distinct elements of $4\times2$,
then
\begin{equation*}
\mi^i\mj^j\neq\mi^k\mj^{\ell}.
\end{equation*}
Therefore there is a well-defined surjective function $\mi^i\mj^j\mapsto a^ib^j$ 
from $\quat$ to $G$,
and this function is a homomorphism.
It remains to show $\size G=8$.
Suppose $(i,j)$ and $(k,\ell)$ are  in $4\times 2$, and
\begin{equation*}
a^ib^j=a^kb^{\ell}.
\end{equation*}
Then $a^{i-k}=b^{\ell-j}$ and hence
\begin{equation*}
a^m=b^n
\end{equation*}
for some $n$ in $2$ and $m$ in $4$.
If $n=0$, then $m=0$ (since $\order a=4$), and so $(i,j)=(k,\ell)$.
But $a\neq b$ (since $ba=a^3b$ and $\order a=4$).
Similarly $a^3\neq b$.
Finally, $a^2\neq b$ (since $b^2=a^2$ and $\order a=4$).
Thus $n\neq1$, so $n=0$.
\end{proof}

As with $\Dih n$, so with $\quat$, we shall be able to create the group
using only the abstract properties just given, 
in Theorem~\ref{thm:quat-pres} (p.~\pageref{thm:quat-pres}).

\section{Cosets}\label{sect:cosets}

Suppose $H\subgp G$.  If $a\in G$, let
\begin{gather*}
  aH=\{ax\colon x\in H\},\\
Ha=\{xa\colon x\in H\}.
\end{gather*}
Each of the sets $aH$ is a \textbf{left coset}\index{left!---
  coset}\index{coset} 
of $H$, and the set $\{xH\colon x\in G\}$ of left cosets
is denoted by 
\begin{equation*}
  G/H.
\end{equation*}
Each of the sets $Ha$ is a \textbf{right coset}\index{right!--- coset} of $H$, and the set $\{Hx\colon x\in G\}$ of right cosets
is denoted by 
\begin{equation*}
  H\backslash G.
\end{equation*}
Note that $H$ itself is both a left and a right coset of itself.

Sometimes, for each $a$ in $G$, we have $aH=Ha$.  For example, this is the case when $G=G_0\times G_1$, and $H=G_0\times\trivgp$, so that, if $a=(g_0,g_1)$, then
\begin{equation*}
aH=G_0\times\{g_1\}=Ha.
\end{equation*}
Sometimes left and right cosets are different, as in the example\label{ex:32again} on p.\ \pageref{ex:32}, where $G=\Sym 3$, and $H$ is the image of $\Sym 2$ in $G$.  In this case
\begin{align*}
(0\cdiv 2)H&=\{(0\cdiv 2),(0\cdiv 1\cdiv 2)\},& H(0\cdiv 2)&=\{(0\cdiv 2),(0\cdiv 2\cdiv 1)\},\\
(1\cdiv 2)H&=\{(1\cdiv 2),(0\cdiv 2\cdiv 1)\},& H(1\cdiv 2)&=\{(1\cdiv 2),(0\cdiv 1\cdiv 2)\}.
\end{align*}
Moreover, there are no other cosets of $H$, besides $H$ itself, by the next theorem; so in the example, no left coset, besides $H$, is a right coset.

\begin{theorem}\label{thm:cosets}
Suppose $H\subgp G$.
The left cosets of $H$ in $G$ compose a partition of $G$.  Likewise for the right cosets.  All
cosets of $H$ have the same size; also, $G/H$ and
  $H\backslash G$ have the same size.
\end{theorem}

\begin{proof}
  We have $a\in aH$.  Suppose $aH\cap bH\neq\emptyset$.  Then $ah=bh_1$ for some $h$ and $h_1$ in $H$, so that $a=bh_1h\inv$, which is in $bH$.  Thus $a\in bH$, and hence $aH\included bH$.  By symmetry of the argument, we have also $bH\included aH$, and therefore $aH=bH$.  Hence the left cosets compose a partition of $G$.  By symmetry again, the same is true for the right cosets.
  
All cosets of $H$ have the same size as $H$, since
  the map $x\mapsto ax$ from $H$ to $aH$ is a bijection with inverse $x\mapsto a\inv x$, and likewise $x\mapsto xa$ from $H$ to $Ha$ is a bijection.  (One might see this as an application of Cayley's Theorem, Theorem~\ref{thm:Cay}, p.\ \pageref{thm:Cay}.)    
  
   Inversion is a
  permutation of $G$ taking $aH$ to $Ha\inv$, so $G/H$ and
  $H\backslash G$ must have the same size.
\end{proof}

\begin{corollary}\label{cor:cosets-1}
If $H\subgp G$, then the relation $\sim$ on $G$ defined by
\begin{equation*}
a\sim x\liff aH=xH
\end{equation*}
is an equivalence-relation, and
\begin{equation*}
G/H=G/\mathord{\sim}.
\end{equation*}
\end{corollary}

\begin{corollary}\label{cor:cosets-2}
If $H\subgp G$ and $aH=Hb$, then $aH=Ha$.
\end{corollary}

\begin{proof}
Under the assumption, $a\in Hb$, so $Ha\included Hb$, and therefore $Ha=Hb$.
\end{proof}

The cardinality of $G/H$ (or of $H\backslash G$) 
is called the \textbf{index}\index{index} of $H$ in $G$ 
and can be denoted by
\begin{equation*}
  [G:H].
\end{equation*}
If $G$ is finite, then by the last theorem,
\begin{equation*}
[G:H]=\frac{\size G}{\size H}.
\end{equation*}
However, $[G:H]$ may be finite, even though $G$ is not.  
In this case, $H$ must also be infinite, 
and indeed the last equation may be understood to say this, 
since an infinite cardinal divided by a finite cardinal should still be infinite.

Of the next theorem, we shall be particularly interested in a special case, 
Lagrange's Theorem, in the next section.

\begin{theorem}\label{thm:KHG}
  If $K\subgp H\subgp G$, then $[G:K]=[G:H][H:K]$.
\end{theorem}

\begin{proof}
Every left coset of $K$ is included in a left coset of $H$.
  Indeed, if $bK\cap aH\neq\emptyset$, then as in the proof of
  Theorem~\ref{thm:cosets}, $bK\included aH$.
  Moreover, every left coset of $H$ includes the same number of left cosets of $K$.  For, the bijection $x\mapsto ax$ that takes $H$ to $aH$ also takes each coset $bK$ of $K$ to a coset $abK$ of $K$.
\end{proof}

\section{Lagrange's Theorem}\label{sect:Lagrange}

According to \cite[p.~141--2]{MR1517828}, 
the following ``is implied but not explicitly proved'' 
in a memoir by Lagrange published in 1770--1.

\begin{theorem}[Lagrange]\label{thm:Lagrange}\index{Lagrange's
    Theorem}\index{theorem!Lagrange's Th---}
If $H\subgp G$ and $G$ is finite, then $\order H$ divides $\order G$.
\end{theorem}

\begin{proof}
Use Theorem~\ref{thm:KHG} when $K=\trivgp$.
\end{proof}

\begin{corollary}
  If $G$ is finite and $a\in G$, then $a^{\order G}=\gid$.
\end{corollary}

\begin{proof}
$a^{\order a}=\gid$ by Theorem~\ref{thm:el-ord} (p.~\pageref{thm:el-ord}), 
and $\order a$ divides $\order G$.
\end{proof}

Cauchy's Theorem (p.\ \pageref{thm:Cauchy})
and its generalization,
the first Sylow Theorem (p.\ \pageref{thm:Sylow-1}),
are partial converses of Lagrange's Theorem.

Meanwhile, some basic results of number theory
can be seen as applications of Lagrange's Theorem.
First we obtain a classification of certain finite groups.
An integer greater than $1$ is called \textbf{prime}\label{prime-number}
if its only divisors are itself and $1$.

\begin{theorem}
All groups of prime order are cyclic.
\end{theorem}

\begin{proof}
Say $\order G=p$.  
There is $a$ in $G\setminus\trivgp$, 
so $\order a>1$; 
but $\order a$ divides $p$, so $\order a=p$, and therefore $G=\gpgen a$.
\end{proof}

The following can be obtained
as a corollary of Theorem~\ref{thm:Znx} (p.\ \pageref{thm:Znx});
but we can obtain it also from Lagrange's Theorem.%%%%%
\footnote{This is observed by Timothy Gowers, editor of \cite{MR2467561},
in a Google+ article of December 21, 2013.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{theorem}\label{thm:p-prime}
An integer $p$ that is greater than $1$ is prime if and only if
\begin{equation*}
\Zmodu p=\{1,\dots,p-1\}.
\end{equation*}
\end{theorem}

\begin{proof}
Say $1<a<p$ and $a\in\Zmodu p$, so that $ac\equiv1\pmod p$ for some $c$.
If $ab=p$, then $ab\equiv0$, so $abc\equiv0$, hence $b\equiv0$, which is absurd.
Thus $a\ndivides p$.
Hence, if $\Zmodu p=\{1,\dots,p-1\}$, then $p$ must be prime.

Now suppose $p$ is prime and $1<a<p$,
so that $a\ndivides p$.  
But $\gcd(a,p)\divides p$ and $1\leq\gcd(a,p)\leq a$,
so $\gcd(a,p)=1$, and therefore $a\in\Zmodu p$ by Theorem~\ref{thm:Znx}.

Alternatively, $\gpgen a$ has order greater than $1$, 
so by Lagrange's Theorem this order must be $p$.
In particular $ab\equiv1\pmod p$ for some $b$, so $a\in\Zmodu p$.
\end{proof}

\begin{theorem}[Fermat]\label{thm:Fermat}
If the prime $p$ is not a factor of $a$, then
\begin{equation}\label{eqn:Fermat}
  a^{p-1}\equiv 1\pmod p.
\end{equation}
Hence for all integers $a$,
\begin{equation}\label{eqn:Fermat2}
a^p\equiv a\pmod p.
\end{equation}
\end{theorem}

\begin{proof}
By the previous theorem, 
if $p\ndivides a$, then $[a]\in\Zmodu p$, and this group has order $p-1$,
so \eqref{eqn:Fermat} holds by Lagrange's Theorem.
Also \eqref{eqn:Fermat} implies \eqref{eqn:Fermat2},
and the latter holds trivially if $p\divides a$.
\end{proof}

If $n\in\N$, then by Theorem~\ref{thm:Znx}, 
the order of $\Zmodu n$ 
is the number of elements of $\Zmod n$ that are prime to $n$.
Let this number be denoted by
\begin{equation*}
  \upphi(n).
\end{equation*}
This then is the number of generators of $\Zmod n$, that is,
the number of elements $k$ of $\Zmod n$ such that $\gpgen k=\gpgen 1$.
This feature of $\upphi(n)$ will be used in Theorem~\ref{thm:prim-roots} (p.\ \pageref{thm:prim-roots}).

\begin{theorem}[Euler]\label{thm:Euler}
\index{Euler's Theorem}
  \index{theorem!Euler's Th---} 
  If $\gcd(a,n)=1$, then
  \begin{equation*}
a^{\upphi(n)}\equiv 1\pmod n.
\end{equation*}
\end{theorem}

\begin{proof}
If $\gcd(a,n)=1$, then $[a]\in\Zmodu n$ by Theorem~\ref{thm:Znx}.
\end{proof}


\section{Normal subgroups}\label{sect:normal}

If $H\subgp G$, we investigate the possibility of defining a multiplication on $G/H$ so that
\begin{equation}\label{eqn:xHyH}
(xH)(yH)=xyH.
\end{equation}
In any case, each member of this equation is a well-defined subset of $G$.  The question is when they are the same.  Continuing with the example from pages \pageref{ex:32} and \pageref{ex:32again}, where $G=\Sym3$ and $H=\gpgen{(0\cdiv 1)}$, we have
\begin{gather*}
(1\cdiv 2)H(1\cdiv 2)H=\{\gid,(0\cdiv 1),(0\cdiv 2),(0\cdiv 1\cdiv 2)\},\\
(1\cdiv 2)(1\cdiv 2)H=H=\{\gid,(0\cdiv 1)\},
\end{gather*}
so \eqref{eqn:xHyH} fails in this case.

\begin{theorem}\label{thm:n}
  Suppose $H\subgp G$.  The following are equivalent:
  \begin{compactenum}
    \item
$G/H$ is a group whose multiplication is given by \eqref{eqn:xHyH}.
\item
Every left coset of $H$ is a right coset.
\item
$aH=Ha$ for all $a$ in $G$.
\item
$a\inv Ha=H$ for all $a$ in $G$.
  \end{compactenum}
\end{theorem}

\begin{proof}
Immediately the last two conditions are equivalent, 
and they imply the second.  
The second implies the third, 
by Corollary~\ref{cor:cosets-2} (p.~\pageref{cor:cosets-2}).

Suppose now the first condition holds.  For all $h$ in $H$, since $hH=H$, we have
\begin{equation*}
aH=\gid aH=\gid HaH=hHaH=haH,
\end{equation*}
hence $a\inv haH=H$, so $a\inv ha\in H$.  Thus $a\inv Ha\included H$, so $a\inv Ha=H$.
Conversely, if the third condition holds, then
\begin{equation*}
(xH)(yH)=xHHy=xHy=xyH.
\end{equation*}
In this case, 
the equivalence-relation $\sim$ on $G$ given as in Corollary~\ref{cor:cosets-1} 
(p.~\pageref{cor:cosets-1}) by
\begin{equation*}
a\sim x\liff aH=xH
\end{equation*}
is a congruence-relation, and so, by Theorem~\ref{thm:cong} (p.~\pageref{thm:cong}), $G/H$ is a group with respect to the proposed multiplication.
\end{proof}

A subgroup $H$ of $G$ meeting any of these equivalent conditions is
called \textbf{normal,}\index{normal!--- subgroup} and in this case we write
\begin{equation*}
  H\nsubgp G.
\end{equation*}
As trivial examples, we have
\begin{align*}
G&\nsubgp G,&\trivgp&\nsubgp G.
\end{align*}
Only slightly less trivially, all subgroups of abelian groups are normal subgroups.  
More examples arise from the following.

\begin{theorem}\label{thm:index-2}
If $[G:H]=2$, then $H\nsubgp G$.
\end{theorem}

If $n>1$, since $[\Sym n:\Alt n]=2$, we now have
\begin{equation*}
\Alt n\nsubgp\Sym n.
\end{equation*}
Of course we have this trivially if $n\leq1$.

In general, if $N\nsubgp G$, then the group $G/N$ is called the
\textbf{quotient-group}% 
\index{quotient!--- group}\index{group!quotient ---} of
$G$ by $N$.  In this case, we can write the group also as
\begin{equation*}
\frac GN.
\end{equation*}

\begin{theorem}\label{thm:NGHG}
If $N\nsubgp G$ and $H\subgp G$, then $N\cap H\nsubgp H$.
(That is, normality is preserved in subgroups.)
\end{theorem}

\begin{proof}
  The defining property of normal subgroups is universal.  
In particular,
  $N\nsubgp G$ means that the sentence
  \begin{equation*}
\Forall x\Forall y(x\in N\to yxy\inv\in N)
\end{equation*}
is true in the structure $(G,N)$.  Therefore the same sentence is true in every substructure of $(G,N)$.  If $H\subgp G$, then $(G,N\cap H)$ is a substructure of $(G,N)$.
\end{proof}

For example, if $m<n$, and we identify $\Sym m$ 
with its image in $\Sym n$ under $\sigma\mapsto\sigma\cup\id_{n\setminus m}$,
then $\Sym m\cap\Alt n\nsubgp\Sym m$.
But then, we already know this, since $\Sym m\cap\Alt n=\Alt m$.

In proving Theorem~\ref{thm:Dih-2} (p.~\pageref{thm:Dih-2}),
we showed that every element of $\Dih n$ is a product $gh$,
where $g\in\gpgen{\sigma_n}$ and $h\in\gpgen{\beta}$.
Note that, since $\order{\sigma_n}=n$ and $\size{\Dih n}=2n$,
by Theorem~\ref{thm:index-2} we have $\gpgen{\sigma_n}\nsubgp\Dih n$.
Thus our result is a special case of the following.

\begin{lemma}
If $N\nsubgp G$ and $H\subgp G$, 
then $\gpgen{N\cup H}=NH$.  
\end{lemma}

\begin{proof}
Since
\begin{equation*}
N\cup H\included NH\included\gpgen{N\cup H},
\end{equation*}
it is enough to show $NH\subgp G$.
Suppose $n\in N$ and $h\in H$.  
Then $nh=hh\inv nh$.
Since $N\nsubgp\gpgen{N\cup H}$, we have
$h\inv nh\in N$, so $nh\in HN$. 
Thus $NH\included HN$, so by symmetry $NH=HN$.  Therefore
\begin{multline*}
NH(NH)\inv=NHH\inv N\inv=NHHN\\
\included NHN=NNH\included NH, 
\end{multline*}
that is,
$NH$ is closed under $(x,y)\mapsto xy\inv$.  Since $NH$ also contains
$\gid$, it is a subgroup of $G$ by Theorem~\ref{thm:subgp}. 
\end{proof}

\begin{theorem}\label{thm:isdp}
Suppose $N\nsubgp G$ and $H\subgp G$ and
$N\cap H=\trivgp$.  Then the surjection $(x,y)\mapsto xy$ from
$N\times H$ to $NH$ is a bijection,
and so the structure of a group is induced on $N\times H$.
\end{theorem}

\begin{proof}
If $g$ and $h$ are in $H$, and $m$ and $n$ are in $N$, and $gm=hn$,
then  
\begin{equation*}
  h\inv g=nm\inv,
\end{equation*}
so each side must be $\gid$, and hence $g=h$ and $m=n$.  
\end{proof}

Multiplication in $NH$ is given by
\begin{equation}\label{eqn:sdp}
(mg)(nh)=(m\cdot gng\inv)(gh), 
\end{equation}
while multiplication in the direct product $(N,\cdot)\times(H,\cdot)$ is given by
\begin{equation*}
  (m,g)(n,h)=(m\cdot n,gh).
\end{equation*}
Thus the direct-product structure on $N\times H$
is not necessarily the structure on $N\times H$ given by the theorem.
The latter structure is called a \textbf{semidirect product} of $N$ and $H$.
The group $NH$ is the \textbf{internal semidirect product}%
\index{internal semidirect product} 
of $N$ and $H$.  
Theorem~\ref{thm:wdp} on p.\ \pageref{thm:wdp} below 
establishes conditions under which this \emph{is} a direct product.  
Semidirect products 
are treated abstractly in \S\ref{sect:semidirect} (p.~\pageref{sect:semidirect}).
Meanwhile, again in the notation of Theorem~\ref{thm:Dih-2},
we have that $\Dih n$ is the internal semidirect product 
of $\gpgen{\sigma_n}$ and $\gpgen{\beta}$.

\begin{theorem}\label{thm:n-ker}
  The normal subgroups of a group are precisely the kernels of
  homomorphisms on the group.
\end{theorem}

\begin{proof}
  If $f$ is a homomorphism from $G$ to $H$, then for all $n$ in $\Ker f$,
  \begin{equation*}
f(ana\inv)=f(a)f(n)f(a)\inv=\gid,
\end{equation*}
so $a(\Ker
  f)a\inv\included \Ker f$; thus $\Ker f\nsubgp G$.  Conversely,
if $N\nsubgp G$, then the map $x\mapsto xN$ from $G$ to $G/N$ is a
homomorphism with kernel~$N$.
\end{proof}

For example,\label{43-1} from the homomorphism from $\Sym4$ onto $\Sym3$ 
given in Theorem~\ref{thm:43} above (p.~\pageref{thm:43}), 
$\Sym4$ has a normal subgroup 
that contains $(0\cdiv 1)(2\cdiv 3)$, $(0\cdiv 2)(1\cdiv 3)$, and $(0\cdiv 3)(1\cdiv 2)$, 
along with $\gid$.  
These four elements constitute the subgroup\label{43}
\begin{equation*}
\gpgen{(0\cdiv 1)(2\cdiv 3), (0\cdiv 2)(1\cdiv 3)}
\end{equation*}
of $\Sym4$, and this subgroup is isomorphic to $\Kfg$.
By Theorem~\ref{thm:A4} on p.\ \pageref{thm:A4} below,
this subgroup is precisely the kernel of the homomorphism in question.

In the proof of the last theorem, the map $x\mapsto xN$ is the 
\textbf{canonical projection}\label{can-proj}%
\index{projection}\index{canonical!--- projection} 
or the \textbf{quotient map}\index{quotient map}
of $G$ onto $G/N$; it may be denoted by
\begin{equation*}
\uppi.
\end{equation*}

\begin{theorem}\label{thm:hom-n}
  If $f$ is a homomorphism from $G$ to $H$, 
  and $N$ is a normal subgroup of $G$ such that $N\subgp \Ker f$, 
  then there is a unique homomorphism $\tilde f$ from $G/N$ to $H$ such that
  \begin{equation*}
f=\tilde f\circ\uppi,
\end{equation*}
 that is, the following diagram commutes\index{commutes} (see p.\ \pageref{commutes}).
  \begin{equation*}
\xymatrix{
G \ar[r]^{\uppi} \ar[d]_f & G/N \ar[dl]^{\tilde f}\\
H&
}
  \end{equation*}
\end{theorem}

\begin{proof}
If $\tilde f$ exists, it must be given by
\begin{equation*}
\tilde f(xN)=f(x).
\end{equation*}
Such $\tilde f$ does exist,
 since if $xN=yN$, then $xy\inv\in N$, so $xy\inv\in\Ker f$, hence $f(xy\inv)=\gid$, and therefore
 $f(x)=f(y)$. 
\end{proof}

\begin{corollary}[First Isomorphism Theorem]\label{cor:1it}%
\index{isomorphism!I--- Theorems|(}\index{theorem!Isomorphism Th---s|(}  
Suppose $f$ is a homomorphism from a group $G$ to some other group.
Then
\begin{equation*}
G/\Ker f\cong \im f.
\end{equation*}
In particular, if $\im f$ is finite, then
\begin{equation*}
[G:\ker(f)]=\size{\im f}.
\end{equation*}
\end{corollary}

\begin{proof}
  Let $N=\Ker f$; then $\tilde f$ is the
  desired homomorphism.
\end{proof}

For example, letting $f$ be $x\mapsto x+n\Z$ from $\Z$ to $\Zmod n$, we have
\begin{equation*}
\Z/n\Z\cong\Zmod n.
\end{equation*}
Another example is Theorem~\ref{thm:A4} below.


\begin{comment}


\begin{corollary}\label{cor:GHNM}
  If $f$ is a homomorphism from $G$ to $H$, and $N$ is a normal subgroup
  of $G$, and $M\nsubgp H$, and $f[N]\subgp M$, then there is a
  homomorphism $\tilde f$ from $G/N$ to $H/M$ such that the following
  diagram commutes:
  \begin{equation*}
    \xymatrix{
G \ar[r]^{\uppi}\ar[d]_f & G/N\ar[d]^{\tilde f}\\
H \ar[r]_{\uppi}         & H/M
}
  \end{equation*}
\end{corollary}

\begin{proof}
$N\subgp \Ker{\uppi\circ f}$.
\end{proof}




\end{comment}

\begin{corollary}[Second Isomorphism Theorem]
  If $H\subgp G$ and $N\nsubgp G$, then 
  \begin{equation*}
%  H/(H\cap N)\cong NH/N.
\frac H{H\cap N}\cong\frac{HN}N.
  \end{equation*}
\end{corollary}

\begin{proof}
The map $h\mapsto hN$ from $H$ to $HN/N$ is surjective with kernel
$H\cap N$.  So the claim follows by the First Isomorphism Theorem (that is, Corollary \ref{cor:1it}).
\end{proof}

For example,
in $\Z$, since $\gpgen n\cap\gpgen m=\gpgen{\lcm(n,m)}$ and $\gpgen
n+\gpgen m=\gpgen{\gcd(n,m)}$, we have
\begin{equation*}
  \frac{\gpgen n}{\gpgen{\lcm(n,m)}}
\cong\frac{\gpgen{\gcd(n,m)}}{\gpgen m}.
\end{equation*}

\begin{corollary}[Third Isomorphism Theorem]
  If $N$ and $K$ are normal subgroups of $G$ and $N\subgp K$, then
  \begin{align*}
K/N&\nsubgp G/N,&\frac{G/N}{K/N}&\cong G/K.
\end{align*}
\end{corollary}

\begin{proof}
For the first claim, we have
\begin{equation*}
aN\left(\frac KN\right)(aN)\inv=\frac{aKa\inv}N=\frac KN
\end{equation*}
since $(aN)(xN)(aN)\inv=axa\inv N$.
By the First Isomorphism Theorem (Corollary \ref{cor:1it})
in case $f$ is $x\mapsto xK$ from $G$ to $G/K$,
we have a homomorphism $xN\mapsto xK$ from $G/N$ to $G/K$.  
The kernel is $\{xN\colon x\in K\}$, which is just $K/N$.
The second claim follows by the First Isomorphism Theorem.
\end{proof}
\index{isomorphism!I--- Theorems|)}\index{theorem!Isomorphism Th---s|)}

One more basic result about normal subgroups 
will be Theorem~\ref{thm:GNKN} on p.\ \pageref{thm:GNKN}.
Theorem~\ref{thm:hom-n} will be used 
to prove von Dyck's Theorem (Theorem~\ref{thm:vD}, p.~\pageref{thm:vD}).
As promised, another application of the First Isomorphism Theorem 
is the following.

\begin{theorem}\label{thm:A4}
$\gpgen{(0\cdiv 1)(2\cdiv 3), (0\cdiv 2)(1\cdiv 3)}\nsubgp\Alt4$.
\end{theorem}

\begin{proof}
Let $f$ be the homomorphism from $\Sym4$ to $\Sym3$ given in Theorem~\ref{thm:43}.
Then $\size{\ker(f)}=4$.
We have already noted (p.~\pageref{43}) that
\begin{equation*}
\gpgen{(0\cdiv 1)(2\cdiv 3), (0\cdiv 2)(1\cdiv 3)}\subgp\ker(f).
\end{equation*}
Since $\gpgen{(0\cdiv 1)(2\cdiv 3), (0\cdiv 2)(1\cdiv 3)}\cong\Kfg$, the Klein four group,
it must be equal to $\ker(f)$.
Hence
$\gpgen{(0\cdiv 1)(2\cdiv 3), (0\cdiv 2)(1\cdiv 3)}\nsubgp\Sym4$.
Moreover, this normal subgroup is a subgroup of $\Alt4$, and therefore, by Theorem~\ref{thm:NGHG}, it is a normal subgroup of $\Alt4$.
\end{proof}


\section{Classification of finite simple groups}\label{sect:fin}

\subsection{Classification}

One of the goals of mathematical research 
is \textbf{classification} \cite[p.~52]{MR2467561}.
To classify is to divide into classes.
Originally, the word \emph{class} refers to a class of persons in a society.
In mathematics, the word is used for collections defined by formulas, 
as described on p.\ \pageref{class} above.
To classify a class $\bm C$ of structures is to partition it into subclasses.
Such a partitioning corresponds to an equivalence-relation on $\bm C$:
the subclasses of $\bm C$ are then the corresponding equivalence-classes.

For example, $\bm C$ might be the class of all structures.
We have classified structures according to whether they are algebras or not (p.~\pageref{algebra}).
There is a finer classification, according to the precise signatures of structures.
Within the class of structures having the signature $\{\gid,{}\inv,\cdot\}$ of groups,
we have distinguished the subclass 
consisting of those structures that actually \emph{are} groups.

For the class of groups,
or indeed for any class of structures,
the finest classification that is of interest to us
is the classification determined by the relation of isomorphism.
In an abstract sense, merely to specify the relation of isomorphism 
is to determine a classification of the class in question.
But we want to do more.
For example, we should like to be able 
to choose a representative from each isomorphism-class.

We have already done this for sets as such.
We have classified sets according to the relation of equipollence,
and then we have shown that, within every equipollence-class,
there is a unique cardinal (p.\ \pageref{cardinality}).

For the classification of groups,
Cayley's Theorem (p.\ \pageref{thm:Cay}) is of use.
If $G$ is a group, and $\size G=\kappa$,
then $G$ embeds in $\Sym{\kappa}$.
Thus the isomorphism-class of $G$ contains a subgroup of $\Sym{\kappa}$.
However, it will usually contain more than one subgroup of $\Sym{\kappa}$.

The natural numbers are classified according to whether they are prime.
Moreover, every natural number is the product of a unique set of prime powers.
We state this formally.

\begin{theorem}
For every $n$ in $\N$, there is a unique finite set $S$ of prime numbers
and a unique function $f$ from $S$ into $\N$ such that
\begin{equation*}
n=\prod_{p\in S}p^{f(p)}.
\end{equation*}
\end{theorem}

In \S\ref{sect:fgag} (p.\ \pageref{sect:fgag} below)
we are going to be able to give a similar classification
of the finitely generated abelian groups,
building on the initial distinguishing of certain groups as being cyclic.

\subsection{Finite simple groups}

A group is \textbf{simple}\index{simple group}%
\index{group!simple ---} 
if it is nontrivial and has no proper nontrivial normal subgroups.%%%%%
\footnote{In defining simple groups, 
Hungerford \cite[p.~49]{MR600654} omits the condition 
that they must be nontrivial; 
but then he immediately states our Theorem~\ref{thm:sag},
which excludes the trivial $\Zmod1$ from being simple,
because $1$ is not prime.
Lang \cite{Lang-alg} gives the nontriviality condition.}
%%%%%
In \S\ref{sect:NS} (p.~\pageref{sect:NS}) below,
culminating in the Jordan--H\"older Theorem,
we shall see that every finite group 
can be analyzed as a kind of `product' of a list of simple groups.
In this case, the analysis is not reversible;
different finite groups can yield the same list of simple groups.
A grand project of group theory has been to classify the finite simple groups.
We establish \emph{part} of this classification now.
The \emph{abelian} finite simple groups are easy to find:

\begin{theorem}\label{thm:sag}
The simple abelian groups are precisely 
the groups isomorphic to $\Zmod p$ for some prime number $p$.
\end{theorem}

As for nonabelian groups, we already know\label{43-2}
by Theorem \ref{thm:A4} that $\Alt4$ is not simple.
However, $\Alt3$ is simple, being isomorphic to $\Zmod3$.
Being trivial, $\Alt2$ is not simple.
We are going to show that $\Alt n$ is simple when $n\geq5$.

\begin{theorem}\label{thm:3-cycles}
  $\Alt n$ is generated by the $3$-cycles in $\Sym n$.
\end{theorem}

\begin{proof}
The group $\Alt n$ is generated by the products
$\begin{cycle}
  a\cdiv b
\end{cycle}
\begin{cycle}
  a\cdiv c
\end{cycle}$ and
$\begin{cycle}
  a\cdiv b
\end{cycle}
\begin{cycle}
  c\cdiv d
\end{cycle}$, where $a$, $b$, $c$, and $d$ are distinct elements of
$n$.  But 
\begin{gather*}
  \begin{cycle}
    a \cdiv  b
  \end{cycle}
  \begin{cycle}
    a \cdiv  c
  \end{cycle}=
\begin{cycle}
    a \cdiv  c \cdiv  b
  \end{cycle},\\
\begin{cycle}
 a \cdiv  b   
  \end{cycle}
  \begin{cycle}
    c \cdiv  d
  \end{cycle}
=
  \begin{cycle}
    b \cdiv  c \cdiv  a
  \end{cycle}
  \begin{cycle}
    c \cdiv  d \cdiv  b
  \end{cycle}.
\end{gather*}
Hence all $3$-cycles belong to $\Alt n$, and this group is generated
by these cycles.
\end{proof}

If $a$ and $b$ belong to an arbitrary group $G$, 
then the element $aba\inv$ of $G$ 
is called the \textbf{conjugate of $b$ by} $a$,
and the operation $x\mapsto axa\inv$ on $G$ 
is called \textbf{conjugation by}\label{conjug} $a$.
Conjugation by an element of $G$ is an automorphism of $G$:
this is stated formally
as Theorem~\ref{thm:conjug} on p.\ \pageref{thm:conjug} below.
For now, all we need to know is that, if $N\nsubgp G$,
then conjugates of elements of $N$ by elements of $G$ are elements of $N$.

\begin{theorem}
Every normal subgroup of $\Alt n$ containing a $3$-cycle \emph{is} $\Alt n$. 
\end{theorem}

\begin{proof}
By Theorem~\ref{thm:3-cycles}, it is enough to show that
for any $3$-cycle, every $3$-cycle is a conjugate of it.
We have
\begin{equation*}
  \begin{cycle}
    a \cdiv b \cdiv d
  \end{cycle}=
\underbrace{\begin{cycle}
    a \cdiv b
  \end{cycle}
  \begin{cycle}
    c \cdiv d
  \end{cycle}}
  \begin{cycle}
    c \cdiv b \cdiv a
  \end{cycle}
\underbrace{
  \begin{cycle}
    c \cdiv d
  \end{cycle}
  \begin{cycle}
    a \cdiv b
  \end{cycle}}.
  \end{equation*}
Thus, by conjugation, 
we can change any entry in a $3$-cycle's nontrivial orbit.
\end{proof}

\begin{theorem}
  $\Alt n$ is simple if $n>4$.
\end{theorem}

\begin{proof}
Suppose $\Alt n$ has a normal subgroup $N$ with a nontrivial element $\sigma$.
Then $\sigma$ is the product of disjoint cycles, among which are:
  \begin{compactenum}[1)]
    \item
a cycle of order at least $4$; or
\item
two cycles of order $3$; or
\item
transpositions, only one $3$-cycle, and no other cycles; or
\item
only transpositions.
  \end{compactenum}
We show that, in each case, 
$N$ contains a $3$-cycle.
\begin{asparaenum}
\item
Suppose first that $\sigma$ is
$\begin{cycle}
0 \cdiv 1 \cdiv \dots \cdiv k-1
\end{cycle}\tau$ 
for some $\tau$ that is disjoint from 
$\begin{cycle}
0 \cdiv 1 \cdiv \cdots \cdiv k-1
\end{cycle}$.
Then $N$ contains both
\begin{equation*}
  \begin{cycle}
      0 \cdiv 1 \cdiv 2
    \end{cycle}
    \begin{cycle}
      0 \cdiv 1 \cdiv \cdots \cdiv k-1
    \end{cycle}\tau
    \begin{cycle}
      2 \cdiv 1 \cdiv 0
    \end{cycle}
\end{equation*}
and
$\tau\inv
\begin{cycle}
k-1\cdiv \cdots \cdiv 1 \cdiv 0
\end{cycle}$,
and their product is a $3$-cycle:
  \begin{multline*}
  \begin{cycle}
      0 \cdiv 1 \cdiv 2
    \end{cycle}
    \begin{cycle}
      0 \cdiv 1 \cdiv \cdots \cdiv k-1
    \end{cycle}\tau
    \begin{cycle}
      2 \cdiv 1 \cdiv 0
    \end{cycle}
\tau\inv
    \begin{cycle}
k-1\cdiv \cdots \cdiv 1 \cdiv 0
    \end{cycle}\\
    =
    \begin{cycle}
      0 \cdiv 1 \cdiv 3
    \end{cycle}.
  \end{multline*}
\item
If $\tau$ is disjoint from $\begin{cycle}
    0 \cdiv 1 \cdiv 2
  \end{cycle}
  \begin{cycle}
    3 \cdiv 4 \cdiv 5
  \end{cycle}$, then we reduce to the previous case:
  \begin{multline*}
  \begin{cycle}
    0 \cdiv 1 \cdiv 3
  \end{cycle}
\underbrace{
  \begin{cycle}
    0 \cdiv 1 \cdiv 2
  \end{cycle}
  \begin{cycle}
    3 \cdiv 4 \cdiv 5
  \end{cycle}}\tau
  \begin{cycle}
    3 \cdiv 1 \cdiv 0
  \end{cycle}
\tau\inv
\underbrace{
  \begin{cycle}
    5 \cdiv 4 \cdiv 3
  \end{cycle}
  \begin{cycle}
    2 \cdiv 1 \cdiv 0
  \end{cycle}}\\
  =
  \begin{cycle}
    0 \cdiv 1 \cdiv 4 \cdiv 2 \cdiv 3
  \end{cycle}.
  \end{multline*}
 \item
If $\tau$ is disjoint from 
$\begin{cycle}
  0 \cdiv 1 \cdiv 2
\end{cycle}$ and is the product of transpositions, then
\begin{equation*}
\left[\begin{cycle}
  0 \cdiv 1 \cdiv 2
\end{cycle}\tau\right]^2=
\begin{cycle}
2 \cdiv 1 \cdiv 0
\end{cycle}.
\end{equation*}
\item
Finally, suppose $\tau$ is a product of transpositions disjoint from 
$\begin{cycle}
  0\cdiv 1
\end{cycle}$ and
$\begin{cycle}
 2\cdiv 3 
\end{cycle}$.
Then
\begin{equation*}
  \begin{cycle}
  0 \cdiv 1 \cdiv 2
\end{cycle}
\underbrace{
\begin{cycle}
  0 \cdiv 1
\end{cycle}
\begin{cycle}
  2 \cdiv 3
\end{cycle}
\tau}
\begin{cycle}
  2 \cdiv 1 \cdiv 0
\end{cycle}
\underbrace{
\tau
\begin{cycle}
  3 \cdiv 2
\end{cycle}
\begin{cycle}
  1 \cdiv 0
\end{cycle}}
=
\begin{cycle}
  0 \cdiv 2
\end{cycle}
\begin{cycle}
  1 \cdiv 3
\end{cycle}.
\end{equation*}
Furthermore, since $n>4$, in $\Alt n$ we compute
\begin{equation*}
\begin{cycle}
  0 \cdiv 2 \cdiv 4
\end{cycle}
\underbrace{
\begin{cycle}
  0 \cdiv 2
\end{cycle}
\begin{cycle}
  1 \cdiv 3
\end{cycle}}
\begin{cycle}
  4 \cdiv 2 \cdiv 0
\end{cycle}
\underbrace{
\begin{cycle}
  3 \cdiv 1
\end{cycle}
\begin{cycle}
  2 \cdiv 0
\end{cycle}}
=
\begin{cycle}
  0 \cdiv 4 \cdiv 2
\end{cycle}.\qedhere
\end{equation*}
\end{asparaenum}
\end{proof}

For the sake of classifying small finite groups in general 
(in \S\ref{sect:class-small}, p.\ \pageref{sect:class-small}),
we shall want the following,
which assumes $\Alt n$ is defined just when $n\geq2$ (see p.\ \pageref{Alt} above).

\begin{theorem}\label{thm:SA2}
$\Alt n$ is the unique subgroup of $\Sym n$ of index $2$.
\end{theorem}


% END OF DAY 6 (October 13, 2008)

\chapter{Category theory}

\section{Products}\label{sect:prod}

There is a simple property of direct products of groups (as defined on p.\ \pageref{dp})
that will turn out to characterize these products.
If $G_0$ and $G_1$ are groups,
then we know from Theorem~\ref{thm:coord-proj} on p.\ \pageref{thm:coord-proj}
that for each $i$ in $2$, the function
\begin{equation*}
(x_0,x_1)\mapsto x_i
\end{equation*}
from $G_0\times G_1$ to $G_i$ is a homomorphism.
It can be called a \textbf{coordinate projection}\label{coord-proj} 
and denoted by
\begin{equation*}
\uppi_i.
\end{equation*}

\begin{theorem}\label{thm:prod}
Let $G_0$, $G_1$ and $H$ be groups such that,
for each $i$ in $2$, there is a homomorphism $f_i$ from $H$ to $G_i$.
Then the function
\begin{equation*}
  x\mapsto(f_0(x),f_1(x))
\end{equation*}
from $H$ to $G_0\times G_1$ is a homomorphism, 
and it is the unique homomorphism $f$ from $H$ to $G_0\times G_1$ 
such that, for each $i$ in~$2$,
  \begin{equation*}
    \uppi_if=f_i,
  \end{equation*}
that is, the following diagram commutes:
\begin{equation*}
  \xymatrix{
G_0 & \ar[l]_-{\uppi_0} G_0\times G_1 \ar[r]^-{\uppi_1} & G_1\\
& \ar[ul]^{f_0} \ar[u]_f H \ar[ur]_{f_1} &
}
\end{equation*}
If the groups $G_i$ are abelian, then so is $G_0\times G_1$.
\end{theorem}

\begin{proof}
If $u\in G_0\times G_1$, then
\begin{equation*}
u=(\uppi_0(u),\uppi_1(u)).
\end{equation*}
Hence, if $f\colon H\to G_0\times G_1$, 
then $f(x)=(\uppi_0f(x),\uppi_1f(x))$.
In particular then, $f$ is as desired if and only if
$f(x)=(f_0(x),f_1(x))$. 
\end{proof}
\newcounter{prod}
\setcounter{prod}{\value{theorem}}

Considering this theorem and its proof,
we may see that a more general result can be obtained.
This is the porism below.
We obtain it by considering 
an \textbf{indexed family} $(G_i\colon i\in I)$ of groups.  
This is an indexed set in the sense of p.\ \pageref{indexed};
we use the word \emph{family} 
to emphasize that the structure of each $G_i$ will be important.
The \textbf{direct product}\index{direct product}\label{dprod}
of the indexed family can be denoted by one of
\begin{align*}
&\prod_{i\in I}G_i,&&\prod(G_i\colon i\in I).
\end{align*}
This is, first of all, 
the set whose elements are indexed sets $(x_i\colon i\in I)$ 
such that $x_i\in G_i$ for each $i$ in $I$.  
Note a special case:  If all of the groups $G_i$ are the same group $G$, then
\begin{equation*}
\prod_{i\in I}G=G^I.
\end{equation*}
In case $I=n$, we may write $\prod_{i\in I}G_i$ also as
\begin{equation*}
  G_0\times\cdots\times G_{n-1},
\end{equation*}
and a typical element of this as $(x_0,\dots,x_{n-1})$.

\begin{theorem}
The direct product of an indexed family $(G_i\colon i\in I)$ of groups
is a group under the multiplication given by
\begin{equation*}
  (x_i\colon i\in I)\cdot(y_i\colon i\in I)=(x_i\cdot y_i\colon i\in I).
\end{equation*}
Each of the functions
\begin{equation*}
(x_j\colon j\in I)\mapsto x_i
\end{equation*}
is a homomorphism from $\prod_{j\in I}G_j$ to $G_i$.
\end{theorem}

\begin{proof}
As for Theorem~\ref{thm:power} on p.\ \pageref{thm:power} 
and Theorem~\ref{thm:coord-proj} on p.\ \pageref{thm:coord-proj}.
\end{proof}

As before, the homomorphisms in the porism are the \textbf{coordinate projections,}
denoted by
\begin{equation*}
\uppi_i.
\end{equation*}

\newcounter{savethm}
\setcounter{savethm}{\value{theorem}}
\setcounter{theorem}{\value{prod}}
\begin{porism}\label{por:prod}
  Suppose $(G_i\colon i\in I)$ is an indexed family of groups, and $H$
  is a group, and for each $i$ in $I$ there is a homomorphism from $H$
  to $G_i$.  Then there is a homomorphism
  \begin{equation}\label{eqn:unique-hom}
    x\mapsto(f_i(x)\colon i\in I)
  \end{equation}
 from $H$ to
  $\prod_{i\in I}G_i$, and this
is the unique homomorphism $f$ from $H$ to
  $\prod_{i\in I}G_i$ such that, for each $i$ in~$I$,
  \begin{equation*}
    \uppi_if=f_i,
  \end{equation*}
that is, the following diagram commutes:
\begin{equation*}
  \xymatrix{
\displaystyle\prod_{j\in I}G_j \ar[r]^-{\uppi_i} & G_i\\
H\ar[u]^<<<f \ar[ur]_{f_i} &
}
\end{equation*}
If the groups $G_i$ are abelian, then so is $\prod_{i\in I}G_i$.
\end{porism}
\setcounter{theorem}{\value{savethm}}

If we ignore the actual definition \eqref{eqn:unique-hom} of the unique homomorphism $f$,
then the porism can be summarized as being that
the direct product of an indexed family of groups
has a certain \textbf{universal property.}\label{up}
Theorem~\ref{thm:prod-un} on p.\ \pageref{thm:prod-un} below
is that the direct product is \emph{characterized} by its universal property.
Other constructions characterized by universal properties are:
\begin{compactitem}
\item
the direct sum (next section, namely \S\ref{sect:sum});
\item
the free abelian group and the free group (\S\ref{sect:free});
\item
the quotient field of an integral domain 
(\S\ref{sect:loc}, p.\ \pageref{qf});
\item
the polynomial ring (sub-\S\ref{subsect:poly-up}, 
p.\ \pageref{subsect:poly-up}).
\end{compactitem}

\section{Sums}\label{sect:sum}

We now investigate the possibility
of reversing the arrows in Theorem~\ref{thm:prod}.
If $G_0$ and $G_1$ are arbitrary groups,
then we know from Theorem~\ref{thm:can-inj} on p.\ \pageref{thm:can-inj}
that the functions
\begin{align*}
x&\mapsto(x,\gid),&
x&\mapsto(\gid,x)
\end{align*}
are homomorphisms, from $G_0$ and $G_1$ respectively to $G_0\times G_1$.
They can be called the \textbf{canonical injections,} denoted respectively by
\begin{align*}
&\upiota_0,&
&\upiota_1.
\end{align*}


\begin{theorem}\label{thm:oplus}
Let $G_0$, $G_1$ and $H$ be abelian groups such that,
for each $i$ in $2$, there is a homomorphism $f_i$ from $G_i$ to $H$.
Then the function
\begin{equation*}
  (x_0,x_1)\mapsto f_0(x_0)+f_1(x_1)
\end{equation*}
 from $G_0\oplus G_1$ to $H$
 is a homomorphism, and it is
 the unique homomorphism $f$ from $G_0\oplus G_1$ to $H$ such
  that, for each $i$ in~$2$,
  \begin{equation*}
    f\upiota_i=f_i,
  \end{equation*}
that is, the following diagram commutes:
\begin{equation*}
  \xymatrix{
G_0 \ar[r]^-{\upiota_0} \ar[dr]_{f_0} & G_0\oplus G_1 \ar[d]^f &
\ar[l]_-{\upiota_1} \ar[dl]^{f_1} G_1\\ 
& H &
}
\end{equation*}
\end{theorem}
\newcounter{sum}
\setcounter{sum}{\value{theorem}}

\begin{proof}
If $(x_0,x_1)\in G_0\oplus G_1$, then
\begin{equation*}
(x_0,x_1)=\upiota_0(x_0)+\upiota_1(x_1),
\end{equation*}
so that, if $f$ is a homomorphism on $G_0\oplus G_1$, then
\begin{equation*}
  f(x_0,x_1)=f\upiota_0(x_0)+f\upiota_1(x_1).
\end{equation*}
Hence, if $f$ is as desired, then it must be given by
\begin{equation}\label{eqn:f+}
f(x_0,x_1)=f_0(x_0)+f_1(x_1).
\end{equation}
The function so defined is indeed a
homomorphism, since 
\begin{align}\notag
f((x_0,x_1)+(u_0,u_1))
&=f(x_0+u_0,x_1+u_1)\\\notag
&=f_0(x_0+u_0)+f_1(x_1+u_1)\\\notag
&=f_0(x_0)+f_0(u_0)+f_1(x_1)+f_1(u_1)\\\label{eqn:uxxu}
&=f_0(x_0)+f_1(x_1)+f_0(u_0)+f_1(u_1)\\\notag
&=f(x_0,x_1)+f(u_0,u_1),
\end{align}
where \eqref{eqn:uxxu} uses that $H$ is abelian.
Moreover, when $f$ is as in \eqref{eqn:f+}, then
\begin{equation*}
f\upiota_0(x)=f(x,0)=f_0(x),
\end{equation*}
so $f\upiota_0=f_0$, and similarly $f\upiota_1=f_1$.
\end{proof}

In the proof, the definition of $f$ in~\eqref{eqn:f+} 
does not require that the indexed family $(G_i\colon i\in 2)$
have just two members, but only that it have finitely many.
Also, as noted, $f$ is a homomorphism because $H$ is abelian;
but this condition too can be weakened.
Given an arbitrary indexed family $(G_i\colon i\in I)$ of groups,
we have, for each $i$ in $I$, 
a function $\upiota_i$ from $G_i$ to $\sum_{j\in I}G_j$ given by
\begin{equation*}
\upiota_i(x)=(x_j\colon j\in I),
\end{equation*}
where
\begin{equation*}
x_j=\begin{cases}
	x,&\text{ if }j=i,\\
	\gid,&\text{ otherwise.}
\end{cases}
\end{equation*}
The monomorphisms $\upiota_i$ 
are the \textbf{canonical injections.}\label{caninj}

\begin{porism}
Suppose $(G_i\colon i<n)$ is a finite indexed family of groups, 
and $H$ is a group, 
and for each $i$ in $n$ 
there is a homomorphism $f_i$ 
from $G_i$ to $H$.  
Suppose further that, for all distinct $i$ and $j$ in $n$,
\begin{equation*}
    f_i(x)\cdot f_j(y)=f_j(y)\cdot f_i(x).
\end{equation*}
Then the map
\begin{equation*}
  (x_i\colon i<n)\mapsto\prod_{i<n}f_i(x_i)
\end{equation*}
from $\prod_{i<n}G_i$ to $H$ 
is the unique homomorphism $f$
from $\prod_{i<n}G_i$ to $H$
such that, for each $i$ in~$n$, 
  \begin{equation*}
    f\upiota_i=f_i.
  \end{equation*}
\end{porism}

We use the porism to establish the next theorem below,
which we shall use in characterizing finite nilpotent groups
in Theorem~\ref{thm:fng} on p.\ \pageref{thm:fng}.
We need the following observation.

\begin{lemma}
If $M$ and $N$ are normal subgroups of $G$, and 
\begin{equation*}
M\cap N=\trivgp,
\end{equation*}
then each element of $M$ commutes with each element of $N$,
that is, for all $m$ in $M$ and $n$ in $N$,
\begin{equation*}
mn=nm.
\end{equation*}
\end{lemma}

\begin{proof}
We can analyze $mnm\inv n\inv$ in two ways:
\begin{compactitem}
  \item
as the element $(mnm\inv)n\inv$ of $N$,
\item
as the element $m(nm\inv n\inv)$ of $M$.
\end{compactitem}
So the element is $\gid$, and therefore
$mn=(m\inv n\inv)\inv=nm$.
\end{proof}

\begin{theorem}\label{thm:wdp}
If $(N_i\colon i<n)$ is a finite indexed family 
of normal subgroups of a group, and for each $j$ in $n\setminus\{0\}$,
\begin{equation}
N_0\dotsm N_{j-1}\cap N_j=\trivgp,
%N_j\cap\Biggl\langle\bigcup_{i\in n\setminus\{j\}}N_i\Biggr\rangle=\trivgp,
\end{equation}
then the map
\begin{equation}\label{eqn:x-prod}
(x_i\colon i<n)\mapsto\prod_{i<n}x_i
\end{equation}
from $\prod_{i<n}N_i$ to $N_0\dotsm N_{n-1}$
is an isomorphism.
\end{theorem}
\newcounter{wdp}
\setcounter{wdp}{\value{theorem}}

\begin{proof}
Say the $N_i$ are normal subgroups of the group $G$,
and let the map in \eqref{eqn:x-prod} be denoted by $h$.
Since $N_i\cap N_j=\trivgp$ whenever $i\neq j$,
the last porism and the lemma guarantee 
that $h$ is a homomorphism and, 
for each $i$ in $n$, 
the composition $h\upiota_i$ is just the inclusion of $N_i$ in $G$.  
Then the range of $h$ is
$N_0\dotsm N_{n-1}$.
To see that $h$ is injective, note that, 
if $\bm m\in\prod_{i\in n}N_i$ and $h(\bm m)=\gid$, 
then
\begin{equation*}
  m_{n-1}{}\inv=\prod_{i<n-1}m_i.
\end{equation*}
The left member is in $N_{n-1}$, 
and the right is in $N_0\dotsm N_{n-2}$, 
so each member is $\gid$.
In particular, $m_{n-1}=\gid$,
but also, we can repeat the argument to show $m_{n-2}=\gid$ and so on.
Thus $\bm m=\gid$.
\end{proof}

In the theorem, the group $N_0\dotsm N_{n-1}$
is the \textbf{internal direct product} of $(N_i\colon i<n)$.
For the result, it is not enough to assume $N_i\cap N_j=\trivgp$ when $i<j<n$.
For example, consider the subgroups 
$\gpgen{(1,0)}$, $\gpgen{(0,1)}$, and $\gpgen{(1,1)}$ of $\Kfg$.

We can generalize Theorem~\ref{thm:oplus} in another sense.
Given an arbitrary indexed family $(G_i\colon i\in I)$ of abelian groups,
we define its \textbf{direct sum,} 
\begin{equation*}
  \sum_{i\in I}G_i,
\end{equation*}
to consist of the elements $(x_i\colon i\in I)$ 
of the direct product $\prod_{i\in I}G_i$
such that the set $\{i\in I\colon x_i\neq0\}$ is finite.
The direct sum is indeed a group:

\begin{theorem}
For every indexed family $(G_i\colon i\in I)$ of abelian groups,
\begin{equation*}
  \sum_{i\in I}G_i\subgp\prod_{i\in I}G_i.
\end{equation*}
\end{theorem}


In case $I=n$, we may write $\sum_{i\in I}G_i$ also as
\begin{equation*}
  G_0\oplus\cdots\oplus G_{n-1}.
\end{equation*}
If $I$ is finite, then the direct sum is the same as the direct product.  
If $I$ is infinite, 
and the groups $G_i$ are nontrivial for infinitely many $i$ in $I$, 
then the sum is \emph{not} the same as the direct product. 
The proof uses the Axiom of Choice,
because it involves choosing a nontrivial element
from each of infinitely many of the nontrivial groups $G_i$.\label{ac}

\setcounter{savethm}{\value{theorem}}
\setcounter{theorem}{\value{sum}}
\setcounter{porism}{1}
\begin{porism}\label{por:ds}
  Suppose $(G_i\colon i\in I)$ is an indexed family of abelian groups, and $H$
  is an abelian group, and for each $i$ in $I$ there is a homomorphism $f_i$ from
  $G_i$ to $H$.  Then the map
  \begin{equation*}
    x\mapsto\sum_{i\in I}f_i(x_i)
  \end{equation*}
  from $\sum_{i\in I}G_i$ to $H$ is the unique homomorphism $f$ from
  $\sum_{i\in I}G_i$ to $H$
  such that, for each $i$ in~$I$, 
  \begin{equation*}
    f\upiota_i=f_i,
  \end{equation*}
that is, the following diagram commutes:
\begin{equation*}
\xymatrix{
G_i\ar[r]^-{\upiota_i}\ar[dr]_{f_i}&\displaystyle\sum_{j\in I}G_j\ar[d]^f\\
&H
}
\end{equation*}
\end{porism}
\setcounter{theorem}{\value{savethm}}

\section{*Weak direct products}

For completeness, 
we observe that Theorem \ref{thm:oplus} can be generalized even further.
The \textbf{weak direct product}\index{weak direct product} 
of an indexed family $(G_i\colon i\in I)$ of arbitrary groups
has the same definition as the direct sum in the abelian case;
but in the general case we use the notation
\begin{equation*}
\weakprod_{i\in I}G_i.
\end{equation*}
So this comprises those elements $(x_i\colon i\in I)$ of $\prod_{i\in I}G_i$
such that the set $\{i\in I\colon x_i\neq\gid\}$ is finite.  
For each $i$ in $I$ we have the homomorphism $\upiota_i$ 
from $G_i$ to $\textweakprod_{i\in I}G_i$,
defined as in the abelian case.
Direct products and weak direct products are related as follows.

\begin{theorem}
Let $(G_i\colon i\in I)$ be an indexed family of groups.  Then
\begin{align*}
   \upiota_j[G_j]&\nsubgp\weakprod_{i\in I}G_i,&  
\weakprod_{i\in I}G_i&\nsubgp\prod_{i\in I}G_i,&
   \upiota_j[G_j]&\nsubgp\prod_{i\in I}G_i.\qedhere
\end{align*}
\end{theorem}

Porism~\ref{por:ds} can be generalized to some
cases of arbitrary groups:

\setcounter{savethm}{\value{theorem}}
\setcounter{theorem}{\value{sum}}
\setcounter{porism}{2}
\begin{porism}
  Suppose $(G_i\colon i\in I)$ is an indexed family of groups, and $H$
  is a group, and for each $i$ in $I$ there is a homomorphism $f_i$ from
  $G_i$ to $H$.  Suppose further that, for all distinct $i$ and $j$ in $I$,
  \begin{equation*}
    f_i(x)\cdot f_j(y)=f_j(y)\cdot f_i(x).
  \end{equation*}
Then the map
\begin{equation*}
  x\mapsto\prod_{i\in I}f_i(x_i)
\end{equation*}
from $\textweakprod_{i\in I}G_i$ to $H$ is the unique
homomorphism $f$
from $\textweakprod_{i\in I}G_i$ to $H$
such that, for each $i$ in~$I$, 
  \begin{equation*}
    f\upiota_i=f_i.
  \end{equation*}
\end{porism}
\setcounter{theorem}{\value{savethm}}

\setcounter{savethm}{\value{theorem}}
\setcounter{theorem}{\value{wdp}}
\setcounter{porism}{2}
\begin{porism}%\label{thm:wdp}
  If $(N_i\colon i\in I)$ is an indexed family of normal subgroups of
  a group, and for each $j$ in $I$,
\begin{equation}
  N_j\cap\Biggl\langle\bigcup_{i\in
  I\setminus\{j\}}N_i\Biggr\rangle=\trivgp,
\end{equation}
then 
\begin{equation*}
\Bigl\langle\bigcup_{i\in I}N_i\Bigr\rangle
\cong\weakprod_{i\in I}N_i.
\end{equation*}
\end{porism}

In this porism,
the group $\Bigl\langle\bigcup_{i\in I}N_i\Bigr\rangle$
is called the
\textbf{internal weak direct product}\index{internal weak direct product} 
of the~$N_i$. 



\section{Free groups}\label{sect:free}

For every index set $I$, the direct sum $\sum_{i\in I}\Z$ is called
a \textbf{free abelian group on} $I$
for the reason given by the next theorem.	
To state the theorem, we note that,
for every $i$ in $I$,
the abelian group $\sum_{i\in I}\Z$ has the element $\upiota_i(1)$,
which can also be written as $(\updelta_j^i\colon j\in I)$, where
\begin{equation*}
\updelta_j^i=
\begin{cases}
1,&\text{ if }j=i,\\
0,&\text{ otherwise.}
\end{cases}
\end{equation*}
Let us also use the notation\label{Z^n-gen}
\begin{equation*}
\bv^i
\end{equation*}
for $\upiota_i(1)$ or $(\updelta_j^i\colon j\in I)$.
An arbitrary element of $\sum_{i\in I}\Z$ 
can then be written as
\begin{equation*}
\sum_{i\in I}x_i\bv^i.
\end{equation*}
The use of this notation implies 
that only finitely many of the $x_i$ are different from $0$.

\begin{theorem}\label{thm:free-ab}
  Suppose $G$ is an abelian group, $I$ is a set, and $f$ is a
  function from $I$ to $G$.  Then the map
  \begin{equation*}
  \sum_{i\in I}x_i\bv^i\mapsto\sum_{i\in I}x_if(i)
  \end{equation*}
 from $\sum_{i\in I}\Z$ to $G$ is the unique homomorphism $\tilde f$ from $\sum_{i\in I}\Z$ to $G$ such that, for each $i$ in~$I$,
  \begin{equation*}
    \tilde f(\bv^i)=f(i),
  \end{equation*}
that is, the following diagram commutes, 
where $\upiota$ is the map $i\mapsto\bv^i$.  
\begin{equation*}
  \xymatrix{
I \ar[r]^-{\upiota} \ar[d]_f & \ar[dl]^{\tilde f} \displaystyle\sum_{i\in I}\Z \\
G &
}
\end{equation*}
In particular, the subgroup $\gpgen{f(i)\colon i\in I}$ of $G$ 
is isomorphic to a quotient of $\sum_{i\in I}\Z$.
\end{theorem}

As a special case, we have that every finitely generated abelian group
is isomorphic to a quotient of some $\Z\oplus\dots\oplus\Z$.
Observing this is the first step in classifying the finitely generated abelian groups
as in \S\ref{sect:fgag} (p.\ \pageref{sect:fgag}).

Meanwhile, since
\begin{equation*}
\sum_{i\in I}\Z=\gpgen{\bv^i\colon i\in I},
\end{equation*}
we can write every element as a finite sum $\sum_{i\in I}x_i\bv^i$, as we said.
But then, if $x_i>0$,
we can replace $x_i\bv^i$ with $x_i$-many copies of $\bv^i$,
and if $x_j<0$,
we can replace $x_j\bv^j$ with $-x_j$-many copies of $-\bv^j$.
For example,
\begin{equation*}
3\bv^0-2\bv^1=\bv^0+\bv^0+\bv^0-\bv^1-\bv^1.
\end{equation*}
In general, every nontrivial element of $\sum_{i\in I}\Z$ 
is \emph{uniquely} a sum of some copies of the $\bv^i$ and the $-\bv^j$, 
if we disregard order, 
and if we never allow $\bv^i$ and $-\bv^i$ for the same $i$ to appear in the same sum.
If we use multiplicative notation instead,
and if we do not disregard order,
what we get is not an abelian group, much less a free abelian group;
but it is a \emph{free group.}

To be precise, a \textbf{word}\index{word} on $I$ is a 
finite nonempty string $t_0t_1\cdots t_n$,
where
each entry $t_k$ is either $\gid$, or else $a$ or $a\inv$ for some $a$
in $I$.  A word is \textbf{reduced}\index{reduced} if
$a$ and $a\inv$ are never adjacent in it, and
$\gid$ is never adjacent to any other entry.
Thus the only reduced word in which $\gid$ can appear
is just the word of length $1$ whose only entry is $\gid$.
The \textbf{free group on} $I$, denoted by
\begin{equation*}
\free I,
\end{equation*}
consists of the reduced words on $I$. 
Multiplication in this group
is juxtaposition followed by \textbf{reduction,}\index{reduction} 
namely, replacement of each occurrence of $aa\inv$ or $a\inv a$ with $\gid$, 
and replacement of each occurrence of $x\gid$ or $\gid x$ with $x$.  
Thus, if we write an element $a$ of $I$ as $a^1$,
we can express the product of two arbitrary reduced words by the equation
  \begin{equation*}
    (a_{m}^{\epsilon(m)}\cdots a_{0}^{\epsilon(0)})
(b_{0}^{\zeta(0)}\cdots b_{n}^{\zeta(n)})=
a_{m}^{\epsilon(m)}\cdots a_{j}^{\epsilon(j)}b_j^{\zeta(j)}\cdots
b_{n}^{\zeta(n)},
  \end{equation*}
where each exponent $\epsilon(i)$ or $\zeta(i)$ is $\pm1$, and the equation
\begin{equation*}
a_i^{\epsilon(i)}= b_i^{-\zeta(i)}
\end{equation*}
is true when $i<j$, but false when $i=j$.
We consider $I$ as a subset of $\free I$.  
An element of the latter other than $\gid$ can be written also as
\begin{equation*}
a_0{}^{n(0)}\cdots a_m{}^{n(m)},
\end{equation*}
where $a_i$ and $a_{i+1}$ are always distinct elements of $I$, 
and each $n(i)$ is in $\Z\setminus\{0\}$.

We can now give the following analogue for Theorem~\ref{thm:free-ab}.
This solves the question raised on p.\ \pageref{gpgen-hard} above
of how to describe the elements of a generated subgroup $\gpgen A$ of a given group.
The answer is that these elements can be given as reduced words on $A$,
although possibly two different reduced words
will stand for the same element of $\gpgen A$.

\begin{theorem}\label{thm:free-gp}
Suppose $G$ is a group, $I$ is a set, 
and $f$ is a function from $I$ to $G$.  
Then the map
\begin{equation*}
a_0{}^{n(0)}\cdots a_m{}^{n(m)}
\mapsto 
f(a_0)^{n(0)}\cdots f(a_m)^{n(m)}
\end{equation*}
from $\free I$ to $G$ 
is the unique homomorphism $\tilde f$ 
from $\free I$ to $G$ such that
\begin{equation*}
\tilde f\restriction I=f,
\end{equation*}
that is, the following diagram commutes, 
where $\upiota$ is the inclusion of $I$ in $\free I$.  
\begin{equation*}
  \xymatrix{
I \ar[r]^-{\upiota} \ar[d]_f & \ar[dl]^{\tilde f} \free I \\
G &
}
\end{equation*}
In particular, the subgroup $\gpgen{f(i)\colon i\in I}$ of $G$ 
is isomorphic to a quotient of $\free I$.
\end{theorem}

\section{*Categories}\label{sect:category}

Suppose $\bm C$ is a class of structures, 
all having the same signature.
For example, $\bm C$ could be the class of all groups,
or the class of all abelian groups.
If $\str A$ and $\str B$ belong to $\bm C$,
we can denote by
\begin{equation*}
\Hom{\str A,\str B}
\end{equation*}
the set of all homomorphisms from $\str A$ to $\str B$.
By Theorem~\ref{thm:hom-comp} on p.\ \pageref{thm:hom-comp}, 
if also $\str C\in\bm C$, then
\begin{equation*}
(g,f)\mapsto g\circ f\colon\Hom{\str B,\str C}\times\Hom{\str A,\str B}
\to\Hom{\str A,\str C}.
\end{equation*}
By Theorem~\ref{thm:composite} on p.\ \pageref{thm:composite},
if $f\in\Hom{\str A,\str B}$, $g\in\Hom{\str B,\str C}$, and $h\in\Hom{\str C,\str D}$,
then
\begin{equation}\label{eqn:comp-assoc}
(h\circ g)\circ f=h\circ(g\circ f).
\end{equation}
By Theorem~\ref{thm:isom-inv}, 
$\Hom{\str A,\str A}$ contains $\id_A$.  
If $f\in\Hom{\str A,\str B}$ and $g\in\Hom{\str B,\str C}$, 
then by Theorem~\ref{thm:id},
\begin{align}\label{eqn:id-id}
\id_B\circ f&=f,&
g\circ\id_B&=g.
\end{align}
Because of these properties, $\bm C$ is called a \textbf{category.}
Elements of $\bm C$ are called \textbf{objects} of the category;
elements of each set $\Hom{\str A,\str B}$ 
are called \textbf{morphisms} or \textbf{arrows} of the category,
and specifically morphisms or arrows \textbf{from} $\str A$ \textbf{to} $\str B$.
Strictly, the category is specified by four things:
\begin{compactenum}[1)]
\item
the class $\bm C$,
\item
the function $(\str A,\str B)\mapsto\Hom{\str A,\str B}$ on $\bm C\times\bm C$,
\item
the functions $\circ$, satisfying \eqref{eqn:comp-assoc};
\item
the function $\str A\mapsto\id_A$ on $\bm C$, satisfying \eqref{eqn:id-id}.
\end{compactenum}
The conditions \eqref{eqn:comp-assoc} and \eqref{eqn:id-id} 
can be expressed by means of the following commutative diagrams.
\begin{align*}
  &\xymatrix{
A \ar[r]^f \ar[d]_f & B \ar[d]^g\\
B \ar[ur]|{\id_B} \ar[r]_g & C}&
&\xymatrix{
B \ar[d]_g & A \ar[l]_f \ar[dl]|{g\circ f} \ar[d]%|{h\circ g\circ f} 
\ar[r]^f & B \ar[dl]|{h\circ g} \ar[d]^g\\
C \ar[r]_h & D & C \ar[l]^h}
\end{align*}
It is possible to have a category 
in which the objects are not structures
and the arrows are not homomorphisms.
For example, if $G$ is a group, 
then its elements can be considered 
as objects of a category 
in which $\Hom{a,b}=\{ba\inv\}$, and $c\circ d=cd$,
and the function corresponding to $\str A\mapsto\id_A$ 
is simply the constant function $a\mapsto\gid$.

In an arbitrary category,
the objects may be denoted by plain capital letters like $A$ and $B$, 
and the function corresponding to $\str A\mapsto\id_A$
may be denoted simply by $A\mapsto\id_A$.
In accordance with Theorems~\ref{thm:inverses} and \ref{thm:isom-inv},
we say that an element $f$ of $\Hom{A,B}$
is an \textbf{isomorphism}\index{isomorphism} if,
for some $g$ in $\Hom{B,A}$,
\begin{align*}
g\circ f&=\id_A,&
f\circ g&=\id_B.
\end{align*}
In this case, $g$ is an \textbf{inverse}\index{inverse} of $f$.

\begin{theorem}
  In a category, inverses are unique,
  and the inverse of a morphism has its own inverse, which is that morphism.
\end{theorem}

\begin{proof}
  If $g$ and $h$ are inverses of $f$, then
  \begin{equation*}
g=g\circ\id_B=g\circ(f\circ h)=(g\circ f)\circ h=
  \id_A\circ h=h.
\end{equation*}
The rest is by symmetry of the definition.
\end{proof}

If it exists, then the inverse of $f$ is denoted by
\begin{equation*}
f\inv.
\end{equation*}

When each object of a category has an associated set,
and every arrow from an object with associated set $A$
to an object with associated set $B$
is actually a function from $A$ to $B$,
then the category is said to be \textbf{concrete.}
We shall be interested only in concrete categories.
Classes of structures, like $\bm C$ above, can be understood as concrete categories.
However, other kinds of concrete categories are possible.
For example, there is a concrete category whose objects are topological spaces
and whose arrows are continuous functions.

\subsection{Products}

Suppose $\bm C$ is a category,
and $\mathscr A$ is an indexed family $(A_i\colon i\in I)$ of objects of $\bm C$.
If it exists, the \emph{product}\index{product} of $\mathscr A$ in the category 
is an object with the properties of a direct product of groups
given by Porism~\ref{por:prod} on p.\ \pageref{por:prod}.
For a formal definition, we define a new category,
whose objects are the pairs
\begin{equation*}
\bigl(B,(f_i\colon i\in I)\bigr)
\end{equation*}
such that $B$ is an object of $\bm C$ and, for each $i$ in $I$,
\begin{equation*}
f_i\in\Hom{B,A_i}.
\end{equation*}
An element $h$ of $\Hom{C,B}$ 
is a morphism from $\bigl(C,(g_i\colon i\in I)\bigr)$ 
to $\bigl(B,(f_i\colon i\in I)\bigr)$ in the new category 
if, for each $i$ in $I$,
\begin{equation*}
f_i\circ h=g_i,
\end{equation*}
that is, the following diagram commutes.
\begin{equation*}
\xymatrix{
C\ar[r]^{g_i}\ar[d]_h&A_i\ar[d]^{\id_{A_i}}\\
B\ar[r]_{f_i}&A_i
}
\end{equation*}
Suppose, in the new category, 
there is an object  
to which there is a \emph{unique} morphism from every other object.
This object
is called a \textbf{product} of $\mathscr A$.

By Porism~\ref{por:prod},
if $(G_i\colon i\in I)$ is an indexed family of groups,
then the ordered pair $\bigl(\prod_{i\in I}G_i,(\uppi_i\colon i\in I)\bigr)$
is a product of the indexed family in the category of groups.
If the $G_i$ are abelian, then the pair is a product
in the category of abelian groups.

\begin{theorem}\label{thm:prod-un}
  Any two products of the same indexed family of objects in the same category
  are uniquely isomorphic.
\end{theorem}

Thus, if $\mathscr A$ is an indexed family $(A_i\colon i\in I)$
of objects in a category with products,
then we may refer to \emph{the} product of $\mathscr A$, denoting it by
\begin{equation*}
\bigl(\prod\mathscr A,(\uppi_i\colon i\in I)\bigr).
\end{equation*}
We may still refer to the morphisms $\uppi_i$ 
as \textbf{coordinate projections.}\label{coord-proj-2}%
\index{projection}\index{canonical!--- projection}


\subsection{Coproducts}

Given a category,
if we can reverse all of the arrows,
and if we reverse composition correspondingly,
then we still have a category,
called the \textbf{dual}%
\index{category!dual ---}%
\index{dual category}
or \textbf{opposite} of the original category.
A \emph{co-product}\index{product!co---} or \emph{sum}\index{sum} in a
category is a product in the dual.  
Thus, suppose $\bm C$ is a category,
and $\mathscr A$ is an indexed family $(A_i\colon i\in I)$ of objects of $\bm C$.
We define a new category,
whose objects are the pairs
\begin{equation*}
\bigl(B,(f_i\colon i\in I)\bigr)
\end{equation*}
such that $B$ is an object of $\bm C$ and, for each $i$ in $I$,
\begin{equation*}
f_i\in\Hom{A_i,B}.
\end{equation*}
An element $h$ of $\Hom{B,C}$ 
is a morphism 
from $\bigl(B,(f_i\colon i\in I)\bigr)$ 
to $\bigl(C,(g_i\colon i\in I)\bigr)$ 
in the new category 
if, for each $i$ in $I$,
\begin{equation*}
h\circ f_i=g_i,
\end{equation*}
that is, the following diagram commutes.
\begin{equation*}
\xymatrix{
C&A_i\ar[l]_{g_i}\\
B\ar[u]^h&A_i\ar[l]^{f_i}\ar[u]_{\id_{A_i}}
}
\end{equation*}
Suppose, in the new category, 
there is an object 
from which there is a \emph{unique} morphism to every other object.
This object
is called a \textbf{coproduct} or \textbf{sum} of $\mathscr A$.

By Porism~\ref{por:ds},
if $(G_i\colon i\in I)$ is an indexed family of abelian groups,
then the pair $\bigl(\sum_{i\in I}G_i,(\upiota_i\colon i\in I)\bigr)$ 
is its coproduct in the category of abelian groups.

By Theorem~\ref{thm:prod-un}, coproducts are unique when they exist at all.
Thus if $\mathscr A$ is an indexed family $(A_i\colon i\in I)$ of objects
in a category with coproducts,
then we may refer to \emph{the} coproduct of $\mathscr A$,
denoting it by one of
\begin{align*}
&\Bigl(\coprod\mathscr A,(\upiota_i\colon i\in I)\Bigr),&
&\Bigl(\sum\mathscr A,(\upiota_i\colon i\in I)\Bigr).
\end{align*}
We may still refer to the morphisms $\upiota_i$ as \textbf{canonical
  injections.}%
\index{injection}%
\index{canonical!--- injection}

Although weak direct products of groups
are defined like sums of abelian groups,
they are \emph{not} coproducts
in the category of groups.
However, this category \emph{has} coproducts, as follows.

The \textbf{free product}\index{free product} of an indexed family
$(G_i\colon i\in I)$ of groups is the group, denoted by
  \begin{equation*}
    \freeprod_{i\in I}G_i,
  \end{equation*}
or by
\begin{equation*}
G_0*\dots*G_{n-1}
\end{equation*}
if $I$ is some $n$ in $\upomega$,
  comprising the string $\gid$ together with strings   
   $t_0\cdots t_m$, where each entry $t_i$ is an ordered pair
  $(g,n(i))$ such that $n(i)\in I$ and $g\in
  G_{n(i)}\setminus\{\gid\}$, and $n(i)\neq n(i+1)$.  This complicated
  definition allows for the possibility that $G_i$ might be the same
  as $G_j$ for some distinct $i$ and $j$; the groups $G_i$ and $G_j$
  must be considered as distinct in the formation of the free
  product. 
Multiplication on $\textfreeprod_{i\in I}G_i$, as on $\free I$, is
juxtaposition followed by reduction, so that if $(g,i)$ is followed
directly by $(h,i)$, then they are replaced with $(gh,i)$, and all
instances of $(\gid,i)$ are deleted, or replaced with $\gid$ if there
is no other entry. 
Each $G_j$ embeds in $\textfreeprod_{i\in I}G_i$ under $\upiota_j$,
namely $x\mapsto(x,j)$.  
%We now have the following analogue of Porism~\ref{por:ds}.

\begin{theorem}%\label{thm:free-prod}
If $(G_i\colon i\in I)$ is an indexed family of groups, 
then $\bigl(\textfreeprod_{i\in I}G_i,(\upiota_i\colon i\in I)\bigr)$
is its coproduct in the category of groups.
\end{theorem}

\subsection{Free objects}\label{subsect:free}

Given a \emph{concrete} category $\bm C$ and a set $I$,
we define a new category,
whose objects are the pairs
\begin{equation*}
(f,A),
\end{equation*}
where $A$ is an object of $\bm C$,
and $f$ is a function from $I$ to (the associated set of) $A$.
An element $h$ of $\Hom{A,B}$ 
is a morphism from $(f,A)$ to $(g,B)$ in the new category if
\begin{equation*}
h\circ f=g,
\end{equation*}
that is, the following diagram commutes.
\begin{equation*}
\xymatrix{
I\ar[r]^f\ar[d]_{\id_I}&A\ar[d]^h\\
I\ar[r]_g&B
}
\end{equation*}
Suppose, in the new category,
from the object $(f,A)$, there is a unique morphism to every other object.
Then $A$ is a \textbf{free object} on $I$ with respect to $f$.

\begin{theorem}\label{thm:free}
In a concrete category $\bm C$,
if $A$ is a free object on a set $I$ with respect to a function $f$,
and $B$ is a free object on $I$ with respect to $g$,
then there is a unique isomorphism $h$ from $A$ to $B$
such that $h\circ f=g$.
\end{theorem}

By Theorems~\ref{thm:free-ab} and~\ref{thm:free-gp},
free objects exist in the categories of abelian groups and of arbitrary groups. 
Another example will be given 
by Theorem~\ref{thm:poly-free} (p.\ \pageref{thm:poly-free}).



\section{Presentation of groups}\label{sect:pres}

We develop a method for describing groups as quotients of free groups.
Let us first note that every group \emph{is} (isomorphic to) such a quotient.

\begin{theorem}
  Every group is isomorphic to the quotient of a free group 
by some normal subgroup.
\end{theorem}

\begin{proof}
By Theorem~\ref{thm:free-gp} (p.\ \pageref{thm:free-gp}), 
the identity map from $G$ to itself
extends to a homomorphism from $\free G$ to $G$.
Since this homomorphism is surjective, 
the claim follows by the First Isomorphism Theorem 
(p.\ \pageref{cor:1it}).
\end{proof}

If $A$ is a subset of some group $G$,
on p.\ \pageref{gpgen}
we defined $\gpgen A$ as the intersection of (the set of) subgroups of $G$
that include $A$.
We know this intersection is a subgroup of $G$, by Theorem~\ref{thm:subgroups}.
But possibly $\gpgen A$ is not a \emph{normal} subgroup of $G$.
However, we have the following.

\begin{theorem}
  An arbitrary intersection of normal subgroups is a normal subgroup.
\end{theorem}

Now, given a subset $B$ of a group $G$, we can define
\begin{equation*}
  \nsubgen B=\bigcap\mathcal N,
\end{equation*}
where $\mathcal N$ is the set of all normal subgroups of $G$ that include $B$.
If $A$ is an arbitrary set, and $B\included\free A$, we define
\begin{equation*}
\gpres AB=\free A/\nsubgen B.
\end{equation*}
This is the group with \textbf{generators} $A$ and \textbf{relations} $B$.
Note however that, strictly, 
the elements of $A$ as such do not generate the group;
rather, the cosets $a\nsubgen B$, where $a\in A$, 
generate the group.
But we can understand $a$ as a name for the coset $a\nsubgen B$.

Suppose there is a function $f$ from $A$ to a group $G$,
and $\tilde f$ is the homomorphism from $\free A$ to $G$ that extends $f$,
and this homomorphism is surjective, and its kernel is $\nsubgen B$.
By the First Isomorphism Theorem,
\begin{equation*}
G\cong\gpres AB.
\end{equation*}
We say in this case that $\gpres AB$ is a \textbf{presentation} of $G$.
If $A=\{a_0,\dots,a_n\}$, and $B=\{w_0,\dots,w_m\}$, then
$\gpres AB$ can be written as
\begin{equation*}
\gpres{a_0,\dots,a_n}{w_0,\dots,w_m}.
\end{equation*}
Sometimes, instead of $w_i$, one may write $w_i=\gid$ or an equivalent equation.
Meanwhile, $\free A$ can be presented as $\gpres A{\emptyset}$.
In particular $\Z$ can be presented as $\gpres a{\emptyset}$, but also
as $\gpres{a,b}{ab\inv}$ or $\gpres{a,b}{a=b}$. 
The group $\Zmod n$ has the presentation $\gpres a{a^n}$.
More examples are given by the theorems after the next.

\begin{theorem}[von Dyck\footnotemark]\label{thm:vD}%%%%%
\footnotetext{Walther von Dyck (1856--1934) 
gave an early (1882--3) definition of abstract groups 
\cite[ch.~49, p.~1141]{MR0472307}.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Suppose $G$ is a group, $A$ is a set, $f\colon A\to G$, and
$\tilde f$ is the induced homomorphism from $\free A$ to $G$.
Suppose further
\begin{equation*}
B\included\ker{\tilde f}.
\end{equation*}
Then there is a
well-defined homomorphism $g$ from $\gpres A B$ to $G$ such that
$g(a\nsubgen B)=f(a)$ for each $a$ in $A$,
that is, the following diagram commutes.  
\begin{equation*}
\xymatrix{
A \ar[r]^f \ar[d] & G \\
\free A \ar[ur]|{\tilde f} \ar[r]_{\uppi} & \gpres AB \ar[u]_g
}
  \end{equation*}
If $G=\gpgen{f(a)\colon a\in A}$, then $g$ is an epimorphism. 
\end{theorem}

\begin{proof}
Since $\ker(\tilde f)$ is a normal subgroup of $\free A$ that includes $B$,
we have $\nsubgen B\subgp \ker{\tilde f}$.
Hence $g$ is well-defined by Theorem~\ref{thm:hom-n} on p.\ \pageref{thm:hom-n}. 
\end{proof}

\begin{theorem}\label{thm:Dn-pres}
If $n>2$, then $\Dih n$ has the presentation
\begin{equation*}
\gpres{a,b}{a^n,b^2,(ab)^2}.
\end{equation*}
\end{theorem}

\begin{proof}
Note first that, in the group $\gpres{a,b}{a^n,b^2,(ab)^2}$,
the order of $a$ must divide $n$, 
and each of the orders of $b$ and $ab$ must divide $2$.
Now, by Theorem~\ref{thm:Dih-2} on p.\ \pageref{thm:Dih-2}, 
$\Dih n$ has elements $\alpha$ and $\beta$ that generate the group
and are such that $\alpha^n$, $\beta^2$, and $(\alpha\beta)^2$ are all equal to $\gid$.
By von Dyck's Theorem then, 
there is an epimorphism 
from $\gpres{a,b}{a^n,b^2,(ab)^2}$ to $\Dih n$ 
taking $a$ to $\alpha$ and $b$ to $\beta$ and hence $ab$ to $\alpha\beta$.
Therefore the order of $a$ must be exactly $n$,
and the orders of $b$ and of $ab$ must be $2$.
By Theorem~\ref{thm:Dn} on p.\ \pageref{thm:Dn},
the epimorphism onto $\Dih n$ must be an isomorphism.
\end{proof}

\begin{theorem}\label{thm:quat-pres}
The quaternion group $\quat$ has the presentation
\begin{equation*}
  \gpres{\mi,\mj}{\mi^4,\mi^2\mj^2,\mi\mj\mi^3\mj},
\end{equation*}
or equivalently $\gpres{\mi,\mj}{\mi^4=\gid,\ \mi^2=\mj^2,\ \mj\mi=\mi^3\mj}$.
\end{theorem}

\begin{proof}
Use von Dyck's Theorem and Theorem~\ref{thm:quat} in the manner of the previous proof.
\end{proof}

Yet another example of a presentation 
will be given in Theorem~\ref{thm:S3Z4} on p.\ \pageref{thm:S3Z4}.

\section%[Fin.~gen.~ab.~groups]
{Finitely generated abelian groups}\label{sect:fgag}

We now classify,
in the sense of \S\ref{sect:fin} (p.\ \pageref{sect:fin}),
the abelian groups with finite sets of generators,
and in particular the finite abelian groups.
A useful application of this
will be that the group of units of every finite field is cyclic
(Theorem~\ref{thm:Zp-cross}). 

\begin{theorem}\label{thm:prod-quot}
If $(G_i\colon i\in I)$ is an indexed family of groups,
and for each $i$ in $I$, $N_i\nsubgp G_i$,
then
\begin{align*}
\prod_{i\in I}N_i&\nsubgp\prod_{i\in I}G_i,&
\left.\prod_{i\in I}G_i\right/\prod_{i\in I}N_i
&\cong\prod_{i\in I}\frac{G_i}{N_i}.
\end{align*}
\end{theorem}

\begin{theorem}\label{thm:fin-gen-ab}
  For every abelian group $G$ on $n$ generators, there is a unique
  element $k$ of $n+1$, along with positive integers $d_0$, \dots,
  $d_{k-1}$, where  
  \begin{equation}\label{d}
  d_0\divides d_1\land\dots\land d_{k-2}\divides d_{k-1},
  \end{equation}
such that
  \begin{equation}\label{FH}
G\cong    
\Zmod{d_0}\oplus\dotsb\oplus\Zmod{d_{k-1}}
\oplus\underbrace{\Z\oplus\dotsb\oplus\Z}_{n-k}.
  \end{equation}
\end{theorem}

\begin{proof}
Suppose $G=\gpgen{g^i\colon i<n}$ and is abelian.  
Let $F$ be the free abelian group $\sum_{i\in n}\Z$. 
Using notation from p.\ \pageref{Z^n-gen}, 
we have that $F=\gpgen{\bv^0,\dots,\bv^{n-1}}$,
and there is a surjective homomorphism
\begin{equation*}
\sum_{i\in n}x_i\bv^i\mapsto\sum_{i\in n}x_ig^i
\end{equation*}
from $F$ to $G$.  Let $N$ be its kernel, so that
\begin{equation*}
  G\cong F/N.
\end{equation*}
Suppose it happens that $N=\gpgen{d_0\bv^0,\dots,d_{k-1}\bv^{k-1}}$.
We have
\begin{equation*}
F\cong\gpgen{\bv^0}\oplus\dots\oplus\gpgen{\bv^{n-1}},
\end{equation*}
and under the isomorphism,
\begin{equation*}
N\cong\gpgen{d_0\bv^0}\oplus\dots\oplus\gpgen{d_{k-1}\bv^{k-1}}
\oplus\trivgp\oplus\dots\oplus\trivgp.
\end{equation*}
By the lemma then,
\begin{equation*}
F/N\cong\frac{\gpgen{\bv^0}}{\gpgen{d_0\bv^0}}\oplus\dots
\oplus\frac{\gpgen{\bv^{k-1}}}{\gpgen{d_{k-1}\bv^{k-1}}}
\oplus\gpgen{\bv^k}\oplus\dots\oplus\gpgen{\bv^{n-1}},
\end{equation*}
which has the form in \eqref{FH}, although \eqref{d} might not hold.
Not every subgroup of $F$ is given to us so neatly, 
but we shall be able to put it into the desired form,
even satisfying \eqref{d}.

We can identify $F$ with $\Mat[1\times n]{\Z}$.
If $X\in\Mat[m\times n]{\Z}$,
let us denote by $\gpgen X$ the subgroup of $F$
generated by the rows of $X$.
If $P\in\GL[m]{\Z}$ and $Q\in\GL[n]{\Z}$, then
\begin{align*}
\gpgen X&=\gpgen{PX},&
F/\gpgen X&\cong F/\gpgen{XQ}.
\end{align*}
Now we can choose $P$ and $Q$ 
so as to effect certain row operations (as on p.\ \pageref{ero})
and column operations, respectively.
In particular, assuming $m\geq n$,
for some $P$ we have
\begin{equation*}
  PX=
  \begin{pmatrix}
    U\\\hline0
  \end{pmatrix},
\end{equation*}
where $U$ is an $n\times n$ \textbf{upper triangular} matrix, that is,
\begin{equation*}
U=
\begin{pmatrix}
  *&\cdots&*\\
   &\ddots&\vdots\\
  0&      &*
\end{pmatrix}.
\end{equation*}
Then we may assume $m=n$, so $PX=U$.
For some $Q$, the matrix $PXQ$ is \textbf{diagonal,} so that
\begin{equation*}
  PXQ=
  \begin{pmatrix}
    d_0&      &0\\
       &\ddots&\\
      0&      &d_{n-1}
  \end{pmatrix}.
\end{equation*}
By further adjusting $P$ and $Q$, we may ensure that~\eqref{d} holds, while $d_k=\cdots=d_{n-1}=0$.
Indeed, suppose $b,c\in\Z$ and $\gcd(b,c)=d$.  
By elementary row and column operations, from a matrix
  \begin{equation*}
  \begin{pmatrix}
    b&0\\0&c
  \end{pmatrix}
  \end{equation*}
   we obtain $\begin{pmatrix}
    b&0\\c&c
  \end{pmatrix}$ and then
$\begin{pmatrix}
  d&e\\0&f
\end{pmatrix}$,
where $e$ and $f$ are multiples of $c$ and hence of $d$; hence, with
an invertible column operation, we get
\begin{equation*}
  \begin{pmatrix}
    d&0\\0&f
  \end{pmatrix},
\end{equation*}
where again $d\divides f$.  Applying such
transformations as needed to pairs of diagonal entries of $PXQ$
yields~\eqref{d}. 
The number $k$ is uniquely determined by $X$.
We have shown that every subgroup of $F$ is generated by a set of at most $n$ elements.
Then we may assume $N=\gpgen{X}$, so that $F/N$ is as desired.
\end{proof}


\begin{porism}\label{por:fin-gen-ab}
Every subgroup of a free abelian group on $n$ generators 
is free abelian on $n$ generators or fewer.
\end{porism}

In the theorem, not only is $k$ unique,
but the numbers $d_j$ are also unique.
This can be established 
by means of an alternative classification
of the finitely generated abelian groups.

The following is Proposition VII.30 of Euclid's \emph{Elements.}
In \S\ref{sect:int-dom} (p.\ \pageref{sect:int-dom}), 
we are going to be interested in rings besides $\Z$ 
in which the proof can be carried out.
Meanwhile, the theorem will motivate the definition of \emph{prime ideal}
in \S\ref{sect:comm} (p.\ \pageref{sect:comm}).

\begin{theorem}[Euclid's Lemma]\label{thm:Euc-Lem}
If $p$ is a prime number,
then for all integers $a$ and $b$,
\begin{equation*}
p\divides ab\And p\ndivides a\implies p\divides b.
\end{equation*}
\end{theorem}

\begin{proof}
Given that $p\ndivides a$, we know that $\gcd(p,a)=1$ 
by the proof of Theorem~\ref{thm:p-prime}
(p.\ \pageref{thm:p-prime}; 
or by the result of this theorem and Theorem~\ref{thm:Znx}, 
p.\ \pageref{thm:Znx}).
Hence by Theorem~\ref{thm:ax+by=d}, we can solve $ax+py=1$.
In this case we obtain
\begin{equation*}
 abx+pby=b,
\end{equation*}
so if $p\divides ab$, then, 
since immediately $p\divides pby$,
we must have $p\divides b$.
\end{proof}

\begin{porism}
  If $m$ and $n$ are relatively prime integers,
then for all integers $a$,
\begin{equation*}
  m\divides na\implies m\divides a.
\end{equation*}
\end{porism}

\begin{theorem}[Chinese Remainder]\label{thm:CRT}
If $\gcd(m,n)=1$, 
then the homomorphism $x\mapsto(x,x)$ 
from $\Zmod{mn}$ to $\Zmod m\oplus\Zmod n$ 
is an isomorphism. 
\end{theorem}

\begin{proof}
If $x\equiv0\pmod m$ and $x\equiv 0\pmod n$,
then, by the porism,
since $\gcd(m,n)=1$, we have
$x\equiv0\pmod{mn}$.  
Hence the given homomorphism is injective.  
Since $\Zmod{mn}$ and $\Zmod m\oplus\Zmod n$
both have order $mn$,
the given homomorphism must also be surjective,
by Theorem~\ref{thm:Ded} on p.\ \pageref{thm:Ded}.
\end{proof}

The Chinese Remainder Theorem will be generalized 
as Theorem~\ref{thm:CRT-R} on p.\ \pageref{thm:CRT-R}. 
In the usual formulation of the theorem, every system
\begin{align*}
  x&\equiv a\pmod m,&x&\equiv b\pmod n
\end{align*}
of congruences
has a unique solution \emph{modulo} $mn$; but this solution is just
the inverse image of $(a,b)$ under the isomorphism $x\mapsto(x,x)$.

\begin{theorem}
For every finitely generated abelian group $G$, there is a unique list $(p_i:i<k)$ of primes, where
\begin{equation*}
p_0\leq\dots\leq p_{k-1},
\end{equation*}
there are unique elements $m(0)$, \dots, $m(k-1)$ of $\N$, 
and there is a unique $r$ in $\upomega$
such that 
\begin{equation*}
G\cong  
\Zmod{p_0{}^{m(0)}}\oplus\dotsb\oplus\Zmod{p_{k-1}{}^{m(k-1)}}
\oplus\underbrace{\Z\oplus\dotsb\oplus\Z}_r.
\end{equation*}
\end{theorem}

\begin{proof}
To obtain the analysis, apply the Chinese Remainder Theorem to
Theorem~\ref{thm:fin-gen-ab}. 
The analysis is
unique, provided it is unique in the case where all of the $p_j$ are
the same.  But in this case, the analysis is unique, by repeated
application of the observation that the order of the group is the
highest prime power appearing in the factorization.
\end{proof}

\begin{theorem}\label{thm:Zp-cross}
The group of units of every finite field is cyclic.
In particular, if $p$ is prime, then
\begin{equation*}
\Zmodu p\cong\Zmod{p-1}.
\end{equation*}
\end{theorem}

\begin{proof}
Let $F$ be a finite field.
By Theorem~\ref{thm:fin-gen-ab},
  \begin{equation*}
\units F\cong    
\Zmod {d_0}\oplus\dotsb\oplus\Zmod{d_{k-1}}\oplus \Zmod m
  \end{equation*}
  for some $d_0$, \dots, $d_{k-1}$ and $m$ such that
  \begin{equation*}
d_0\divides d_1\land\dots\land d_{k-1}\divides m.
\end{equation*}
In particular,
\begin{equation*}
m\leq\size{\units F}.
\end{equation*}
Also, every element of $\units F$ is a zero of the polynomial $x^m-1$.
But this polynomial can have at most $m$ roots in a field.
Thus
\begin{equation*}
\size{\units F}\leq m.
\end{equation*}
Hence $\size{\units F}=m$ and so $\units F\cong\Zmod m$.
\end{proof}

If $\Zmodu n$ is cyclic, then its generators are called \textbf{primitive roots} of $n$;
Gauss \cite[p.~37]{Gauss} attributes the terminology to Euler.
Recall from p.\ \pageref{thm:Euler} the definition
\begin{equation*}
\upphi(n)=\size{\Zmodu n}.
\end{equation*}
Thus, if $\Zmodu n$ is indeed cyclic, it is isomorphic to $\Zmod{\upphi(n)}$.

\begin{theorem}\label{thm:prim-roots}
If $n$ has a primitive root $a$,
then it has exactly $\upphi(\upphi(n))$ primitive roots,
namely those $a^k$ such that $\gcd(k,\upphi(n))=1$.
\end{theorem}

By Theorem \ref{thm:Zp-cross}, primes have primitive roots.
We have to find them by trial.
For example, $2$ is not a primitive root of $7$,
but $3$ is, by the following computations.
\begin{equation*}
\begin{array}{|c|*6{r|}c|}\hline
  k&0&1& 2& 3& 4& 5&\pmod6\\\hline
2^k&1&2&-3& 1& 2&-3&\pmod7\\\hline
3^k&1&3& 2&-1&-3&-2&\pmod7\\\hline
\end{array}
\end{equation*}
Then $5$ (or $-2$) is the only other primitive root of $7$.

\chapter{Finite groups}


\section{Semidirect products}\label{sect:semidirect}

Recall from p.\ \pageref{conjug} 
that \emph{conjugation} in a group 
is an operation $x\mapsto axa\inv$ for some element $a$ of the group.
The following is reminiscent of Cayley's Theorem 
(Theorem \ref{thm:Cay} on p.\ \pageref{thm:Cay}),
although the homomorphism now need not be an embedding.

\begin{theorem}\label{thm:conjug}
Conjugation in a group is an automorphism.
For every group $G$, the function
\begin{equation*}
g\mapsto(x\mapsto gxg\inv)
\end{equation*}
from $G$ to $\Aut G$ is a homomorphism.
\end{theorem}

Conjugation by an arbitrary element of a group
is also called 
an \textbf{inner automorphism}\index{inner automorphism} of the group.  
The kernel of the homomorphism in the theorem
is the \textbf{center}\index{center} of $G$, denoted by
\begin{equation*}
  \centr G.
\end{equation*}
We shall generalize this notion 
in \S\ref{sect:nilpotent} (p.\ \pageref{sect:nilpotent}).%%%%%  
\footnote{Repeating the process of forming inner automorphisms, 
we can define a function $\alpha\mapsto G_{\alpha}$ 
on the class of ordinals so that
$G_0=G$, and $G_{\alpha'}=\Aut{G_{\alpha}}$,
and if $\beta$ is a limit, 
then $G_{\beta}$ is the so-called \emph{direct limit} 
of $(G_{\alpha}\colon\alpha<\beta)$.
Then for some ordinal $\alpha$, for all ordinals $\beta$,
if $\beta\geq\alpha$, then $G_{\beta}=G_{\alpha}$: 
Simon Thomas~\cite{MR801316} shows this in
case $G$ has trivial center; Joel Hamkins~\cite{MR1487370},
in the general case.}
Meanwhile, it will be useful 
to have the following generalization of the last theorem.

\begin{theorem}\label{thm:GNG}
For every group $G$, if $N\nsubgp G$, then there is a homomorphism
\begin{equation*}
g\mapsto(x\mapsto gxg\inv) 
\end{equation*}
from $G$ to $\Aut N$.
\end{theorem}

In the theorem, let the homomorphism be $g\mapsto\sigma_g$.  
Suppose also $H\subgp G$, and $N\cap H=\trivgp$.  
Then the conditions of Theorem~\ref{thm:isdp} (p.\ \pageref{thm:isdp}) are met, 
and $NH$ is an internal semidirect product.  
Equation~\eqref{eqn:sdp} describing multiplication on $NH$, 
namely
\begin{equation*}
(mg)(nh)=(m\cdot gng\inv)(gh), 
\end{equation*}
can be rewritten as 
\begin{equation*}
  (mg)(nh)=(m\cdot\sigma_g(n))(gh).
\end{equation*}

\begin{theorem}
  Suppose $N$ and $H$ are groups, and $g\mapsto\sigma_g$ is a
  homomorphism from $H$ to $\Aut N$.  Then the set $N\times H$ becomes
  a group when multiplication is defined by
  \begin{equation*}
  (m,g)(n,h)=(m\cdot\sigma_g(n),gh).    
  \end{equation*}
\end{theorem}

The group given by the theorem is the \textbf{semidirect
  product}\index{semidirect product} of
$N$ and $H$ with respect to $\sigma$; it can be denoted by
\begin{equation*}
  N\rtimes_{\sigma}H.
\end{equation*}
The bijection in Theorem~\ref{thm:isdp} is an isomorphism from
$N\rtimes_{\sigma}H$ to $NH$ when $\sigma$ is the homomorphism in
Theorem~\ref{thm:GNG}.

Now recall from Theorem~\ref{thm:x-lambda_x} (p.\ \pageref{thm:x-lambda_x})
that for every associative ring $(R,1,\cdot)$, 
the function $x\mapsto\uplambda_x$
embeds the ring in $(\End R,\id_R,\circ)$.
From this we obtain the following.

\begin{theorem}
For every associative ring $(R,1,\cdot)$, 
the function
\begin{equation*}
x\mapsto\uplambda_x
\end{equation*}
embeds $\units{(R,\cdot)}$ in $\Aut R$.
\end{theorem}

The embedding is sometimes an isomorphism:

\begin{theorem}\label{thm:Znx-AutZn}
For all $n$ in $\N$, the function
\begin{equation*}
x\mapsto\uplambda_x
\end{equation*}
is an isomorphism from $\Zmodu n$ to $\Aut{\Zmod n}$.
\end{theorem}

\begin{theorem}\label{thm:pq}
If $p$ and $q$ are primes such that
\begin{equation*}
q\divides p-1,
\end{equation*}
then there is an embedding $\sigma$ of $\Zmod q$ in $\Aut{\Zmod p}$,
and hence there is a semidirect product
\begin{equation*}
\Zmod p\rtimes_{\sigma}\Zmod q,
\end{equation*}
which is not abelian.
If $\tau$ is another embedding of $\Zmod q$ in $\Aut{\Zmod p}$,
then for some $n$ in $\Zmod q$,
the map
\begin{equation*}
(y,x)\mapsto(y,nx)
\end{equation*}
is an isomorphism from $\Zmod p\rtimes_{\tau}\Zmod q$ 
to $\Zmod p\rtimes_{\sigma}\Zmod q$.
\end{theorem}

\begin{proof}
The prime $p$ has a primitive root $a$ 
by Theorem~\ref{thm:Zp-cross} (p.\ \pageref{thm:Zp-cross}).
Letting $b=a^{(p-1)/q}$,
we have an isomorphism $x\mapsto b^x$ from $\Zmod q$ to $\gpgen b$,
and $\gpgen b$ is the unique subgroup of $\Zmodu p$ of order $q$
(Theorem~\ref{thm:Znm}, p.\ \pageref{thm:Znm}).
By the last theorem, the map $x\mapsto\uplambda_{b^x}$
is an embedding of $\Zmod q$ in $\Aut{\Zmod p}$.
Calling this embedding $\sigma$, we can form
\begin{equation*}
\Zmod p\rtimes_{\sigma}\Zmod q.
\end{equation*}
Now suppose $\tau$ is an arbitrary embedding
of $\Zmod q$ in $\Aut{\Zmod p}$.
By uniqueness of $\gpgen b$ as a subgroup of $\Zmodu p$ of order $q$,
the images of $\tau$ and $\sigma$ must be the same,
and so $\tau_1=\uplambda_{b^n}$ for some $n$ in $\Zmodu q$,
and hence
\begin{equation*}
\tau_x=\sigma_{nx}.
\end{equation*}
The function $f$ from $\Zmod p\times\Zmod q$ to itself given by
\begin{equation*}
f(y,x)=(y,nx)
\end{equation*}
is a bijection.
If we denote multiplication in $\Zmod p\rtimes_{\tau}\Zmod q$ by $\cdot^{\tau}$,
and likewise with $\sigma$ for $\tau$, then
\begin{align*}
f\bigl((c,b)\cdot^{\tau}(y,x)\bigr)
&=f(c+\tau_b(y),b+x)\\
&=(c+\sigma_{nb}(y),n(b+x))\\
&=(c+\sigma_{nb}(y),nb+nx)\\
&=(c,nb)\cdot^{\sigma}(y,nx)\\
&=f(c,b)\cdot^{\sigma}f(y,x).
\end{align*}
Thus $f$ is an isomorphism from $\Zmod p\rtimes_{\tau}\Zmod q$ 
to $\Zmod p\rtimes_{\sigma}\Zmod q$.
\end{proof}

In case $q=2$, the group in the theorem is isomorphic to $\Dih p$.
We investigate groups of order $pq$ a bit more in the next section.
The final classification of them 
will be Theorem~\ref{thm:pq-class} on p.\ \pageref{thm:pq-class}.

\section{Cauchy's Theorem}

We can partition a group $G$ into subsets $\{a,a\inv\}$.
Many of these may indeed have size $2$; but $\{\gid,\gid\inv\}=\{\gid\}$.
Hence, if $G$ is finite of \emph{even} order, 
we must have $\{a,a\inv\}=\{a\}$ for some $a$ other than $\gid$.
In this case, $a$ has order $2$.

We can recast this argument as follows.
The function $x\mapsto x\inv$ is a permutation $\sigma$ of $G$ as a set.
The function $f$ from $\Zmod2$ to $\Sym G$ given by
\begin{align*}
f_0&=\id_G,&f_1&=\sigma
\end{align*}
is a homomorphism.
Then $G$ is partitioned by the sets $\{f_x(a)\colon x\in\Zmod2\}$.
The size of such a set is $1$ or $2$.
Hence the number of such sets of size $1$ is congruent \emph{modulo} $2$
to the order of $G$.

Now we can generalize by replacing $2$ with an arbitrary prime.
Thus we obtain the first promised partial converse 
of the Lagrange Theorem (p.\ \pageref{thm:Lagrange}).
Galois apparently used the following in 1831--2;
Cauchy published a proof in 1844 \cite[pp.~142--4]{MR1517828}. 

\begin{theorem}[Cauchy]\label{thm:Cauchy}
For all primes $p$, 
every finite group whose order is a multiple of $p$ 
has an element of order $p$.
\end{theorem}

\begin{proof}[Proof (J. H. McKay \cite{MR0098777}).]
Suppose $G$ is a finite group whose order is divisible by $p$.
Let $A$ be the range of the map
\begin{equation*}
(x_0,\dots,x_{p-2})\mapsto\bigl(x_0,\dots,x_{p-2},(x_0\dotsm x_{p-2})\inv\bigr)
\end{equation*}
from $G^{p-1}$ to $G^p$.
Thus
\begin{align*}
A&=\biggl\{(x_i\colon i<p)\in G^p\colon\prod_{i<p}x_i=\gid\biggr\},&
\size A&=\size{G^{p-1}}.
\end{align*}
If $(x_i\colon i<p)\in A$ and $0<k<p$, then
\begin{equation*}
(x_0\dotsm x_{k-1})\inv=x_k\dotsm x_{p-1},
\end{equation*}
and so $(x_k,\dots,x_{p-1},x_0,\dots,x_{k-1})\in A$.
Thus we have a homomorphism $f$ from $\Zmod p$ to $\Sym A$ given by
\begin{equation*}
f_k(x_0,\dots,x_{k-1},x_k,\dots,x_{p-1})=(x_k,\dots,x_{p-1},x_0,\dots,x_{k-1}).
\end{equation*}
Then
\begin{gather*}
f_k(\bm x)=f_{\ell}(\bm x)\iff f_{k-\ell}(\bm x)=\bm x,\\	
\{k\in\Zmod p\colon f_k(\bm x)=\bm x\}\subgp \Zmod p.
\end{gather*}
Subgroups of $\Zmod p$ have order $1$ or $p$,
and so the set $\{f_k(\bm x)\colon k\in\Zmod p\}$ has size $p$ or $1$.
Such subsets partition $A$.
One of the subsets, namely $\{(\gid,\dots,\gid)\}$, has size $1$.
Since $\size A$ is a multiple of $p$,
there must be $\bm x$ in $A$ different from $(\gid,\dots,\gid)$ 
such that $f_k(\bm x)=\bm x$ for all $k$ in $\Zmod p$.
In this case, $\bm x$ must be $(x,\dots,x)$ for some $x$ in $G\setminus\trivgp$.
Thus $x$ has order $p$.
\end{proof}

A \textbf{$p$-group} is a group 
the order of whose every element is a power of $p$.

\begin{corollary}
A finite group is a $p$-group if and only if 
its order is a power of~$p$. 
\end{corollary}

\begin{proof}
Let $\ell$ be a prime different from $p$.
If $\ell$ divides $\size G$, 
then $G$ has an element of order $\ell$,
so $G$ is not a $p$-group.
Conversely, if $g\in G$ and $\ell$ divides $\order g$, 
then $\ell$ divides $\size G$.  
\end{proof}

For example, the trivial group $\trivgp$ is a $p$-group for every prime $p$.
All groups $\Zmod{p^k}$, and direct sums of them, are $p$-groups.
If $n>1$, then $\Dih{2^n}$ is a nonabelian $2$-group.

By Cauchy's Theorem, the hypothesis of the following is always satisfied.

\begin{theorem}\label{thm:abe}
Suppose $p$ and $q$ are distinct primes, and $G$ is a group of order $pq$.
If $a$ and $b$ are elements of $G$ of orders $p$ and $q$ respectively,
then
\begin{align*}
\gpgen a\cap\gpgen b&=\trivgp,& G&=\gpgen a\gpgen b.
\end{align*}
\end{theorem}

In the theorem, if $\gpgen a$ is a \emph{normal} subgroup of $G$,
then $G$ is a semidirect product, 
by Theorem~\ref{thm:isdp} on p.\ \pageref{thm:isdp}.
If also $\gpgen b\nsubgp G$, then $G$ is actually a direct product,
isomorphic to $\Zmod p\times\Zmod q$.
Otherwise, $G$ is not abelian, and by Theorem~\ref{thm:pq} there is only one possibility.
With Theorem~\ref{thm:pq-class} on p.\ \pageref{thm:pq-class},
we shall show that
one of $\gpgen a$ and $\gpgen b$ must be a normal subgroup of $G$,
and so $G$ is indeed either a direct or a semidirect product.

\section{Actions of groups}\label{sect:actions}

A homomorphism from a group $G$ to the symmetry group of a set $\setactedon$
is called an \textbf{action} of $G$ on $\setactedon$.
An alternative characterization of actions is given by the following.

\begin{theorem}
  Let $G$ be a group, and $\setactedon$ a set.  There is a one-to-one
  correspondence between
  \begin{compactenum}
    \item
homomorphisms $g\mapsto(a\mapsto ga)$ from $G$ into
$\Sym{\setactedon}$, and 
\item
functions $(g,a)\mapsto ga$ from $G\times\setactedon$ into $\setactedon$ such that
\begin{align}\label{act:gha}
\gid a&=a,&
(gh)a&=g(ha)
\end{align}
for all $g$ and $h$ in $G$ and $a$ in $\setactedon$.
  \end{compactenum}
\end{theorem}

\begin{proof}
If $g\mapsto(a\mapsto ga)$ maps $G$ homomorphically into
$\Sym{\setactedon}$, then the identities in~\eqref{act:gha} follow.
Suppose conversely that these hold.  Then, in particular,
\begin{equation*}
g(g\inv a)=(gg\inv)a=\gid a=a
\end{equation*}
 and likewise $g\inv(ga)=a$, so $a\mapsto g\inv a$ is the inverse of
$a\mapsto ga$, and the function $g\mapsto(a\mapsto ga)$ does map $G$
into $\Sym{\setactedon}$, homomorphically by~\eqref{act:gha}.  
\end{proof}

Usually it is a function $(g,a)\mapsto ga$ from $G\times\setactedon$ to $\setactedon$
as in the theorem that is called an action of $G$ on $\setactedon$.
So in the notation of the proof of Cauchy's Theorem,
the function $(k,\bm x)\mapsto f_k(\bm x)$ is an action of $\Zmod p$ on $\setactedon$.
Immediately, for any set $\setactedon$,
the function $(\sigma,x)\mapsto\sigma(x)$ from $\Sym{\setactedon}\times\setactedon$ to $\setactedon$ 
is an action of $\Sym{\setactedon}$ on $\setactedon$.
Other examples that will be of interest to us are given by the following. 

\begin{theorem}
Let $G$ be a group and $H<G$.  Then $G$ acts:
\begin{compactenum}[a)]
\item
on itself by $(g,x)\mapsto\uplambda_g(x)$ (left multiplication),
\item
on $G/H$ by $(g,xH)\mapsto gxH$ (left multiplication),
\item
on itself by $(g,x)\mapsto gxg\inv$ (conjugation),
\item
on $\{xHx\inv\colon x\in G\}$ by $(g,K)\mapsto gKg\inv$ (conjugation).
\end{compactenum}
\end{theorem}

Suppose $(g,x)\mapsto gx$ is an arbitrary action of $G$ on
$\setactedon$.  
If $a\in\setactedon$, then
the subset $\{g\colon ga=a\}$ of $G$ is the \textbf{stabilizer}\index{stabilizer} of $a$,
denoted by 
\begin{equation*}
G_a;
\end{equation*}
the subset $\{ga\colon g\in G\}$ of ${\setactedon}$ is the
\textbf{orbit}\index{orbit} of $a$, denoted by 
\begin{equation*}
  Ga.
\end{equation*}
The subset $\{x\colon G_x=G\}$ of ${\setactedon}$ can be denoted by
\begin{equation*}
  {\setactedon}_0.
\end{equation*}
Note how all of these were used in the proof of Cauchy's Theorem.
Also, in the proof we established the appropriate case of the following.

\begin{theorem}\label{thm:action}
Suppose a group $G$ acts on a set $\setactedon$.
Then the orbits of the elements of $\setactedon$ under the action
are a partition of $\setactedon$, that is,
\begin{align*}
Ga\neq Gb&\implies Ga\cap Gb=\emptyset,&\bigcup_{a\in\setactedon}Ga&=\setactedon.
\end{align*}  
Moreover, for all $a$ in $\setactedon$,
\begin{align*}
G_a&\subgp G,&[G:G_a]&=\size {Ga}.
\end{align*}
\end{theorem}

\begin{proof}
Let the action be $(g,x)\mapsto gx$.
For the last equation, 
we establish a bijection between $G/G_a$ and $Ga$
by noting that
\begin{equation*}
gG_a=hG_a\iff h\inv g\in G_a\iff ga=ha;
\end{equation*}
so the bijection is $gG_a\mapsto ga$.
\end{proof}

\begin{corollary}
If there are only finitely many orbits in $\setactedon$ under $G$,
then
\begin{equation}\label{eqn:class}
  \size {\setactedon}=\size{{\setactedon}_0} +\sum_{a\in X}[G:G_a]
\end{equation}
for some set $X$ of elements of $\setactedon$ whose orbits are nontrivial.  
\end{corollary}

Equation~\eqref{eqn:class} is called the \textbf{class equation.}\index{class equation} 
We used it implicitly in the proof of Cauchy's Theorem.
In fact we used it to derive the appropriate case of the following.

\begin{theorem}\label{thm:act-cong}
  If ${\setactedon}$ is acted on by a finite $p$-group, then
  \begin{equation*}
\size{\setactedon}\equiv\size{{\setactedon}_0}\pmod p.
\end{equation*}
\end{theorem}

\begin{proof}
In the class equation,  $[G:G_a]$ is a multiple of $p$ in each
  case.
\end{proof}

\subsection{Centralizers}

Suppose $G$ acts on itself by conjugation, and $a\in G$.
Then $Ga$ is the \textbf{conjugacy class}\index{conjugacy class} of $a$, while
$G_a$ is the \textbf{centralizer}\index{centralizer} of $a$, denoted
by\footnote{More generally, if $H\subgp G$, then
  $\centralizer gH=\{h\in H\colon hgh\inv=g\}$.}
  \begin{equation*}
  \centralizer aG.
  \end{equation*}
Finally, $G_0$ is the \textbf{center}\index{center} of $G$, denoted by
    \begin{equation*}
\centr G;  
    \end{equation*}
this is a normal subgroup of $G$.  
The class equation for the present case can now be written as
\begin{equation*}%\label{eqn:cleq}
  \size G=\size{\centr G}+\sum_{a\in X}[G:\centralizer aG].
\end{equation*}

\begin{theorem}\label{thm:p^2}
  All groups of order $p^2$ are abelian.
\end{theorem}

\begin{proof}
Let $G$ have order $p^2$.  
In particular, $G$ is a $p$-group.
By Theorem~\ref{thm:act-cong},
either $\centr G=G$, in which case $G$ is abelian,
or else $\size{\centr G}=p$.
In the latter case, 
let $a\in G\setminus\centr G$.
Then
\begin{equation*}
G=\centr G\gpgen a.
\end{equation*}
But elements of $\centr G$ commute with all elements of $G$; and
$a$ commutes with itself.
If the generators commute with one another, the whole group is abelian.
Therefore $G$ must be abelian.
\end{proof}

\begin{porism}\label{por:p-ntc}
  Every nontrivial $p$-group has nontrivial center.
\end{porism}

\subsection{Normalizers}

If $H\subgp G$, let $G$ act on the set of conjugates of $H$ by conjugation.
The stabilizer of $H$ under this action 
is called the \textbf{normalizer}\index{normalizer} of $H$ in $G$
and is denoted by\footnote{More generally, if also $K\subgp G$, then
  $\normalizer HK=\{k\in K\colon kHk\inv=H\}$.}
\begin{equation*}
\normalizer HG.
\end{equation*}
Explanation of the name is given by the following.

\begin{theorem}
If $H\subgp K\subgp G$, then 
\begin{equation*}
  H\nsubgp K\iff K\subgp \normalizer HG.
\end{equation*}
\end{theorem}

We establish some technical results
for the sake of proving the Sylow Theorems of the next subsection.

\begin{lemma}\label{lem:norm}
Suppose $H<G$, and let $H$ act on $G/H$ by left multiplication.
Then
\begin{equation*}
  (G/H)_0=\normalizer HG/H.
\end{equation*}
\end{lemma}

\begin{proof}
Supposing $g\in G$, we have $gH\in(G/H)_0$ if and only if,
for all $h$ in $H$,
\begin{gather*}
  hgH=gH,\\
g\inv hgH=H,\\
g\inv hg\in H.
\end{gather*}
Thus
\begin{align*}
  gH\in(G/H)_0
&\iff g\inv Hg=H\\
&\iff g\inv\in\normalizer HG\\
&\iff g\in\normalizer HG\\
&\iff gH\in\normalizer HG/H.  \qedhere
\end{align*}
\end{proof}

A \textbf{$p$-subgroup} of a group 
is a subgroup that is a $p$-group. 
Every group has at least one $p$-subgroup,
namely the trivial subgroup $\trivgp$.

\begin{lemma}%\label{thm:HpGNGH}
  If $H$ is a $p$-subgroup of $G$, then
  \begin{equation*}
    [G:H]\equiv[\normalizer HG:H]\pmod p.
  \end{equation*}
\end{lemma}

\begin{proof}
Theorem~\ref{thm:act-cong} and the last lemma.
\end{proof}

\begin{lemma}
If $H$ is a $p$-subgroup of $G$, 
and $p$ divides $[G:H]$, then 
for some subgroup $K$ of $G$,
\begin{align*}
  H&\nsubgp K,&[K:H]&=p.
\end{align*}
\end{lemma}

\begin{proof}
By the last lemma, $p$ divides $[\normalizer HG:H]$.  
Since $H\nsubgp\normalizer HG$, 
the quotient $\normalizer HG/H$ is a group.
By Cauchy's Theorem (Theorem~\ref{thm:Cauchy}),
this group has an element $gH$ of order $p$.  
Then $H\gpgen g$ is the desired group~$K$.  
\end{proof}

Now we can start proving the Sylow Theorems.

\subsection{Sylow subgroups}

A \textbf{Sylow $p$-subgroup}\index{Sylow!--- subgroup} of a group
is a maximal $p$-subgroup.  
Then every $p$-subgroup of a finite group $G$
is a subgroup of a Sylow $p$-subgroup of $G$.%%%%%
\footnote{The same is true for infinite groups $G$,
by the version of the Axiom of Choice known as Zorn's Lemma;
but we shall not make use of this result.}
In particular, since $G$ does have the $p$-subgroup $\trivgp$,
it has at least one Sylow $p$-subgroup.
We now establish that the order of every Sylow $p$-subgroup of a finite group
is as large as Lagrange's Theorem (p.\ \pageref{thm:Lagrange}) allows it to be.

\begin{theorem}[Sylow I]\label{thm:Sylow-1}%
\index{Sylow!--- Theorems|(}\index{theorem!Sylow Th---s|(}
If $G$ is a finite group of order $p^nm$, where $\gcd(p,m)=1$,
then every Sylow $p$-subgroup of $G$ has order $p^n$.
\end{theorem}

\begin{proof}
Use the last lemma repeatedly.
\end{proof}

\begin{porism}
If $\size G=p^nm$, where $p\ndivides m$, 
then there is a chain 
\begin{equation*}
H_0\subgp H_1\subgp\cdots\subgp H_n
\end{equation*}
of $p$-subgroups of $G$, where
\begin{align*}
H_0&=\trivgp,&H_i&\nsubgp H_{i+1},&[H_{i+1}:H_i]&=p.
\end{align*} 
In particular, $H_n$ is a Sylow $p$-subgroup of $G$.
Every $p$-subgroup of $G$ appears on such a chain.
\end{porism}

In the notation of the porism,
although $H_i\nsubgp H_{i+1}$ and $H_{i+1}\nsubgp H_{i+2}$, 
we need not have $H_i\nsubgp H_{i+2}$.
For a counterexample, consider $\Dih 4$:
\begin{align*}
\gpgen{(1\cdiv 3)}&\nsubgp\gpgen{(1\cdiv 3),(0\cdiv 2)},&
\gpgen{(1\cdiv 3),(0\cdiv 2)}&\nsubgp\Dih 4,
\end{align*}
but $\gpgen{(1\cdiv 3)}\nnsubgp\Dih 4$ since
\begin{align*}
(0\cdiv 1\cdiv 2\cdiv 3)&\in\Dih4,&
(3\cdiv 2\cdiv 1\cdiv 0)(1\cdiv 3)(0\cdiv 1\cdiv 2\cdiv 3)&=(0\cdiv 2).
\end{align*}

The following is as close as can be to a converse of Lagrange's Theorem.

\begin{corollary}
Suppose $G$ is a finite group.
Then $G$ has a subgroup of every order that divides $\size G$,
provided that order is a prime power.
\end{corollary}

The converse of the first part of the following will be the Second Sylow Theorem.

\begin{corollary}\label{cor:conj-Syl}
Every conjugate of every Sylow $p$-subgroup of a finite group
is also a Sylow $p$-subgroup.
Thus if a finite group has a unique Sylow $p$-subgroup,
this must be a normal subgroup.
\end{corollary}

To prove the Second Sylow Theorem,
we shall use a generalization of Lemma~\ref{lem:norm}.

\begin{lemma}
Suppose $G$ is a group with subgroups $H$ and $K$.
Under the action of $H$ on $G/K$ by left multiplication,
\begin{equation*}
  gK\in(G/K)_0\liff H\subgp gKg\inv.
\end{equation*}
\end{lemma}

\begin{proof}
The first part of the proof of Lemma~\ref{lem:norm} shows this.
Indeed, for all $g$ in $G$, we have $gK\in(G/K)_0$ if and only if,
for all $h$ in $H$,
\begin{gather*}
  hgK=gK,\\
g\inv hgK=K,\\
g\inv hg\in K,\\
h\in gKg\inv.\qedhere
\end{gather*}
\end{proof}


\begin{theorem}[Sylow II]\label{thm:Sylow-2}
  All Sylow $p$-subgroups of finite groups are conjugate.
\end{theorem}

\begin{proof}
Say $H$ and $K$ are Sylow $p$-subgroups of $G$.
Then $H$ acts on  the set $G/K$ by left multiplication.  
By Theorem~\ref{thm:act-cong}, 
since $[G:K]$ is not a multiple of $p$,
the set $(G/K)_0$ has an element $aK$.  
By the lemma, $H\subgp aKa\inv$.  
Then $H=aKa\inv$ by the First Sylow Theorem.
\end{proof}

\begin{theorem}[Sylow III]\label{thm:Sylow-3}
If $\size G=p^nm$, where $\gcd(p,m)=1$, 
and $\setactedon$ is the set of Sylow $p$-subgroups of $G$,
then
\begin{align*}
\size{\setactedon}&\equiv1\pmod p,&
\size{\setactedon}&\text{ divides }m.
\end{align*}
\end{theorem}

\begin{proof}
$G$ acts on $\setactedon$ by conjugation, by the First Sylow Theorem
(more precisely, Corollary~\ref{cor:conj-Syl}).  
Let $H\in \setactedon$.  By the Second Sylow Theorem,
the orbit of $H$ is just $\setactedon$.  
The stabilizer of $H$ is $\normalizer HG$.  
Since by Theorem~\ref{thm:action} 
the index of the stabilizer is the size of the orbit,
we have
  \begin{equation*}
    [G:\normalizer HG]=\size{\setactedon},
  \end{equation*}
and so $\size{\setactedon}$ divides $\size G$.
Now suppose also $K\in\setactedon$.
Then $K$ must be the unique Sylow $p$-subgroup of $\normalizer KG$.
Considering $H$ as acting on $\setactedon$ by conjugation, we have
\begin{align*}
K\in\setactedon_0
&\iff H\subgp\normalizer KG\\
&\iff H=K.
\end{align*}
Therefore $\setactedon_0=\{H\}$, 
so by Theorem~\ref{thm:act-cong},
\begin{equation*}
  \size{\setactedon}\equiv1\pmod p.
\end{equation*}
It now follows that $\size{\setactedon}$ divides $m$.
\end{proof}\index{Sylow!--- Theorems|)} \index{theorem!Sylow Th---s|)}


\section{*Classification of small groups}\label{sect:class-small}

We can now complete the work, 
begun in \S \ref{sect:semidirect} (p.\ \pageref{sect:semidirect}), 
of classifying the groups of order $pq$ for primes $p$ and $q$.

\begin{theorem}\label{thm:pq-class}
Suppose $p$ and $q$ are distinct primes, with $q<p$, 
and $G$ is a group of order $pq$.
Either
\begin{equation*}
G\cong\Zmod p\times\Zmod q,
\end{equation*}
which is cyclic, or else $p\equiv1\pmod q$ and
\begin{equation*}
G\cong\Zmod p\rtimes_{\sigma}\Zmod q
\end{equation*}
for some embedding $\sigma$ of $\Zmod q$ in $\Aut{\Zmod p}$.
In particular, if $q=2$, then
\begin{equation*}
G\cong\Dih p.
\end{equation*}
\end{theorem}

\begin{proof}
By Cauchy's Theorem,
$G$ has elements $a$ and $b$, of orders $p$ and $q$ respectively.
Then $\gpgen a$ and $\gpgen b$ are Sylow subgroups of $G$.
Let $\setactedon$ be the set of Sylow $p$-subgroups of $G$.  
By the Third Sylow Theorem, 
$\size{\setactedon}$ divides $q$.
Since $p\ndivides q-1$,
we must have $\size{\setactedon}=1$.
Thus $\gpgen a$ is the unique Sylow $p$-subgroup of $G$,
and so it is a normal subgroup.
By Theorems~\ref{thm:abe} and \ref{thm:isdp} 
(pages~\pageref{thm:abe} and~\pageref{thm:isdp}),
$G$ is the semidirect product of $\gpgen a$ and $\gpgen b$.
If it is not actually a direct product,
then $\gpgen b$ must not be a normal subgroup of $G$,
and so $q$ does divide $p-1$, and the rest follows.
\end{proof}

We now know all groups of order less than $36$, but different from
$8$, $12$, $16$, $18$, $20$, $24$, $27$, $28$, $30$, and $32$.

\begin{theorem}\label{thm:8}
  Every group of order $8$ is isomorphic to one of
  \begin{align*}
    &\Zmod 8,&
&\Zmod 2\oplus\Zmod 4,&
&\Zmod 2\oplus\Zmod 2\oplus\Zmod 2,&
&\Dih 4,&
&\quat.
  \end{align*}
\end{theorem}

\begin{proof}
Say $\order G=8$.  If $G$ is abelian, 
then its possibilities are given 
by the classification of finitely generated abelian groups
(Theorem~\ref{thm:fin-gen-ab}, p.\ \pageref{thm:fin-gen-ab}).  
Suppose $G$ is not abelian.
Then $G$ has an element $a$ of order greater than $2$
by Theorem~\ref{thm:x2e} (p.\ \pageref{thm:x2e}),
and so $\order a=4$ (since $G\ncong\Zmod 8$).  
Then $\gpgen a\nsubgp G$ by Theorem~\ref{thm:index-2} (p.\ \pageref{thm:index-2}).
Let $b\in G\setminus\gpgen a$.  
Then $b^2$ is either $\gid$ or $a^2$
(since otherwise $b$ would generate $G$).  
In the former case,
$G=\gpgen a\rtimes\gpgen b$, so $G\cong\Dih 4$.  
In the latter case, $G\cong\quat$. 
\end{proof}

\begin{theorem}\label{thm:S3Z4}
The subgroup of $\Sym3\times\Zmod4$ generated by the two elements
\begin{align*}
&\bigl((0\cdiv 1\cdiv 2),2\bigr),&&\bigl((0\cdiv 1),1\bigr)
\end{align*}
has order $12$ and has the presentation
\begin{equation*}
\gpres{a,b}{a^6,a^3b^2,bab\inv a}.
\end{equation*}
\end{theorem}

\begin{lemma}
If $H\subgp G$, 
and $\sigma$ is the homomorphism $g\mapsto(xH\mapsto gxH)$ from $G$ to $\Sym{G/H}$, 
then
\begin{equation*}
\ker(\sigma)\subgp H.
\end{equation*}
\end{lemma}

\begin{theorem}
  Every group of order $12$ is isomorphic to one of
  \begin{align*}
    \Zmod{12},&
&\Zmod 2\oplus\Zmod 6,&
&\Alt 4,&
&\Dih 6,&
&\gpres{a,b}{a^6,a^3b^2,bab\inv a}.
  \end{align*}
\end{theorem}

\begin{proof}
Suppose $\order G=12$.  
By Cauchy's Theorem, $G$ has an element $c$ of order $3$.
Then $G$ acts on $G/\gpgen c$ by left multiplication, 
which gives us a homomorphism from $G$ to $\Sym{G/\gpgen c}$.
Since $[G:\gpgen c]=4$, 
there is a homomorphism from $G$ to $\Sym 4$.  
If this is an embedding, 
then $G\cong\Alt 4$ by Theorem~\ref{thm:SA2} (p.\ \pageref{thm:SA2}).  
Otherwise, by the lemma, the kernel of the homomorphism must be $\gpgen c$.
In this case,
\begin{equation*}
\gpgen c\nsubgp G.
\end{equation*}
Now let $H$ be a Sylow $2$-subgroup of $G$. 
Having order $2^2$, it is abelian (Theorem~\ref{thm:p^2}, p.\ \pageref{thm:p^2}).
If $G$ is not abelian, 
then the action of $H$ on $\gpgen c$ by conjugation must be nontrivial.
But since $\size{\Aut{\gpgen c}}=6$,
which is indivisible by the order of $H$,
there must be some $d$ in $H$ that commutes with $c$.
Then $\gpgen{c,d}\cong\Zmod6$. 
Let $a=cd$, so $\gpgen a=\gpgen{c,d}$. 
Let $b\in G\setminus\gpgen a$, so
\begin{equation*}
G=\gpgen{a,b}.
\end{equation*}
If $\order b=2$, then $G\cong\Dih 6$.
In any case,
conjugation by $b$ is a nontrivial automorphism of $\gpgen a$,
and in particular $bab\inv$ is a generator of $\gpgen a$ different from $a$.
There is only one of these, namely $a\inv$, so
\begin{equation}\label{eqn:bab}
bab\inv=a\inv.
\end{equation}
Also $b^2=a^k$ for some $k$ in $\Zmod6$.
If $k=\pm1$, then $G=\gpgen b$.
Suppose $k=\pm2$.  
Then $\order b=6$,
so $\gpgen b\nsubgp G$, and therefore
\begin{equation}\label{eqn:aba}
ab\inv a\inv=b.
\end{equation}
From \eqref{eqn:bab} we have
\begin{align}\label{eqn:abba}
	ab\inv&=b\inv a\inv,&
	ba&=a\inv b.
\end{align}
From \eqref{eqn:aba} we have $ab\inv=ba$,
so all members of the equations in~\eqref{eqn:abba} are equal to one another.
In particular,
\begin{align*}
ab\inv&=a\inv b,&
ba&=b\inv a\inv,
\end{align*}
which yield $a^2=b^2$ and $b^2=a^{-2}$ respectively, 
contradicting that $\order a=6$.
The only remaining possibility is $k=3$,
which yields the last group listed.
\end{proof}


\section{Nilpotent groups}\label{sect:nilpotent}

For a group, what is the next best thing to being abelian?
A group $G$ is abelian if and only if $\centr G=G$.  
To weaken this condition, 
we define the \textbf{commutator}\index{commutator} 
of two elements $a$ and $b$ of $G$ to be
\begin{equation*}
  aba\inv b\inv;
\end{equation*}
this can be denoted by
\begin{equation*}
[a,b].
\end{equation*}
Then
\begin{equation*}
  \centr G=\bigl\{g\in G\colon \Forall x(x\in G\lto[g,x]=\gid)\bigr\}.
\end{equation*}
We now generalize this by defining
\begin{align*}
  \cseries 0G&=\trivgp,\\
\cseries{n+1}G
&=\Bigl\{g\in G\colon \Forall x\bigl(x\in G\lto[g,x]\in\cseries nG\bigr)\Bigr\}.
\end{align*}
Then $\centr G=\cseries 1G$.
Also,
\begin{multline*}
\cseries nG
=\Biggl\{g\in G\colon\Forall{\bm x}\biggl(\bm x\in G^n\lto\\
\biggl[\Bigl[\dots\bigl[[g,x_0],x_1\bigr],\cdots\Bigr], x_{n-1}\biggr]
=\gid\biggr)\Biggr\}.
\end{multline*}

The following general result will now be useful.

\begin{theorem}\label{thm:GNKN}
  Suppose $N\nsubgp G$.
Every subgroup $H$ of $G/N$ is of the form $K/N$ 
for some subgroup $K$ of $G$ of which $N$ is a normal subgroup.
Moreover,
\begin{equation*}
  K/N\nsubgp G/N\iff K\nsubgp G.
\end{equation*}
\end{theorem}

\begin{theorem}\label{thm:central}
For all groups $G$, for all $n$ in $\upomega$,
\begin{gather}\label{eqn:C1}
\cseries nG\nsubgp G,\\\label{eqn:C2}
\cseries nG\subgp \cseries{n+1}G,\\\label{eqn:C3}
\cseries{n+1}G/\cseries nG=\centr{G/\cseries nG}.  
\end{gather}
\end{theorem}

\begin{proof}
We use induction.
  Trivially, \eqref{eqn:C1} holds when $n=0$.  
Suppose it holds when $n=k$.
Then the following are equivalent:
  \begin{gather*}
g\in \cseries{k+1}G,\\
\Forall x\bigl(x\in G\lto[g,x]\in\cseries kG\bigr),\\
\Forall x\bigl(x\in G\lto[g,x]\cseries kG=\cseries kG\bigr),\\
\Forall x\bigl(x\in G\lto[g\cseries kG,x\cseries kG]=\cseries kG\bigr),\\
g\cseries kG\in\centr{G/\cseries kG}.
  \end{gather*}
Thus \eqref{eqn:C2} and \eqref{eqn:C3} hold when $n=k$.
In particular, 
\begin{equation*}
\cseries{k+1}G/\cseries
kG\nsubgp G/\cseries kG, 
\end{equation*}
and so, by the last theorem, \eqref{eqn:C1} holds when $n=k+1$.
\end{proof}

The sequence $(\cseries nG\colon n\in\upomega)$ may be written out as
\begin{equation*}
  \trivgp\nsubgp\centr G\nsubgp\cseries 2G\nsubgp\cseries 3G\nsubgp\dotsb
\end{equation*}
although strictly this expression 
is not a noun,
but the conjunction of the statements 
$\trivgp\nsubgp\centr G$, $\centr G\nsubgp\cseries 2G$, 
$\cseries2G\nsubgp\cseries3G$, and so on.
By the last theorem (and Theorem~\ref{thm:NGHG} on p.\ \pageref{thm:NGHG}),
the relation $\nsubgp$ on the set $\{\cseries nG\colon n\in\upomega\}$ 
is indeed transitive.
A group is called \textbf{nilpotent}\index{nilpotent} 
if for some $n$ in~$\upomega$,
\begin{equation*}
  \cseries nG=G.
\end{equation*}
So an abelian group is nilpotent, since its center is itself.%%%%%
\footnote{Apparently the term \emph{nilpotent} arises for the following reason.
If $\cseries nG=G$ and, 
for some $g$ in $G$, 
$f$ is the element $x\mapsto[g,x]$
of the monoid $(G^G,\id_G,\circ)$, 
then $f^n$ is the constant function $x\mapsto\gid$.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Other examples of nilpotent groups are given by:

\begin{theorem}\label{thm:p-nil}
  Finite $p$-groups are nilpotent.
\end{theorem}

\begin{proof}
If $G$ is a $p$-group and $\cseries kG\psubgp G$,\label{psubgp} 
then $G/\cseries kG$ is a nontrivial $p$-group, 
so by Porism~\ref{por:p-ntc} it has a nontrivial center.  
By Theorem~\ref{thm:central} then,
$\cseries kG\psubgp\cseries{k+1}G$.
\end{proof}

The converse fails, because of:

\begin{theorem}\label{thm:prod-nil}
The direct product of a finite family of nilpotent groups is nilpotent.
\end{theorem}

\begin{proof}
Use Theorem~\ref{thm:prod-quot} (p.\ \pageref{thm:prod-quot}) and
\begin{equation*}
\centr{G\times H}=\centr G\times\centr H.
\end{equation*}
If $\cseries nG=G$ and $\cseries mH=H$, then
$\cseries{\max\{n,m\}}{G\times H}=G\times H$.
\end{proof}

Thus, if all Sylow subgroups of a finite group $G$ are \emph{normal} subgroups,
then $G$ must be nilpotent.
We now proceed to a partial converse of this result.
Given that $G$ is a finite nilpotent group
with a Sylow $p$-subgroup $P$ for some prime $p$,
we want to show $P\nsubgp G$, that is, $\normalizer PG=G$.

\begin{lemma}\label{lem:NNG}
If $G$ is a finite group with Sylow $p$-subgroup $P$, then
\begin{equation*}
\normalizer{\normalizer PG}G=\normalizer PG.
\end{equation*}
\end{lemma}

\begin{proof}
Let $N=\normalizer PG$.
Suppose $g\in\normalizer NG$, that is,
\begin{equation*}
gNg\inv=N.
\end{equation*}
Since $P\subgp N$, we have also $gPg\inv\subgp N$.
But $P\nsubgp N$, so $P$ is the unique Sylow $p$-subgroup of $N$.
Since $gPg\inv$ is also a Sylow $p$-subgroup of $N$,
we must have $gPg\inv=P$.
Thus
\begin{equation*}
g\in N.
\end{equation*}
We have now proved $\normalizer NG\subgp N$.
\end{proof}

Now, in the notation of the lemma,
we want to show that,
if $N\psubgp G$,
then either $N\psubgp\normalizer NG$,
or else $G$ is not finite and nilpotent.
We shall use the following.

\begin{lemma}
If $\cseries nG\subgp H$, 
then $\cseries{n+1}G\subgp\normalizer HG$. 
\end{lemma}

\begin{proof}
  Say $g\in\cseries{n+1}G$; we show $gHg\inv\included H$.  But if $h\in
  H$, then $[g,h]\in\cseries nG$, 
  so $ghg\inv\in\cseries nGh\included H$.  Therefore $gHg\inv\included
  H$.
\end{proof}

\begin{lemma}\label{lem:HpsubG}
  If $G$ is nilpotent, and $H\psubgp G$, then
  $H\psubgp\normalizer HG$.  
\end{lemma}

\begin{proof}
  Let $n$ be maximal such that $\cseries nG\subgp H$.  Then
  $\cseries{n+1}G\setminus H$ is non-empty, but, by the last lemma, it
  is included in $\normalizer HG$.
\end{proof}

\begin{theorem}\label{thm:fng}
A finite nilpotent group is the direct product of its Sylow subgroups.
\end{theorem}

\begin{proof}
Suppose $G$ is a finite nilpotent group.  
By Lemmas~\ref{lem:NNG} and~\ref{lem:HpsubG},
every Sylow subgroup of $G$ is a normal subgroup.
Suppose the Sylow subgroups of $G$ compose a list $(P_i\colon i<n)$,
where each $P_i$ is a $p_i$-group, 
and $p_i\neq p_j$ when $i\neq j$.
If, for some $i$ in $n$,
the product $P_0\dotsm P_{i-1}$ is an internal direct product,
then its order is indivisible by $p_i$,
and so $P_0\dotsm P_{i-1}\cap P_i=\trivgp$.
Hence, by Theorem~\ref{thm:wdp} (p.\ \pageref{thm:wdp}) and induction,
each product $P_0\dotsm P_i$ is an internal direct product.
Then also the order of $P_0\dotsm P_{n-1}$
is the order of $G$, so the two groups are the same.
\end{proof}

Theorems~\ref{thm:p-nil}, \ref{thm:prod-nil}, and \ref{thm:fng} 
give us a classification of the finite nilpotent groups.

\section{Soluble groups}

Having defined the commutator of two elements of a group,
we define the \textbf{commutator subgroup}\index{commutator!--- subgroup} 
of a group $G$ to be the subgroup
\begin{equation*}
  \gpgen{[x,y]\colon(x,y)\in G^2}
\end{equation*}
generated by the commutators of all pairs of elements of $G$.
We denote this subgroup by
\begin{equation*}
  G'.
\end{equation*}
Its interest arises from the following.

\begin{theorem}\label{thm:G'}
  $G'$ is the smallest of the normal subgroups $N$ of $G$ such that
  $G/N$ is abelian.
\end{theorem}

\begin{proof}
  If $f$ is a homomorphism defined on $G$, then
  \begin{equation*}%\label{eqn:f([x,y])}
    f([x,y])
    %=f(xyx\inv y\inv)=f(x)f(y)f(x)\inv f(y)\inv
    =[f(x),f(y)].
  \end{equation*}
Thus, if $f\in\Aut G$, then
 $f(G')\subgp G'$.  
 In particular, $xG'x\inv\subgp G'$ for all $x$ in $G$; so $G'\nsubgp G$.  
 Suppose $N\nsubgp G$; then the following are equivalent.
  \begin{compactenum}
    \item
$G/N$ is abelian.
\item
$N=[x,y]N$ for all $(x,y)$ in $G^2$.
\item
$G'\subgp N$.\qedhere
  \end{compactenum}
\end{proof}

We now define the \textbf{derived subgroups}\index{derived subgroup}
$\dsubgp nG$ of $G$ by 
\begin{align*}
  \dsubgp 0G&=G,&
\dsubgp {n+1}G&=(\dsubgp nG)'.
\end{align*}
We have a descending sequence
\begin{equation*}
  G\nsupgp G'\nsupgp\dsubgp 2G\nsupgp\dotsb
\end{equation*}
The group $G$ is called \textbf{soluble}\index{soluble}
or \textbf{solvable}
if this sequence reaches $\trivgp$ (after finitely many steps).%%%%%
\footnote{If $f$ is a polynomial in one variable over $\Q$,
let $A$ be the set of its zeros in the field $\C$, 
and let $G=\{\sigma\restriction A\colon\sigma\in\Aut{\C}\}$.
Then $G\subgp\Sym A$,
and $G$ is soluble if and only if
the elements of $A$ can be obtained from $\Q$
by the field operations and taking $n$th roots for arbitrary $n$ in $\N$.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Immediately, abelian groups are soluble.
For more examples, let $K$ be a field, and if $n\in\N$, let
$G$ be the subgroup of $\GL K$ consisting of \textbf{upper triangular
  matrices.}\index{upper triangular}  So 
$G$ comprises the matrices
\begin{equation*}
    \begin{pmatrix}
    a_0&      &*\\
       &\ddots& \\
    0  &      &a_{n-1}
  \end{pmatrix}
\end{equation*}
where $a_0\dotsm a_{n-1}\neq0$.
We have
\begin{equation*}
  \begin{pmatrix}
    a_0&      &*\\
       &\ddots& \\
    0  &      &a_{n-1}
  \end{pmatrix}
  \begin{pmatrix}
    b_0&      &*\\
       &\ddots& \\
    0  &      &b_{n-1}
  \end{pmatrix}
=
  \begin{pmatrix}
    a_0b_0&      &*\\
       &\ddots& \\
    0  &      &a_{n-1}b_{n-1}
  \end{pmatrix}
\end{equation*}
and therefore every element of $G'$ is
\textbf{unitriangular,}\index{unitriangular} that is, it takes the form of
\begin{equation*}
  \begin{pmatrix}
    1&      &*\\
      &\ddots& \\
    0  &      &1
  \end{pmatrix}.
\end{equation*}
We also have
\begin{equation*}
    \begin{pmatrix}
    1&a_1&      &*\\
     &1  &\ddots& \\
     &   &\ddots&a_{n-1}\\
    0&   &      &1
  \end{pmatrix}
    \begin{pmatrix}
    1&b_1&      &*\\
     &1  &\ddots& \\
     &   &\ddots&b_{n-1}\\
    0&   &      &1
  \end{pmatrix}
=
    \begin{pmatrix}
    1&c_1&      &*\\
     &1  &\ddots& \\
     &   &\ddots&c_{n-1}\\
    0&   &      &1
  \end{pmatrix}
\end{equation*}
\begin{comment}


\begin{multline*}
    \begin{pmatrix}
    1&a_1&      &*\\
     &1  &\ddots& \\
     &   &\ddots&a_{n-1}\\
    0&   &      &1
  \end{pmatrix}
    \begin{pmatrix}
    1&b_1&      &*\\
     &1  &\ddots& \\
     &   &\ddots&b_{n-1}\\
    0&   &      &1
  \end{pmatrix}\\
=
    \begin{pmatrix}
    1&a_1+b_1&      &*\\
     &1  &\ddots& \\
     &   &\ddots&a_{n-1}+b_{n-1}\\
    0&   &      &1
  \end{pmatrix},
\end{multline*}


\end{comment}
where $c_i=a_i+b_i$ in each case,
so the elements of $G''$ take the form of
\begin{equation*}
  \begin{pmatrix}
    1&0&      &*\\
     &1  &\ddots& \\
     &   &\ddots&0\\
    0&   &      &1
  \end{pmatrix}.
\end{equation*}
Proceeding, we find $\dsubgp{n+1}G=\trivgp$. 

\begin{theorem}
  Nilpotent groups are soluble.
\end{theorem}

\begin{proof}
Each quotient $\cseries{k+1}G/\cseries kG$ 
is the center of some group, namely $G/\cseries kG$, so it is abelian.
By Theorem~\ref{thm:G'} then,
\begin{equation*}
  (\cseries{k+1}G)'\subgp \cseries kG.
\end{equation*}
Suppose $G$ is nilpotent, so that $G=\cseries nG$ for some $n$ in $\upomega$.  
Then
\begin{equation*}
\dsubgp0G\subgp\cseries nG.
\end{equation*}
If $\dsubgp kG\subgp\cseries{n-k}G$, then
\begin{equation*}
\dsubgp{k+1}G\subgp(\cseries{n-k}G)'\subgp\cseries{n-k-1}G.
\end{equation*}
By induction, $\dsubgp nG\subgp\cseries0G=\trivgp$.
\end{proof}

The foregoing argument might be summarized in the following commutative diagram,
which is built up from left to right,  
the arrows being inclusions:
\begin{equation*}
\xymatrix{
G \ar[d] & G' \ar[l]\ar[d] & \dsubgp 2G \ar[l]\ar[d] & \ar[l] \dsubgp
3G \ar[d] & \dsubgp nG \ar@{.>}[l]\ar[d]\\
G \ar[d] & \cseries nG' \ar[l]\ar[d] & \cseries{n-1}G'\ar[l]\ar[d] &
\ar[l]\ar[d]\cseries{n-2}G'\ar[l] & \centr G' \ar@{.>}[l] 
\ar[d]\\
\cseries nG & \ar[l] \cseries {n-1}G & \ar[l] \cseries{n-2}G & \ar[l]
\cseries{n-3}G & \trivgp \ar@{.>}[l]
}
\end{equation*}

Since $\Sym3/\Alt3$ is abelian, we have
\begin{align*}
\Sym3'&\subgp\Alt3,&
\Sym3''&\subgp\Alt3'=\trivgp,
\end{align*}
so $\Sym3$ is soluble.  However,
\begin{equation*}
\Sym3=\Alt3\rtimes\gpgen{(0\cdiv 1)},
\end{equation*}
the semidirect product of its Sylow subgroups;
but the product is not \emph{direct,}
so $\Sym3$ is not nilpotent.

\begin{theorem}
Let $H\subgp G$ and $N\nsubgp G$.
\begin{compactenum}
\item
If $G$ is soluble, then so are $H$ and $G/N$.
\item
If $N$ and $G/N$ are soluble, then so is $G$.
\end{compactenum}
\end{theorem}

\begin{proof}
\begin{asparaenum}
\item
$\dsubgp kH\subgp\dsubgp kG$ and $\dsubgp k{(G/N)}=\dsubgp kGN/N$.
\item
If $G/N$ is soluble,
then $\dsubgp nG\subgp N$ for some $n$.
If also $N$ is soluble,
then $\dsubgp mN=\trivgp$ for some $m$,
so $\dsubgp{n+m}G\subgp\dsubgp mN=\trivgp$.\qedhere
\end{asparaenum}
\end{proof}

\begin{theorem}
Groups with non-abelian simple subgroups are not soluble.  
\end{theorem}

\begin{proof}
  Suppose $H$ is simple.  Since $H'\nsubgp H$, we have either
  $H'=\trivgp$ or $H'=H$.  In the former case, $H$ is abelian; in
  the latter, $H$ is insoluble.
\end{proof}

In particular, $\Sym n$ is not soluble if $n\geq 5$.%%%%%
\footnote{This is why the general $5$th-degree polynomial equation
%  $a+bx+cx^2+dx^3+ex^4+x^5=0$
is insoluble by radicals.}


\section{Normal series}\label{sect:NS}

A \textbf{normal series}\index{normal!--- series} for a group $G$ 
is a list $(G_0,\dots,G_n)$ of subgroups, where
\begin{equation*}
G=G_0\nsupgp G_1\nsupgp\dotsb\nsupgp G_n=\trivgp.
\end{equation*}
We do not require $G_k\nsupgp G_{k+2}$.%%%%%
\footnote{One may call a normal series a \emph{subnormal series,}
reserving the term \emph{normal series} for the case where $G\nsupgp G_k$ for each $k$.
However, we shall not be interested in the distinction recognized by this terminology.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%
The quotients $G_k/G_{k+1}$
are called the \textbf{factors}\index{factor} of the normal series.  
The series is called
\begin{compactenum}[1)]
  \item
a \textbf{composition series,}\index{composition series}\index{series!composition ---} if the factors are simple; 
\item
a \textbf{soluble series,}\index{soluble series}\index{soluble!---
  series}\index{series!soluble ---} if the factors are abelian.
\end{compactenum}

\begin{theorem}
A group is soluble if and only if it has a soluble series.
\end{theorem}

\begin{proof}
If $\dsubgp nG=\trivgp$, 
then $(\dsubgp0G,\dots,\dsubgp nG)$ is a soluble series for $G$,
by Theorem~\ref{thm:G'}.
Suppose conversely $(G_0,\dots,G_n)$ is a soluble series for $G$.
Again by Theorem~\ref{thm:G'}, we have $G_k{}'\subgp G_{k+1}$ for each $k$ in $n$.
Since also $H\subgp K$ implies $H'\subgp K'$, we have
\begin{gather*}
	G'\subgp G_1,\\
	G''\subgp G_1{}'\subgp G_2,\\
	\makebox[4.5cm]{\dotfill},\\
	\dsubgp nG\subgp \dsubgp{n-1}{G_1}\subgp \dots\subgp G_{n-1}{}'\subgp G_n=\trivgp.\qedhere
\end{gather*}
\end{proof}

Since not every finite group is soluble, 
not every finite group has a soluble series.
However:

\begin{theorem}\label{thm:comp}
  Every finite group has a composition series.
\end{theorem}

\begin{proof}
Trivially $(\trivgp)$ is a composition series.
Every nontrivial finite group $G$ has at least one proper normal subgroup, 
namely $\trivgp$.
Being finite, $G$ has only finitely many normal subgroups.
Therefore $G$ has a maximal proper normal subgroup, $G^*$
(which need not be unique).
Then $G/G^*$ is simple, by Theorem~\ref{thm:GNKN} (p.\ \pageref{thm:GNKN}): 
every normal subgroup of $G/G^*$ is $K/G^*$ 
for some normal subgroup $K$ of $G$ such that $G^*\subgp K$, 
and therefore $K$ is either $G^*$ or $G$,
so $K/G^*$ is either $\trivgp$ or $G/G^*$.  

Now let $G_0=G$, and let $G_{k+1}=G_k{}^*$ unless $G_k=\trivgp$.
Since $G$ is finite and $G_k\psupgp G_{k+1}$, 
we must have $G_n=\trivgp$ for some $n$.
Then $(G_0,\dots,G_n)$ is the desired composition series.
\end{proof}

Two normal series are \textbf{equivalent} 
if they have the same \emph{multiset}\label{multiset} of 
(isomorphism classes of) nontrivial factors.
A \textbf{multiset} is a set in which repetitions of members are allowed.
For a formal definition,
we can say a multiset is a pair $(A,f)$,
where $A$ is a set and $f\colon A\to\N$.
For example, the two series
\begin{align*}
&(\Zmod{60},\gpgen2,\gpgen6,\gpgen{12},\trivgp),&
(\Zmod{60},\gpgen3,\gpgen{15},\gpgen{15},\gpgen{30},\trivgp)
\end{align*}
are equivalent, because the factors of the first 
are isomorphic to $\Zmod2$, $\Zmod3$, $\Zmod2$, and $\Zmod5$ respectively, 
and the factors of the second 
are isomorphic to $\Zmod3$, $\Zmod5$, $\trivgp$, $\Zmod2$, and $\Zmod2$ respectively,
so each series has the same multiset of factors, namely
\begin{equation*}
\{\Zmod2,\Zmod2,\Zmod3,\Zmod5\}.
\end{equation*}
These series are \emph{not} equivalent to $(\Zmod{30},\gpgen2,\gpgen6,\trivgp)$,
whose factors are $\Zmod2$, $\Zmod3$, and $\Zmod5$.

If, from a normal series for a group, 
another normal series for the group can be obtained by deleting some terms,
then the former series is a \textbf{refinement}\index{refinement} of the latter.
For example,
the series $(\Zmod{60},\gpgen2,\gpgen4,\gpgen{12},\trivgp)$ 
is a refinement of $(\Zmod{60},\gpgen4,\gpgen{12},\trivgp)$.
Every normal series is a refinement of a normal series with no trivial factors,
and these two series are equivalent.
Among normal series with no trivial factors, 
composition series are \emph{maximal} in that they have no proper refinements.
If
\begin{gather*}
  G=G_0(0)\nsupgp G_0(1)\nsupgp G_0(2)\nsupgp\dotsb\nsupgp G_0(n_0)=\trivgp,\\
  G=G_1(0)\nsupgp G_1(1)\nsupgp G_1(2)\nsupgp\dotsb\nsupgp G_1(n_1)=\trivgp,
\end{gather*}
and the two normal series are equivalent
and have no trivial factors,
this means $n_0=n_1$, and there is $\sigma$ in $\Sym{n_0}$ such that 
\begin{equation*}
  G_0(i)/G_0(i+1)\cong G_1(\sigma(i))/G_1(\sigma(i)+1)
\end{equation*}
for each $i$ in $n_0$.  

\begin{theorem}
A soluble series for a finite group has a refinement 
in which the nontrivial factors are cyclic of prime order.
\end{theorem}

We now aim to prove Theorem~\ref{thm:JH} below.
The proof will use the following,
which is known as the Butterfly Lemma,
because the groups that it involves form the commutative diagram
in Figure~\ref{fig:butt}
(in which arrows are inclusions).
\begin{figure}[ht]
\begin{equation*}
  \xymatrix{
   &            H_0        &               & H_1                   &\\
   &            N_0H\ar[u] &               & N_1H\ar[u]            &\\
   &                       &H\ar[ul]\ar[ur]&                       &\\
   &N_0(H_0\cap N_1)\ar[uu]&               &N_1(H_1\cap N_0)\ar[uu]&\\
N_0\ar[ur]&&K\ar[ul]\ar[ur]\ar[uu]&&N_1\ar[ul]\\
&H_1\cap N_0\ar[ul]\ar[ur]&&H_0\cap N_1\ar[ul]\ar[ur]&
}
\end{equation*}
\caption{The Butterfly Lemma}\label{fig:butt}
\end{figure}


\begin{lemma}[Zassenhaus]\index{Zassenhaus
    Lemma}\index{Butterfly Lemma}\index{theorem!Zassenhaus Lemma}
  \index{theorem!Butterfly Lemma}\index{lemma|see{theorem}}
For a group $G$, suppose
\begin{align*}
N_0&\nsubgp H_0\subgp G,&
N_1&\nsubgp H_1\subgp G,
\end{align*}  
and let
\begin{align*}
K&=(H_0\cap N_1)(H_1\cap N_0),&
H&=H_0\cap H_1.
\end{align*}  
Then
\begin{equation*}
K\nsubgp H,
\end{equation*}
and for each $i$ in $2$, 
there is a well-defined epimorphism
\begin{equation*}
nh\mapsto Kh
\end{equation*}
from $N_iH$ to $H/K$ with kernel $N_i(H_i\cap N_{1-i})$.
Hence:
  \begin{compactenum}[1)]
    \item
$N_i(H_i\cap N_{1-i})\nsubgp N_iH$ for each $i$ in $2$, and
\item
the two groups $N_iH/N_i(H_i\cap N_{1-i})$ are isomorphic to one another.
  \end{compactenum}
\end{lemma}

\begin{proof}
For each $i$ in $2$, we have $H_i\cap N_{1-i}\nsubgp H$ 
by Theorem~\ref{thm:NGHG} (p.\ \pageref{thm:NGHG}).
Hence $K\nsubgp H$.
If $n,n'\in N_0$ and
$h,h'\in H$ and $nh'=n'h$, then
\begin{equation*}
  h'h\inv=n\inv n',
\end{equation*}
which is in $N_0\cap H$ and hence in $K$,
so that $Kh=Kh'$.  
Thus $nh\mapsto Kh$ (where $n\in N_0$ and $h\in H$) 
is indeed a well-defined homomorphism $f$ from
$N_0H$ into $H/K$.
It is clear that $f$ is surjective.

Now let $n\in N_0$ and $h\in H$,
and suppose $nh\in\ker(f)$,
that is, 
\begin{equation*}
h\in K.
\end{equation*}
Then $h=n_0n_1$ for some $n_0$ in $H_1\cap N_0$ and $n_1$ in $H_0\cap N_1$.
Hence $nh=nn_0n_1$, 
which is in $N_0(H_0\cap N_1)$.
Thus
\begin{equation*}
nh\in N_0(H_0\cap N_1).
\end{equation*}
Conversely, suppose this last condition holds.  
Since $h=n\inv nh$, we now have also
\begin{equation*}
h\in N_0(H_0\cap N_1),
\end{equation*}
so $h=n'h'$ for some $n'$ in $N_0$ 
and some $h'$ in $H_0\cap N_1$.  
Then $n'=h(h')\inv$, which is in $H(H_0\cap N_1)$;
but this is a subgroup of $H_1$.
So $n'\in N_0\cap H_1$, 
and therefore $n'h'$, which is $h$, is in $K$,
and so $nh\in\ker(f)$.
Thus $\ker(f)=N_0(H_0\cap N_1)$.
\end{proof}

\begin{theorem}[Schreier]\index{Schreier Theorem}
  \index{theorem!Schreier Th---}
  Any two normal series have equivalent refinements.
\end{theorem}

\begin{proof}
Suppose
\begin{equation*}
G=G_i(0)\nsupgp G_i(1)\nsupgp\dotsb\nsupgp G_i(n_i)=\trivgp,
\end{equation*}
where $i<2$.
In particular then,
\begin{align*}
G_0(j+1)&\nsubgp G_0(j)\subgp G,&
G_1(k+1)&\nsubgp G_1(k)\subgp G.
\end{align*}
Define
\begin{gather*}
  G_0(j,k)=G_0(j+1)\cdot\bigl(G_0(j)\cap G_1(k)\bigr),\\
	G_1(j,k)=G_1(k+1)\cdot\bigl(G_0(j)\cap G_1(k)\bigr),
\end{gather*}
where $(j,k)\in n_0\times n_1$.  
Then by the Butterfly Lemma
\begin{gather*}
  G_0(j)=G_0(j,0)\nsupgp\dotsb\nsupgp G_0(j,n_1)=G_0(j+1),\\
  G_1(k)=G_1(0,k)\nsupgp\dotsb\nsupgp G_1(n_0,k)=G_1(k+1),	
\end{gather*}
giving us normal series that are refinements of the original ones,
and also
\begin{equation*}
  G_0(j,k)/G_0(j,k+1)\cong G_1(j,k)/G_1(j+1,k),
\end{equation*}
so that the two refinements are equivalent.
\end{proof}

\begin{theorem}[Jordan--H\"older]\label{thm:JH}\index{Jordan--H\"older
    Theorem}\index{theorem!Jordan--H\"older Th---}
  Any two composition series of a group are equivalent.
\end{theorem}

\begin{proof}
By Schreier's Theorem, any two composition series of a group have equivalent refinements;
but every refinement of a composition series is already equivalent to that series.
\end{proof}

Combining this with Theorem~\ref{thm:comp}, 
we have that every finite group determines a multiset 
of finite simple groups,
and these are just the nontrivial factors 
of any composition series of the group.
Hence arises the interest in the classification of the finite simple groups:
it is like studying the prime numbers.


\part{Rings}

\chapter{Rings}

\section{Rings}\label{sect:nna-rings}

We defined associative rings 
in \S\ref{sect:rings} (p.\ \pageref{sect:rings}).
Now we define rings in general.
If $E$ is an abelian group (written additively), 
then a \textbf{multiplication}\index{multiplication} on $E$ 
is a binary operation $\cdot$ that distributes in both senses over addition,
so that
\begin{align*}
  x\cdot(y+z)&=x\cdot y+x\cdot z,&
(x+y)\cdot z&=x\cdot z+y\cdot z.
\end{align*}
A \textbf{ring}\index{ring} is an abelian group with
a multiplication.  
In particular, 
if $(R,1,\cdot)$ is an associative ring,
then $(R,\cdot)$ is a ring.
However, rings that are not (reducts of) associative rings
are also of interest:
see the next section.

\begin{theorem}\label{thm:0x0}
Every ring satisfies the identities
\begin{align*}
  (x-y)\cdot z&=x\cdot z-y\cdot z,&
x\cdot(y-z)&=x\cdot y-x\cdot z.
\end{align*}
Hence, in particular,
\begin{gather*}
    0\cdot x=0=x\cdot 0,\\
(-x)\cdot y=-(x\cdot y)=x\cdot(-y).
\end{gather*}
\end{theorem}

By Theorem~\ref{thm:exp-in-groups} (p.\ \pageref{thm:exp-in-groups}),
given an abelian group $E$, we have a homomorphism $n\mapsto(x\mapsto nx)$
from the monoid $(\Z,1,\cdot)$ to the monoid $(E^E,\id_E,\circ)$.
This is actually a homomorphism of associative rings:

\begin{theorem}\label{thm:Z-action}
For every abelian group $E$,
\begin{equation*}
n\mapsto (x\mapsto nx)\colon(\Z,0,-,+,1,\cdot)\to(\End E,\id_E,\circ).
\end{equation*}
%the map $n\mapsto (x\mapsto nx)$ is a homomorphism from $(\Z,0,-,+,1,\cdot)$ to $(\End E,\id_E,\circ)$.
In particular,
\begin{align*}
0x&=0,&1x&=x,&(-1)x&=-x.
\end{align*}
\end{theorem}

In the theorem, if the abelian group has a multiplication, then
\begin{equation*}
  0\cdot x=0x,
\end{equation*}
where the zeros come from the ring and from $\Z$ respectively.
If, further, the multiplication has the identity $1$, then
\begin{equation*}
1\cdot x=1x.
\end{equation*}
More generally, we have

\begin{theorem}
For every integer $n$, every ring satisfies the identities
  \begin{equation*}
(nx)\cdot y=n(x\cdot y)=x\cdot ny.
  \end{equation*}
\end{theorem}

The kernel of the homomorphism in Theorem~\ref{thm:Z-action}
is $\gpgen k$ for some $k$ in $\upomega$,
by Theorem~\ref{thm:Z-subg} (p.\ \pageref{thm:Z-subg}).
Then $k$ can be called the \textbf{characteristic} of $E$.
For example, if $n\in\N$, then $\Zmod n$ has characteristic $n$,
while $\Z$ has characteristic $0$.


\begin{theorem}
If $(E,1,\cdot)$ is a ring with a multiplicative identity $1$,
then
\begin{equation*}
n\mapsto n1\colon(\Z,0,-,+,1,\cdot)\to(E,1,\cdot).
\end{equation*}
The kernel of this homomorphism is $\gpgen k$,
where $k$ is the characteristic of~$E$.
\end{theorem}



\begin{theorem}
  Every ring embeds in a ring with identity having the same
  characteristic, and in a ring with identity having characteristic $0$.
\end{theorem}

\begin{proof}
 Suppose $R$ is a ring of characteristic $n$.  Let $A$ be $\Z$ or
 $\Zmod n$, and give $A\oplus R$ the multiplication defined by
 \begin{equation*}
   (k,x)(l,y)=(kl,ky+lx+xy);
 \end{equation*}
then $(1,0)$ is an identity, and $x\mapsto(0,x)$ is an embedding.
\end{proof}



\section{Examples}\label{sect:non-assoc}

The continuous functions on $\R$ with compact support 
compose a ring with respect to the operations induced from $\R$.
Multiplication in this ring is associative,
but there is no identity. 

If $n>1$, then $\gpgen n$ is a sub-ring of $\Z$ with no identity.

On p.\ \pageref{Ham} we obtained $\Ham$ 
as the sub-ring of $\Mat[2\times2]{\C}$
that is the image of $\C\oplus\C$ under the group-homomorphism
 \begin{equation*}
(x,y)\mapsto
\begin{pmatrix}
  x&y\\-\bar y&\bar x
\end{pmatrix}.
 \end{equation*}
We also defined
\begin{equation*}
  \mj=
  \begin{pmatrix}
    0&1\\-1&0
  \end{pmatrix},
\end{equation*}
so that every element of $\Ham$ is $z+w\mj$ 
for some unique $(z,w)$ in $\C^2$.
Then $\Ham$ has the automorphism $z+w\mj\mapsto\overline{z+w\mj}$, where
\begin{equation*}
\overline{z+w\mj}=\bar z-w\mj.
\end{equation*}
Then the same construction that creates $\Ham$ out of $\C$
can be applied to $\Ham$ itself, 
yielding the ring $\Oct$ of \textbf{octonions;}\index{octonion}
but this ring is not associative.  


In any ring $(E,\cdot)$, we define
\begin{equation*}
[x,y]=x\cdot y-y\cdot x.
\end{equation*}
Then the binary operation $(x,y)\mapsto[x,y]$
is also a multiplication on $E$.
This operation can be called the \textbf{Lie bracket.}
We have
\begin{equation}\label{eqn:xx0}
  [x,x]=0.
\end{equation}

\begin{theorem}
In an associative ring,
\begin{equation}\label{eqn:xyz}
\bigl[[x,y],z\bigr]=\bigl[x,[y,z]\bigr]-\bigl[y,[x,z]\bigr].
\end{equation}
\end{theorem}

The identity \eqref{eqn:xyz} is called the \textbf{Jacobi identity.}
A \textbf{Lie ring} is a ring whose multiplication 
has the properties of the Lie bracket 
given by the identities~\eqref{eqn:xx0} and~\eqref{eqn:xyz}.
If $(E,1,\cdot)$ is an associative ring, 
and $\bracket$ is the Lie bracket in this ring,
then $(E,\bracket)$ is a Lie ring.
However, we shall see presently
that there are Lie rings that do not arise in this way.

If $(E,\cdot)$ is a ring,
and $D$ is an element of $\End E$ satisfying the \textbf{Leibniz rule}
\begin{equation*}
D(x\cdot y)=Dx\cdot y+x\cdot Dy,
\end{equation*}
then $D$ is called a \textbf{derivation} of $(E,\cdot)$.\label{derivation}
For example, 
let $\diff$ be the set of all infinitely differentiable functions 
from $\R$ to itself.
This is an associative ring in the obvious way.
Then differentiation is a derivation of $\diff$.

\begin{theorem}
The set of derivations of a ring $(E,\cdot)$
is the universe of an abelian subgroup of $\End E$
and is closed under the bracket
\begin{equation*}
(X,Y)\mapsto X\circ Y-Y\circ X.
\end{equation*}
\end{theorem}

The abelian group of derivations of a ring $(E,\cdot)$
can be denoted by
\begin{equation*}
  \Der{E,\cdot}.
\end{equation*}
Then $(\Der{E,\cdot},\bracket)$ is a sub-ring of $(\End E,\bracket)$,
but is not generally closed under $\circ$.






\section{Associative rings}



We know from Theorem~\ref{thm:units} (p.\ \pageref{thm:units})
that an associative ring $(R,1,\cdot)$ has a group of units, $\units R$.  
In particular, in an associative ring,
when an element has both a left and a right inverse,
they are equal.
However, the example on p.\ \pageref{exa:no-unit} 
shows that some ring elements can have right inverses that are not units.  

A \textbf{zero-divisor}\index{zero-divisor}\index{divisor!zero ---} 
of the ring $R$ is a nonzero element $b$ 
such that the equations
\begin{align*}
  bx&=0,&yb&=0
\end{align*}
have nonzero solutions in $R$.  
So zero-divisors are not units. 
For example, if $m>1$ and $n>1$, 
then $m+\gpgen {mn}$ and $n+\gpgen{mn}$ are zero-divisors in $\Zmod {mn}$.  
The unique element of the trivial ring $\Zmod 1$ is a unit, 
but not a zero-divisor.

A commutative ring is an
\textbf{integral domain}\index{integral domain}\index{domain!integral ---}%
\index{ring|seealso{domain}} if it has no zero-divisors and $1\neq0$.
If $n\in\N$, the ring $\Zmod n$ is an integral domain 
if and only if $n$ is prime.%%%%%
\footnote{Lang refers to integral domains 
as \emph{entire} rings \cite[p.~91]{Lang-alg}.
It would appear that integral domains 
were originally subgroups of $\C$
that are closed under multiplication 
\emph{and} that include the integers \cite[p.~47]{Cohn-ANT}.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Hence the characteristic of an integral domain must be prime or $0$.
Fields are integral domains,
but $\Z$ is an integral domain that is not a field.
If $p$ is prime, then, 
by Theorem~\ref{thm:p-prime} (p.\ \pageref{thm:p-prime}),
$\Zmod p$ is a field,
and as such it is denoted by
\begin{equation*}
  \F_p.
\end{equation*} 

An arbitrary associative ring $R$ 
such that $R\setminus\units R=\{0\}$ 
is a \textbf{division ring.}\index{division ring}  
So fields are division rings;
but $\Ham$ is a non-commutative division ring.

If $R$ is an associative ring, and $G$ is a group, 
we can form the direct sum $\sum_{g\in G}R$, 
which is, first of all, an abelian group.
It becomes a module over $R$
(in the sense of sub-\S\ref{subsect:mod}, p.\ \pageref{subsect:mod})
when we define
\begin{equation*}
  r\cdot(x_g\colon g\in G)=(r\cdot x_g\colon g\in G)
\end{equation*}
for all $r$ in $R$ and $(x_g\colon g\in G)$ in $\sum_{g\in G}R$.
If $g\in G$, we have the canonical injection $\upiota_g$ 
of $R$ in $\sum_{g\in G}R$ as defined on p.\ \pageref{caninj}.
Let us denote $\upiota_g(1)$ also by
\begin{equation*}
  g.
\end{equation*}
Then
\begin{equation*}
(r_g\colon g\in G)=\sum_{g\in G}r_g\cdot g.
\end{equation*}
Thus an element of $\sum_{g\in G}R$ 
becomes a \textbf{formal $R$-linear combination}\index{linear combination}  
of elements of $G$.  
Then multiplication on $\sum_{g\in G}R$ is defined in an obvious way:
if $r_i\in R$ and $g_i\in G$ for each $i$ in $2$, then
\begin{equation*}
  (r_0\cdot g_0)(r_1\cdot g_1)=r_0r_1\cdot g_0g_1.
\end{equation*}
The definition extends to all of $\sum_{g\in G}R$ by distributivity.  
The resulting ring can be denoted by
\begin{equation*}
  R(G);
\end{equation*}
it is the \textbf{group ring}\index{group ring} of $G$ over $R$.

We can do the same construction with monoids, rather than groups.  
For example, if we start with the free monoid generated by a symbol $X$, 
we get a \textbf{polynomial ring}\index{polynomial ring}\label{poly-ring} 
in one variable, denoted by 
\begin{equation*}
  R[X];
\end{equation*}
this is the ring of formal $R$-linear combinations of powers of $X$.
Such combinations can be written as
\begin{equation*}
  \sum_{k<n}a_kX^k,
\end{equation*}
where $(a_k\colon k<n)\in R^n$ for some $n$ in $\upomega$.  
In case $n=0$, the indicated combination is $0$;
in case $n=m+1$, the combination can be written as one of
\begin{align*}
&\sum_{k=0}^ma_kX^k,&&a_0+a_1X+a_2X^2+\dots+a_mX^m.
\end{align*}
This combination too is $0$ when each $a_k$ is $0$.
We could use a second variable,
getting for example $R[X,Y]$,
which is just $R[X][Y]$.  
Usually $R$ here is commutative and is in particular a field
or at least an integral domain.
We shall develop the theory of polynomial rings 
in \S\ref{sect:poly-rings} (p.\ \pageref{sect:poly-rings}),
but shall use them meanwhile as examples.

\section{Ideals}

Suppose $(R,0,-,+,\cdot)$ is a ring,
and $\sim$ is a congruence-relation on $(R,+,\cdot)$.
By Theorem~\ref{thm:ring-q} on p.\ \pageref{thm:ring-q}, 
$\sim$ is a congruence-relation on the ring.
(The theorem is stated for associative rings,
but does not require the associativity.)
If $A=\{x\in R\colon x\sim0\}$, 
then by Theorem~\ref{thm:subgp-ex} (p.\ \pageref{thm:subgp-ex}),
$A$ is a \emph{subgroup} of $R$, that is,
\begin{equation*}
(A,0,-,+)\subgp(R,0,-,+).
\end{equation*}
Similarly, $A$ is even a sub-ring of $R$, that is,
in addition to being a subgroup, it is closed under multiplication.
We have
\begin{align*}
b\sim x
&\iff b-x\sim0\\
&\iff b-x\in A\\
&\iff b+A=x+A.
\end{align*}
In short,
\begin{equation*}
b\sim x\iff b+A=x+A.
\end{equation*}
Conversely, given a sub-ring $A$ of $R$,
we can use the last equivalence as a definition of $\sim$.
Then $\sim$ is an equivalence-relation on $R$
by Corollary~\ref{cor:cosets-1} (p.\ \pageref{cor:cosets-1}),
and by this and Theorem~\ref{thm:n} (p.\ \pageref{thm:n}),
$\sim$ is even a congruence-relation on $R$ \emph{as a group.}
However, $\sim$ need not be a congruence-relation on $R$ as a ring.
That is, it may not be possible to define a multiplication on $R/A$ by
\begin{equation}\label{eqn:q-mul}
  (x+A)(y+A)=xy+A.
\end{equation}
For example, we cannot use this to define a multiplication on $\Q/\Z$,
since for example
\begin{align*}
\frac12+\Z&=\frac32+\Z,&\frac14+\Z&\neq\frac34+\Z.
\end{align*}

\begin{theorem}
Suppose $R$ is a ring and $A$ is a sub-ring.
The group $R/A$ expands to a ring with multiplication as in~\eqref{eqn:q-mul}
if and only if
\begin{equation}\label{eqn:raar}
r\in R\And a\in A\implies ra\in A\And ar\in A.
\end{equation}
\end{theorem}

\begin{proof}
If $R/A$ does expand to a ring, and $a\in A$, 
then $a+A$ is $0$ in this ring, 
and hence so are $ra+A$ and $ar+A$ by Theorem~\ref{thm:0x0},
so that \eqref{eqn:raar} holds.
Conversely, suppose this holds.
If $a+A=x+A$ and $b+A=y+A$, then $A$ contains $a-x$ and $b-y$,
so $A$ contains also
\begin{equation*}
(a-x)\cdot y+a\cdot(b-y),
\end{equation*}
which is $ab-xy$, so $ab+A=xy+A$.
\end{proof}

Under the equivalent conditions of the theorem,
$A$ is called an \textbf{ideal} of $R$.
The historical reason for the name 
is suggested in \S\ref{sect:ant} (p.\ \pageref{sect:ant}).
Meanwhile, we have the following counterpart 
of Theorem~\ref{thm:n-ker} (p.\ \pageref{thm:n-ker}).

\begin{theorem}
A sub-ring of a ring $R$ is an ideal of $R$ 
if and only if it is the kernel of a homomorphism on $R$.
\end{theorem}

We can express \eqref{eqn:raar} as
\begin{align*}
RA&\included A,&AR&\included A.
\end{align*}
If only one of these holds,
then $A$ is called respectively
a \textbf{left ideal}\index{left!--- ideal}\index{ideal!left ---}
of $R$ 
or a \textbf{right ideal} of $R$.
However, left ideals and right ideals are not kinds of ideals;
rather, an ideal is a left ideal that is also a right ideal.
One may therefore refer to ideals as \textbf{two-sided ideals.}

For example,
the set of matrices
\begin{equation*}
  \begin{pmatrix}
    * & 0 & \dots & 0\\
\vdots & \vdots & & \vdots\\
* & 0 & \dots & 0
  \end{pmatrix}
\end{equation*}
is a left ideal of $\MatR$, but not a right ideal unless $n=1$.
Also, for every element $a$ of an \emph{associative} ring $R$, 
the subset $Ra$ is a left ideal of $R$, 
while $RaR$ is a two-sided ideal.

We have the following counterpart to Theorem~\ref{thm:hom-n} for groups.

\begin{theorem}
If $f$ is a homomorphism from a ring $R$ to a ring $S$, 
and $I$ is a two-sided ideal of $R$ included in $\Ker f$, 
then there is a unique homomorphism $\tilde f$ from $R/I$ to $S$ 
such that $f=\tilde f\circ\uppi$.
\end{theorem}

Hence the isomorphism theorems, as for groups.




Suppose $(A_i\colon i\in I)$ is an indexed family of left ideals of a ring $R$.  
Let the abelian subgroup of $R$ generated by $\bigcup_{i\in I}A_i$ be denoted by 
\begin{equation*}
  \sum_{i\in I}A_i;
\end{equation*}
this is the \textbf{sum}\index{sum} of the left ideals $A_i$.  
This must not be confused with the \emph{direct sums} 
defined in \S \ref{sect:sum} (p.\ \pageref{sect:sum}).

Given a \emph{finite} indexed family $(A_0,\dots,A_{n-1})$
of left ideals of an \emph{associative} ring $R$,
we let the abelian subgroup of $R$ generated by
\begin{equation*}
  \{a_0\dotsm a_{n-1}\colon a_i\in A_i\}
\end{equation*}
be denoted by
\begin{equation*}
  A_0\dotsb A_{n-1};
\end{equation*}
this is the \textbf{product}\index{product} of the left ideals $A_i$.


\begin{theorem}
Sums and finite products of left ideals are left ideals; sums and
products of two-sided ideals are two-sided ideals.  Addition and
multiplication of ideals are associative; addition is commutative;
multiplication distributes over addition.
\end{theorem}

\begin{theorem}
If $A$ and $B$ are left ideals of a ring, then so is $A\cap B$.
If they are two-sided ideals, then $AB\included A\cap B$.
\end{theorem}

Usually $AB$ does not include $A\cap B$, 
since for example $A^2$ might not include $A$; 
such is the case when $A=2\Z$, since then $A^2=4\Z$.


\chapter{Commutative rings}

Throughout this chapter, ``ring'' means commutative ring. 
We shall often identify properties of $\Z$ 
and then consider arbitrary rings with these properties.
If $R$ is a ring (that is, a commutative ring) with an ideal $I$,
and $a+I=b+I$, we may write this as
\begin{equation*}
a\equiv b\pmod I.
\end{equation*}

\section{Commutative rings}\label{sect:comm}

A subset $A$ of a ring $R$ determines the ideal denoted by
\begin{equation*}
  (A),
\end{equation*}
namely the smallest ideal including $A$.  This consists of the
\textbf{$R$-linear combinations}\index{linear combination} of elements
of $A$, namely the well-defined sums
\begin{equation*}
\sum_{a\in
  A}r_aa, 
\end{equation*}
where $r_a\in R$; 
in particular, $r_a=0$ for all but finitely many $a$. 
If $A=\{a_i\colon i<n\}$, then $(A)$ can be written as one of
\begin{align*}
  &(a_i\colon i<n),&&Ra_0+\dots+Ra_{n-1}.
\end{align*}
In particular, if $A=\{a\}$, then $(A)$ is denoted by one of
\begin{align*}
  &(a),&&Ra
\end{align*}
and is called a \textbf{principal ideal.}%%%%%
\index{principal!--- ideal}\index{ideal!principal ---}  
Then
\begin{equation*}
(a_i\colon i<n)=(a_0)+\dots+(a_{n-1}).
\end{equation*}
In $\Z$, the ideal $(a)$ is the same as the subgroup $\gpgen a$.
Therefore every ideal of $\Z$ is principal,
by Theorem~\ref{thm:Z-subg} (p.\ \pageref{thm:Z-subg}).
A \textbf{principal ideal domain}%%%%%
\index{principal!--- ideal domain}\index{domain!principal ideal ---} 
or \textbf{\pid} is an integral domain whose every ideal is principal.
Thus $\Z$ is a \pid,
but the polynomial ring $\Q[X,Y]$ is not,
since the ideal $(X,Y)$ is not principal. 

An ideal of a ring is \textbf{proper} if it is not the whole ring.
A ring has a unique improper ideal, namely itself, which can be written as
\begin{equation*}
(1).
\end{equation*}
Thus an ideal is proper if and only if it does not contain $1$.
When $A$ is the empty subset of a ring,
then the ideal $(A)$, which is $\{0\}$, is usually denoted by
\begin{equation*}
(0).
\end{equation*}
This can be counted as a principal ideal.
Considering Euclid's Lemma 
(Theorem \ref{thm:Euc-Lem}, p.\ \pageref{thm:Euc-Lem}),
and noting that, in $\Z$,
\begin{equation*}
a\divides b\iff b\in(a),
\end{equation*}
we refer to a \emph{proper} ideal $P$ of a ring $R$ as
\begin{compactitem}
\item
\textbf{prime,}\index{prime}\label{prime} 
if for all $a$ and $b$ in $R$,
\begin{equation}\label{eqn:p-ideal}
  ab\in P\And a\notin P\implies b\in P;
\end{equation}
\item
\textbf{maximal,} 
if for all ideals $J$ of $R$,
\begin{equation*}
P\pincluded J\implies J=R.
\end{equation*}
\end{compactitem}

\begin{theorem}
Let $R$ be a ring.
\begin{compactenum}
\item
$R$ is an integral domain $\iff$ $(0)$ is a prime ideal.
\item
$R$ is a field $\iff$ $(0)$ is a maximal ideal.
\end{compactenum}
More generally, for an arbitrary ideal $I$ of $R$:
\begin{compactenum}\setcounter{enumi}{2}
\item
$R/I$ is an integral domain $\iff$ $I$ is a prime ideal.
\item
$R/I$ is a field $\iff$ $I$ is a maximal ideal.
\end{compactenum}
\end{theorem}

\begin{proof}
\begin{asparaenum}
\item
This is immediate from the definitions of integral domain and prime ideal,
once we note that $x\in(0)$ means $x=0$.
\item
If $R$ is a field and $(0)\pincluded I$,
then $I\setminus(0)$ contains some $a$, 
and then $a\inv\cdot a\in I$, so $I=R$.
Conversely, if $(0)$ is maximal, then for all $a$ in $R\setminus(0)$
we have $(a)=(1)$, so $a$ is invertible.
\item
The ideal $(0)$ of $R/I$ is $\{I\}$, and
\begin{equation*}
(a+I)(b+I)=I\iff ab\in I.
\end{equation*}
\item
By Theorem~\ref{thm:GNKN} (p.\ \pageref{thm:GNKN}),
every ideal of $R/I$ is $J/I$ for some subgroup $J$ of $R$.
Moreover, this $J$ must be an ideal of $R$.
In this case, $J$ is maximal if and only if $J/I$ is a maximal ideal of $R/I$.\qedhere
\end{asparaenum}
\end{proof}

\begin{corollary}
  Maximal ideals are prime.
\end{corollary}

The prime ideals of $\Z$ are precisely the ideals $(0)$ and $(p)$,
where $p$ is prime.
Indeed, $(0)$ is prime because $\Z$ is an integral domain,
and if $p$ is prime, then $\Zmod p$ is the field $\F_p$,
so $(p)$ is even maximal.
If $n>1$ and is not prime, 
so that $n=ab$ for some $a$ and $b$ in $\{2,\dots,n-1\}$,
then $a$ and $b$ are zero-divisors in $\Zmod n$,
so $(n)$ is not prime.

The converse of the corollary fails easily, 
since $(0)$ is a prime but non-maximal ideal of $\Z$.
However, every prime ideal of $\Z$ other than $(0)$ is maximal.
This is not the case for $\Q[X,Y]$,
which has the prime but non-maximal ideal $(X)$.

In some rings, \emph{every} prime ideal is maximal.
Such is the case for fields, since their only proper ideals are $(0)$.
It is also the case for \emph{Boolean rings.}
A ring is called \textbf{Boolean}\index{Boolean} if it satisfies the identity
  \begin{equation*}
    x^2=x.
  \end{equation*}
In defining ultraproducts in \S\ref{sect:ultra} (p.\ \pageref{sect:ultra}),
we shall use the example established by the following:
  
\begin{theorem}\label{thm:Br}
If $\Omega$ is a set, 
then $\pow{\Omega}$\label{pow} is a Boolean ring, where 
\begin{align*}
X\cdot Y&=X\cap Y,&X+Y&=(X\setminus Y)\cup(Y\setminus X).
\end{align*}
\end{theorem}

\begin{theorem}\label{thm:Br-2}
Every Boolean ring in which $0\neq1$ has characteristic $2$.
\end{theorem}

\begin{proof}
In a Boolean ring, $2x=(2x)^2=4x^2=4x$, so 
\begin{equation*}
2x=0.  \qedhere
\end{equation*}
\end{proof}

The following will be generalized 
by Theorem~\ref{thm:reg-pr-max} (p.\ \pageref{thm:reg-pr-max}).

\begin{theorem}\label{thm:Boole}
In Boolean rings, all prime ideals are maximal.
\end{theorem}

  \begin{proof}
In a Boolean ring,
\begin{equation*}
x\cdot(x-1)=x^2-x=x-x=0,
\end{equation*}
so every $x$ is a zero-divisor unless $x$ is $0$ or $1$.
Therefore there are no Boolean integral domains besides $\{0,1\}$,
which is the field $\F_2$.
\end{proof}

In $\Z$, by Theorem~\ref{thm:Z-subg} (p.\ \pageref{thm:Z-subg}),
the ideal $(a,b)$ is the principal ideal generated by $\gcd(a,b)$.  
So $a$ and $b$ are relatively prime if and only if $(a,b)=\Z$.  
We can write this condition as
\begin{equation*}
(a)+(b)=\Z.
\end{equation*}
Then the following generalizes Theorem~\ref{thm:CRT}
(p.\ \pageref{thm:CRT}).

\begin{theorem}[Chinese Remainder Theorem]\label{thm:CRT-R}\index{Chinese
    Remainder Theorem} 
  \index{theorem!Chinese Remainder Th---}
Suppose $R$ has an indexed family $(I_i\colon i<n)$ of ideals such that
\begin{equation*}
i<j<n\implies I_i+I_j=R.
\end{equation*}
The monomorphism 
  \begin{equation}\label{eqn:xxx}
  x+\bigcap_{i<n}I_i\mapsto(x+I_i\colon i<n)
  \end{equation}
from $R/\bigcap_{i<n}I_i$ to $\sum_{i<n}R/I_i$
is an isomorphism.
That is, 
every system
\begin{equation*}
%\bigwedge_{i<n}\bigl(x\equiv a_i\pmod{I_i}\bigr)
\bigl(x\equiv a_0\pmod{I_0}\bigr)\And\dots\And\bigl(x\equiv a_{n-1}\pmod{I_{n-1}}\bigr)
\end{equation*}
of congruences has a solution in $R$, 
and the solution is unique \emph{modulo} $I_0\cap\dots\cap I_{n-1}$.
\end{theorem}

\begin{proof}
We proceed by induction.  The claim is trivially true when $n=1$.
In case $n=2$, we have $b_0+b_1=1$ for some $b_0$ in $I_0$ and $b_1$ in $I_1$.  
Then
\begin{align*}
  b_0&\equiv 0\pmod{I_0},&b_0&\equiv1\pmod{I_1},\\
  b_1&\equiv 1\pmod{I_0},&b_1&\equiv0\pmod{I_1}.
\end{align*}
Therefore
\begin{align*}
  b_1a_0+b_0a_1&\equiv a_0\pmod{I_0},&
  b_1a_0+b_0a_1&\equiv a_1\pmod{I_1}.
\end{align*}
Thus $(a_0+I_0,a_1+I_1)$ is in the image of the map in~\eqref{eqn:xxx}.

Finally, if the claim holds when $n=m$,
then it holds when $n=m+1$ by the proof of the case $n=2$,
once we note that if
\begin{equation*}
a_i+b_i=1
\end{equation*}
for some $a_i$ in $I_i$ and $b_i$ in $I_m$ for each $i$ in $m$, then
\begin{equation*}
\prod_{i<m}(a_i+b_i)=1;
\end{equation*}
but this product%%%%%
\footnote{The technique of multiplying elements of sums of ideals
will be used also in proving Lemma~\ref{lem:nilrad}, p.\ \pageref{lem:nilrad}.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
is the sum of $\prod_{i<m}a_i$ and an element of $I_m$, and
\begin{equation*}
\prod_{i<m}a_i\in\bigcap_{i<m}I_i.\qedhere
\end{equation*}
\end{proof}

\section{Division}\label{sect:factor}

As in $\Z$ (p.\ \pageref{divides}), so in an arbitrary ring $R$, 
an element $a$ is called a \textbf{divisor}\index{divisor} 
or \textbf{factor} of an element $b$, 
and $a$ is said to \textbf{divide}\index{divides} $b$, 
and we write
\begin{equation*}
  a\divides b,
\end{equation*}
if the equation
\begin{equation*}
ax=b
\end{equation*}
is soluble in $R$.  
Two elements of $R$ that divide each other
can be called \textbf{associates.}\index{associates}
Zero is an associate only of itself.

\begin{theorem}\label{thm:div}
  In any ring:
  \begin{compactenum}
    \item
$a\divides b \iff (b)\included (a)$;
\item
$a$ and $b$ are associates if and only if $(a)=(b)$.
  \end{compactenum}
Suppose $a=bx$.
\begin{compactenum}\setcounter{enumi}{2}
\item
If $x$ is a unit, then $a$ and $b$ are associates.
\item
If $b$ is a zero-divisor or $0$, then so is $a$.
\item
If $a$ is a unit, then so is $b$.
  \end{compactenum}
\end{theorem}

For example, in $\Zmod 6$, the elements $1$ and $5$ are units; the
other non-zero elements are zero-divisors.  Of these, $2$ and $4$ are
associates, since 
\begin{align}\label{eqn:2.2=4}
2\cdot 2&\equiv4,&4\cdot 2&\equiv 2\pmod 6; 
\end{align}
but $3$
is not an associate of these.  

We now distinguish the properties of certain ring-elements 
that, by Euclid's Lemma (p.\ \pageref{thm:Euc-Lem}),
are the same in $\Z$. 
In an arbitrary ring $R$, an element $\pi$\label{pi-2} 
that is neither $0$ nor a unit is called
\begin{compactitem}
\item
\textbf{irreducible,} if for all $a$ and $b$ in $R$,
\begin{equation*}
\pi=ab\And a\notin\units R\implies b\in\units R;
\end{equation*}
\item
\textbf{prime,} if for all $a$ and $b$ in $R$,
\begin{equation*}
\pi\divides ab\And\pi\ndivides a\implies\pi\divides b.
\end{equation*}
\end{compactitem}

\begin{theorem}
A nonzero ring-element $\pi$ is
\begin{compactenum}[1)]
\item
irreducible $\iff$ $(\pi)$ is maximal among the proper principal ideals;
\item
prime $\iff$ $(\pi)$ is prime.
\end{compactenum}
\end{theorem}

For example, in $\Q[X,Y]$, the element $X$ is both irreducible and prime, 
although $(X)$ is not a maximal ideal.  
However, if $(X)\included(f)\pincluded\Q[X,Y]$, 
then $f$ must be constant in $Y$, 
and then it must have degree $1$ in $X$, 
and then its constant term must be $0$; 
so $f$ is just $aX$ for some $a$ in $\units{\Q}$,
and thus $(X)=(f)$.

If $\pi$ is irreducible \emph{or} prime, and $\pi=ab$, 
then $\pi$ is an associate of $a$ or $b$.
However, neither irreducibility nor primality implies the other.
For example,
in $\Zmod6$, the element $2$ is prime.  
Indeed, $(2)=\{0,2,4\}$, so $\Zmod6\setminus(2)=\{1,3,5\}$,
and the product of no two of these elements is in $(2)$.  
Similarly, $4$ is prime.  
However, $2$ and $4$ are not irreducible, by~\eqref{eqn:2.2=4} above.

Also, in $\C$ we have
\begin{equation}\label{eqn:236}
2\cdot 3=(1+\sqrt{-5})(1-\sqrt{-5}).
\end{equation}
The factors $2$, $3$, and $1\pm\sqrt{-5}$ 
are all irreducible in the smallest sub-ring of $\C$ that contains $\sqrt{-5}$, 
but none of these factors divides another,
and so these factors cannot be prime.  
Details are worked out in the next section.

\section{*Quadratic integers}\label{sect:ant}

Every subfield of $\C$ includes $\Q$,
and every sub-ring of $\C$ includes $\Z$.
If $\omega\in\C$,\label{omega} 
then the smallest subfield of $\C$ that contains $\omega$ is denoted by
\begin{equation*}
\Q(\omega),
\end{equation*}
and the smallest sub-ring of $\C$ that contains $\omega$ is denoted by
\begin{equation*}
\Z[\omega].
\end{equation*}
A \textbf{squarefree} integer 
is an element of $\Z$ different from $1$ 
that is not divisible by the square of a prime number.  
Suppose $D$ is such.
As groups,
\begin{compactitem}
\item
$\Z[\sqrt D]$ is the free abelian group $\gpgen{1,\sqrt D}$,
\item
$\Q(\sqrt D)$ is the image of $\Q\oplus\Q$ under
$(x,y)\mapsto x+y\sqrt D$.
\end{compactitem}
If $x=k+n\sqrt D$ for some $k$ and $n$ in $\Z$,
then
\begin{gather*}
(x-k)^2=n^2D,\\
x^2-2kx+k^2-n^2D=0.
\end{gather*}
Thus all elements of $\Z[\sqrt D]$ 
are solutions in $\Q(\sqrt D)$ of quadratic equations
\begin{equation}\label{eqn:x^2}
x^2+bx+c=0, 
\end{equation}
where $b$ and $c$ are in $\Z$,
and there is no leading coefficient.%%%%%
\footnote{\label{note:ring}If $\xi$ is a solution of such an equation,
so that $\xi^2=-b\xi-c$, David Hilbert referred to the group $\gpgen{1,\xi}$
as a \emph{number ring} (\emph{Zahlring}) \cite[p.~49]{Cohn-ANT}.
This is apparently the origin of our term \emph{ring.}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Conversely, from school the solutions of~\eqref{eqn:x^2} are
\begin{equation*}
x=\frac{-b\pm\oldsqrt{b^2-4c}}2.
\end{equation*}
Suppose one of these is in $\Q(\sqrt D)$.  
Then $b^2-4c=a^2D$ for some $a$ in $\Z$, so that
\begin{equation*}
x=\frac{-b\pm a\sqrt D}2.
\end{equation*}
If $b$ is odd, then $b^2-4c\equiv1\pmod 4$, so $a$ must be odd and $D\equiv1\pmod 4$.  
If $b$ is even, then $b^2-4c\equiv0\pmod 4$, so $a$ is even.
Assume now
\begin{equation*}
D\not\equiv1\pmod4.
\end{equation*}
Then $\Z[\sqrt D]$ consists precisely of the solutions in $\Q(\sqrt D)$
of equations of the form~\eqref{eqn:x^2}.
Therefore the elements of $\Z[\sqrt D]$ 
are called the \textbf{integers of} $\Q(\sqrt D)$.%%%%%
\footnote{In case $D\equiv1\pmod4$, the integers of $\Q(\sqrt D)$
constitute the ring $\Z[(1+\sqrt D)/2]$.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
In this context, the elements of $\Z$ are the integers of $\Q$,
or the \textbf{rational integers.}
Note that $\Z[\sqrt D]\cap\Q=\Z$.

The field $\Q(\sqrt D)$ has one nontrivial automorphism,
namely $z\mapsto z'$,
where
\begin{equation*}
(x+y\sqrt D)'=x-y\sqrt D.
\end{equation*}
In case $D<0$, this automorphism is
complex conjugation.
In any case, we next define a function $N$ from $\Q(\sqrt D)$ to $\Q$ by
\begin{equation*}
N(z)=zz'.
\end{equation*}
Here $N(z)$ can be called the \textbf{norm} of $z$.\label{norm}
The function $N$ is multiplicative, that is, 
\begin{equation*}
N(\alpha\beta)=N(\alpha)\cdot N(\beta).  
\end{equation*}
Also,
\begin{equation*}
N(x+\sqrt D y)=x^2-Dy^2,
\end{equation*}
so $N$ maps $\Z[\sqrt D]$ into $\Z$.  
In particular, if $\alpha$ is a unit of $\Z[\sqrt D]$,
then $N(\alpha)$ must be a unit of $\Z$, namely $\pm1$.
Conversely, if $N(\alpha)=\pm1$, this means $\alpha\cdot(\pm\alpha')=1$,
so $\alpha$ is a unit.

If $D<0$, then $N$ maps $\Z[\sqrt D]$ into $\N$,
and so $\alpha$ is a unit in $\Z[\sqrt D]$ if and only if $N(\alpha)=1$.  
Also, $\alpha$ in $\Z[\sqrt D]$ is irreducible 
if and only if it has no divisor $\beta$ such that $1<N(\beta)<N(\alpha)$ and $N(\beta)\divides N(\alpha)$.  

In case $D=-5$ we have
\begin{equation}\label{eqn:N23}
  \begin{array}{c||c|c|c}
    x&2&3&1\pm\sqrt{-5}\\\hline
N(x)&4&9&6
  \end{array}.
\end{equation}
Since no elements of $\Z[\sqrt{-5}]$ have norm $2$ or $3$, 
the elements $2$, $3$, and $1\pm\sqrt{-5}$ are irreducible. 
However, they are not prime,
because each of them divides the product of \emph{two} of the others,
but it does not divide \emph{one} of the others,
since if $\alpha\divides\beta$, 
then $N(\alpha)\divides N(\beta)$, 
but no norm in~\eqref{eqn:N23} divides another.  

There are however 
factorizations of the relevant ideals.
For example,
\begin{multline*}
(2,1+\sqrt{-5})(2,1+\sqrt{-5})
=(2,1+\sqrt{-5})(2,1-\sqrt{-5})\\
=(4,2+2\sqrt{-5},6)=(2).
\end{multline*}
Similarly,
\begin{gather*}
(3)=(3,1+\sqrt{-5})(3,1-\sqrt{-5}),\\
  (1+\sqrt{-5})=(2,1+\sqrt{-5})(3,1+\sqrt{-5}),\\
  (1-\sqrt{-5})=(2,1+\sqrt{-5})(3,1-\sqrt{-5}).
\end{gather*}
These factorizations are \emph{prime} factorizations. 
We show this as follows.
Every subgroup of $\gpgen{1,\sqrt D}$ has at most two generators,
by Porism~\ref{por:fin-gen-ab} (p.\ \pageref{por:fin-gen-ab}).
When that subgroup is a nonzero ideal $I$ of $\Z[\sqrt D]$,
then it must have more than one generator as a group,
since a cyclic subgroup will not be closed under multiplication by $\sqrt D$.
For example, since
\begin{equation*}
(a+b\sqrt D)\cdot\sqrt D=bD+a\sqrt D,
\end{equation*}
the ideal $(a+b\sqrt D)$ is the group
\begin{equation*}
\gpgen{a+b\sqrt D,bD+a\sqrt D}.
\end{equation*}
Let $G$ be the map
\begin{equation*}
\begin{pmatrix}
a&b\\
c&d
\end{pmatrix}
\mapsto
\gpgen{a+b\sqrt D,c+d\sqrt D}
\end{equation*}
from $\Mat[2\times2]{\Z}$ to the set of subgroups of $\Z[\sqrt D]$.
If $G(X)$ is an ideal, then $\det(X)\neq0$.
Also, $G(X)\subgp G(Y)$ if and only if $X=ZY$ for some $Z$ such that $\det(Z)\neq0$.
Hence $G(X)=G(Y)$ if and only if $X=ZY$ for some $Z$ in $\GL[2]{\Z}$.
By the methods of the proof 
of Theorem~\ref{thm:fin-gen-ab} (p.\ \pageref{thm:fin-gen-ab}),
every ideal of $\Z[\sqrt D]$ has the form
\begin{equation*}
\gpgen{a,b+c\sqrt D},
\end{equation*}
where $a>b\geq0$.
(This is not a sufficient condition for being an ideal, however.)
We have a well-defined function $N$ 
from the set of subgroups of $\Z[\sqrt D]$ to $\N$ given by
  \begin{equation*}
  N\bigl(G(X)\bigr)=\abs{\det(X)}.
  \end{equation*}
In case $D<0$, this new function $N$ 
agrees with the earlier function called $N$ in the sense that
\begin{multline*}
N\bigl((a+b\sqrt D)\bigr)
=N\bigl(\gpgen{a+b\sqrt D,bD+a\sqrt D}\bigr)\\
=\abs{a^2-b^2D}
=a^2-b^2D
=N(a+b\sqrt D).
\end{multline*}
If $I$ and $J$ are ideals of $\Z[\sqrt D]$ such that
$I\pincluded J\pincluded\Z[\sqrt D]$, 
then we must have
\begin{align*}
N(J)&\divides N(I),&N(I)&>N(J)>1.
\end{align*}
In case $D=-5$, we compute
\begin{gather*}
  (2,1+\sqrt{-5})=\gpgen{2,2\sqrt{-5},1+\sqrt{-5},\sqrt{-5}-5}
=\gpgen{2,1+\sqrt{-5}},\\
(3,1\pm\sqrt{-5})=\gpgen{3,3\sqrt{-5},1\pm\sqrt{-5},\sqrt{-5}\mp 5}
=\gpgen{3,1\pm\sqrt{-5}},
\end{gather*}
hence
\begin{equation*}
  \begin{array}{c||c|c}
    I&(2,1+\sqrt{-5})&(3,1\pm\sqrt{-5})\\\hline
N(I)&2&3
  \end{array}.
\end{equation*}
So these ideals are maximal, hence prime.
Ideals of the rings $\Z[\sqrt D]$ were originally called \emph{ideal numbers.}










\section{Integral domains}\label{sect:int-dom}

We now consider some rings that are increasingly close 
to having all of the properties of $\Z$.
We start with arbitrary integral domains.
We have noted in effect that the following fails in $\Zmod6$.

\begin{lemma}
In an integral domain, if $a$ and $b$ are non-zero associates, and
\begin{equation*}
a=bx,
\end{equation*}
then $x$ is a unit.
\end{lemma}

\begin{proof}
We have also, for some $y$,
\begin{align*}
b&=ay=bxy,&b\cdot(1-xy)&=0,&1&=xy,
\end{align*}
since $b\neq0$ and we are in an integral domain. 
\end{proof}

\begin{theorem}\label{thm:pr-irr}
  In an integral domain, prime elements are irreducible.
\end{theorem}

\begin{proof}
If $p$ is prime, and $p=ab$, then $p$ is an associate of $a$ or $b$,
so the other is a unit.
\end{proof}

By this and Euclid's Lemma (p.\ \pageref{thm:Euc-Lem}),
the irreducibles of $\Z$ are precisely the primes.

Recall from p.\ \pageref{multiset} that 
a \emph{multiset} is a pair $(A,f)$, where $f\colon A\to\N$.
If $A$ here is a finite subset of a ring, then the product
\begin{equation*}
\prod_{a\in A}a^{f(a)}
\end{equation*}
is well-defined (see p.\ \pageref{unord-prod})
and can be called the \textbf{product of the multiset.}
The components of the proof of the following are found in Euclid,
although Gauss's version \cite[\P16]{Gauss}
seems to be the first formal statement of the theorem \cite[p.~10]{MR568909}.

\begin{theorem}[Fundamental Theorem of Arithmetic]\label{thm:FTA}
Every element of $\N$ has a unique prime factorization.
That is, every natural number is the product of a unique multiset of prime numbers.
\end{theorem}

\begin{proof}
We first show that every integer greater than $1$ has a prime factor:
this is Propositions VII.31--2 of the \emph{Elements.}
Suppose $m>1$, and
let $p$ be the least integer $a$ such that $a\divides m$ and $1<a$.
Then $p$ must be prime.

Now suppose $n>1$, 
and every $m$ such that $1<m<n$ has a prime factorization.
If $n$ is prime, then it is its own prime factorization.
If $n$ is not prime, then $n=pm$ for some prime $p$,
where also $1<m<n$.
By hypothesis $m$ has a prime factorization, and hence so does $n$.
Therefore, by induction, every element of $\N$ has a prime factorization.

Prime factorizations are unique 
by Euclid's Lemma.
\end{proof}

A \textbf{unique factorization domain}\index{unique factorization
  domain}\index{domain!unique 
  factorization ---} 
or \textbf{\ufd} 
is an integral domain in which
the appropriate formulation of the result of the foregoing theorem holds.
Thus, in a \ufd, by definition,
\begin{compactenum}[1)]
\item
every nonzero element has an irreducible factorization,
that is, every nonzero element is the product of a multiset of irreducibles; and
\item
that multiset is unique up to replacement of elements by associates,
so that, if
\begin{equation*}
\prod_{i<n}\pi_i=\prod_{i<n'}\pi_i',
\end{equation*}
where the $\pi_i$ and $\pi_i'$ are irreducible, 
then $n=n'$ and, for some $\sigma$ in $\Sym n$,
for all $i$ in $n$, 
$\pi_i$ and $\pi_{\sigma(i)}'$ are associates.  
\end{compactenum}

Existence of irreducible factorizations in $\Z$,
along with Euclid's Lemma,
ensures that those factorizations are unique,
so that $\Z$ is a \ufd.
Conversely,
the definition of a \ufd\ is enough to give us Euclid's Lemma:

\begin{theorem}
In a \ufd, irreducibles are prime.
\end{theorem}

As for $\Z$ (p.\ \pageref{gcd}), so for any ring, 
a \textbf{greatest common divisor}%
\index{greatest common divisor}\index{divisor!greatest common ---} 
of elements $a$ and $b$ is a common divisor of $a$ and $b$ 
that is a maximum with respect to dividing: 
that is, it is some $c$ such that $c\divides a$ and $c\divides b$, 
and for all $x$, if $x\divides a$ and $x\divides b$, then $x\divides c$.  
There can be more than one greatest common divisor, but they are all associates.  
Every element of a ring is a greatest common divisor of itself and $0$.

\begin{theorem}
In a \ufd, any two nonzero elements have a greatest common divisor.
\end{theorem}

\begin{proof}
We can write the elements as 
\begin{align*}
  &u\prod_{i<n}\pi_i{}^{a(i)},&&v\prod_{i<n}\pi_i{}^{b(i)},
\end{align*}
where $u$ and $v$ are units and the $\pi_i$ are irreducibles;
then the product
\begin{equation*}
  \prod_{i<n}\pi_i{}^{\min(a(i),b(i))}
\end{equation*}
is a greatest common divisor of the first two elements.
\end{proof}

As in $\Z$, so in an arbitrary \pid, more is true,
and we shall use this to show that every \pid\ is a \ufd.
If $a$ and $b$ have a common divisor $d$, then
\begin{equation*}
(a,b)\included(d),
\end{equation*}
but we need not have the reverse inclusion, 
even if $d$ is a greatest common divisor.
For example, $\Q[X,Y]$ will be a \ufd\ 
by Theorem~\ref{thm:R[X]-ufd} (p.\ \pageref{thm:R[X]-ufd}),
and in this ring, $X$ and $Y$ have the greatest common divisor $1$,
but $(X,Y)\neq(1)$.
For a \pid\ however, we have the following generalization 
of Theorem~\ref{thm:ax+by=d} (p.\ \pageref{thm:ax+by=d}).

\begin{theorem}
In a \pid, any two elements $a$ and $b$ have a greatest common divisor $d$,
and
\begin{equation*}
(a,b)=(d),
\end{equation*}
so that the equation
\begin{equation*}
ax+by=d
\end{equation*}
is soluble in the ring. 
\end{theorem}

Now we can generalize Euclid's Lemma.

\begin{theorem}\label{thm:pid-irr-pr}
In a \pid, irreducibles are prime.
\end{theorem}

\begin{proof}
Suppose the irreducible $\pi$ divides $ab$ but not $a$.  
Then $1$ is a greatest common divisor of $\pi$ and $a$,
and so by the last theorem,
$\pi x+ay=1$ for some $x$ and $y$ in the ring.  
Now the proof of Euclid's Lemma goes through.
\end{proof}

So now, in a \pid, if an element has an irreducible factorization,
this factorization is unique.
Now, our proof that elements of $\N$ have prime factorizations has two parts.
The first part is that every non-unit has a prime factor.
The second part can be understood as follows.
Suppose some $n_0$ does not have a prime factorization.
But $n_0=p_0\cdot n_1$ for some prime $p_0$ and some $n_1$.
Then $n_1$ in turn must have no prime factorization.
Thus $n_1=p_1n_2$ for some prime $p_1$ and some $n_2$, and so on.
We obtain
\begin{equation}\label{eqn:n_0}
n_0>n_1>n_2>\cdots,
\end{equation}
which is absurd in $\N$.
It follows that $n_0$ must have had a prime factorization.

An arbitrary ring will not have an ordering as $\N$ does,
but the relation of divisibility will be an adequate substitute,
at least in a \pid.
Indeed, with the $n_i$ as above, we have
\begin{equation}\label{eqn:(n_0)}
(n_0)\pincluded(n_1)\pincluded(n_2)\pincluded\cdots
\end{equation}
This is a strictly ascending chain of ideals.
A ring is called \textbf{Noetherian}\index{Noetherian ring} 
if its every strictly ascending chain of ideals is finite.

\begin{theorem}
Every \pid\ is Noetherian.
\end{theorem}

\begin{proof}
If $I_0\included I_1\included\dotsb$, 
then $\bigcup_{i\in\upomega}I_i$ is an ideal $(a)$; 
then $a\in I_n$ for some $n$, so the chain cannot grow beyond $I_n$. 
\end{proof}

Now we can adapt to an arbitrary \pid\
the foregoing argument that elements of $\N$ have prime factorizations.
In fact that argument can be streamlined.
If $n_0$ has no prime factorization,
then $n_0=m_0\cdot n_1$ for some non-units $m_0$ and $n_1$,
where at least $n_1$ has no prime factorization.
Again we obtain a descending sequence as in~\eqref{eqn:n_0},
hence an ascending sequence as in~\eqref{eqn:(n_0)}.

\begin{theorem}\label{thm:PID->UFD}
Every \pid\ is a \ufd.
\end{theorem}

\begin{proof}
By Theorem~\ref{thm:pid-irr-pr}, irreducibles in a \pid\ are prime,
and therefore irreducible factorizations are unique when they exist.
Indeed, if
\begin{equation*}
\prod_{i<n}\pi_i=\prod_{i<n'}\pi_i',
\end{equation*}
where the $\pi_i$ and $\pi_i'$ are irreducible, 
then, since it divides the right side, 
$\pi_0$ must divide one of the $\pi_i'$ (because $\pi_0$ is prime).
Thus $\pi_i'=u\cdot\pi_0$ for some $u$.
Also $u$ must be a unit
(because $\pi_i'$ is irreducible and also,
being irreducible, $\pi_0$ is not a unit).
We may assume $i=0$.
The product $u\cdot\pi_1'$ is an associate of $\pi_1'$
(by Theorem~\ref{thm:div})
and is therefore also irreducible.
Replacing $\pi_1'$ with $u\cdot\pi_1'$,
we have
\begin{equation*}
\prod_{1\leq i<n}\pi_i=\prod_{1\leq i<n'}\pi_i',
\end{equation*}
since a \pid\ is an integral domain.
By induction, $n=n'$, and for some $\sigma$ in $\Sym n$,
for all $i$ in $n$, $\pi_i$ and $\pi_{\sigma(i)}'$ are associates.

It remains to show that irreducible factorizations exist in a \pid.
By the Axiom of Choice, we can well-order the \pid.
Suppose, if possible, $a\neq0$ and has no irreducible factorization.
Then $a=b\cdot c$ for some non-units $b$ and $c$,
where $c$ has no irreducible factorization.
We have
\begin{equation*}
(a)\pincluded(c).
\end{equation*}
Now let us denote by $a'$ the \emph{least} such $c$ in the well-ordering.
Then we can produce a sequence $(a_i\colon i\in\upomega)$,
where $a_0$ has no irreducible factorization and,
assuming $a_i$ has no irreducible factorization, $a_{i+1}=a_i{}'$.
By induction, each $a_i$ does have no irreducible factorization,
and so
\begin{equation*}
(a_0)\pincluded(a_1)\pincluded(a_2)\pincluded\cdots,
\end{equation*}
which is contrary to the last theorem.
Thus every nonzero element of a \pid\ has an irreducible factorization,
and this is unique.
\end{proof}

We have thus shown that the Fundamental Theorem of Arithmetic
can be founded solely on the status of $\Z$ as a \pid.
We may now ask further how $\Z$ gets this status.
The proof of Theorem~\ref{thm:Z-subg} can be worked out as follows.
The function $x\mapsto\abs x$ from $\Z$ to $\upomega$
(as defined on p.\ \pageref{abs}) is such that
\begin{equation*}
x=0\iff\abs x=0.
\end{equation*}
Given an ideal $I$ of $\Z$ that is different from $(0)$,
we let $a$ be a nonzero element such that $\abs a$ is minimal.
If $b\in I$, then
\begin{equation*}
\abs{b-ax}<\abs a
\end{equation*}
for some $x$ (as for example the $x$ that minimizes $\abs{b-ax}$),
and then $\abs{b-ax}=0$ (since $b-ax\in I$).
Then $b=ax$, and hence $b\in(a)$.
Therefore $I=(a)$.

A \textbf{Euclidean function} on an integral domain $R$
is a function $\partial$ from the ring to $\upomega$ such that
\begin{equation*}
\partial(x)=0\iff x=0
\end{equation*}
and, for all $a$ in $R\setminus\{0\}$ and $b$ in $R$, the inequality
\begin{equation*}
\partial(b-ax)<\partial(a)
\end{equation*}
is soluble in $R$.
Thus $x\mapsto\abs x$ is a Euclidean function on $\Z$.
Actually we need not require the range of a Euclidean function 
to be a subset of $\upomega$;
it could be any well-ordered set.

A \textbf{Euclidean domain} or \ed\ is an integral domain with a Euclidean function.
We now have:

\begin{theorem}
Every \ed\ is a \pid.
\end{theorem}

Other examples of Euclidean domains include the following.

For any field $K$, the function $f$ on $K$ given by
\begin{equation*}
f(x)=
\begin{cases}
	1,&\text{ if }x\neq0,\\
	0,&\text{ if }x=0,
\end{cases}
\end{equation*}
is a Euclidean function.

If $f$ is a polynomial $\sum_{i=0}^ma_iX^i$, where $a_m\neq0$,
then $m$ is $\deg(f)$, the \emph{degree} of $f$.
The function $f\mapsto\deg f$ on $K[X]$ will be Euclidean 
by Theorem~\ref{thm:deg-euc} (p.\ \pageref{thm:deg-euc}).\label{deg}
  
The \textbf{Gaussian integers}\index{Gaussian integer} 
are the elements of $\Z[\sqrt{-1}]$,
that is, the integers of $\Q(\sqrt{-1})$ 
(see \S\ref{sect:ant}, p.\ \pageref{sect:ant}). 
Writing $\mi$ for $\sqrt{-1}$ as usual,
we have that
the norm function $z\mapsto\abs z^2$ on $\Z[\mi]$ is Euclidean,
where
\begin{equation*}
\abs{x+y\mi}^2=x^2+y^2.
\end{equation*}
Indeed, if $a\in\Z[\mi]\setminus\{0\}$ 
and $b\in\Z[\mi]$,
then $b/a$ is an element $s+t\mi$ of $\Q(\mi)$.
There are elements $x$ and $y$ of $\Z$ such that
\begin{align*}
\abs{s-x}&\leq\frac12,&\abs{t-y}&\leq\frac12.
\end{align*}
Let $q=x+y\mi$; then
\begin{equation*}
\left|\frac ba-q\right|=\abs{s-x+(t-y)\mi}\leq\frac{\sqrt2}2<1
\end{equation*}
and so $\abs{b-aq}<\abs a$ (and hence $\abs{b-aq}^2<\abs a^2$).

\section{Localization}\label{sect:loc}

We shall now generalize the construction of $\Q$ from $\Z$
that is suggested by Theorem~\ref{thm:Z->Q} (p.\ \pageref{thm:Z->Q}).
A nonempty subset of a ring 
is called \textbf{multiplicative}\index{multiplicative} 
if it is closed under multiplication.  
For example, $\Z\setminus\{0\}$ is a multiplicative subset of $\Z$,
and more generally,
the complement of any prime ideal of any ring is multiplicative.

\begin{lemma}
If $S$ is a multiplicative subset of a ring $R$, then on $R\times S$
there is an equivalence-relation $\sim$ given by
\begin{equation}\label{eqn:q}
  (a,b)\sim (c,d)\iff (ad-bc)\cdot e=0\text{ for some $e$ in }S.
\end{equation}
If $R$ is an integral domain and $0\notin S$, then
\begin{equation*}
  (a,b)\sim(c,d)\iff ad=bc.
\end{equation*}
\end{lemma}

\begin{proof}
Reflexivity and symmetry are obvious.  
For transitivity, note that, 
if $(a,b)\sim(c,d)$ and $(c,d)\sim(e,f)$, 
so that, for some $g$ and $h$ in $S$,
\begin{align*}
0&=(ad-bc)g=adg-bcg,&0&=(cf-de)h=cfh-deh,
\end{align*}
then, since $dgh$ is in $S$ and
\begin{align*}
(af-be)dgh
&=afdgh-bedgh\\
&=adg\cdot fh-deh\cdot bg
=bcg\cdot fh-cfh\cdot bg=0,
\end{align*}
we conclude $(a,b)\sim(e,f)$.
\end{proof}

In the notation of the lemma, 
the equivalence-class of $(a,b)$ is denoted by $a/b$ or
\begin{equation*}
  \frac ab,
\end{equation*}
and the quotient $R\times S\modsim$ is denoted by
\begin{equation*}
  S\inv R.
\end{equation*}
If $0\in S$, then $S\inv R$ has exactly one element.  
An instance where $R$ is not an integral domain 
will be considered in the next section (\S\ref{sect:ultra}).

\begin{theorem}\label{thm:loc}
  Suppose $R$ is a ring with multiplicative subset $S$.
  \begin{compactenum}
  \item
  In $S\inv R$, if $c\in S$,
  \begin{equation*}
\frac ab=\frac{ac}{bc}.
\end{equation*}
  \item
  $S\inv R$ is a ring
in which the operations are given by
\begin{align*}
  \frac ab\cdot\frac cd&=\frac{ac}{bd},&
  \frac ab\pm\frac cd&=\frac{ad\pm bc}{bd}.  
\end{align*}
  \item
  There is a ring-homomorphism $\phi$ from $R$ to $S\inv R$ where, for every $a$ in $S$,
  \begin{equation*}
\phi(x)=\frac{xa}a.
\end{equation*}
If $1\in S$, then $\phi(x)=x/1$.
\newcounter{local}
\setcounter{local}{\value{enumi}}
 \end{compactenum}
   Suppose in particular $R$ is an integral
domain and $0\notin S$.  
\begin{compactenum}\setcounter{enumi}{\value{local}}
\item
$S\inv R$ is an integral domain, and the homomorphism $\phi$ is an embedding.
\item
If $S=R\setminus\{0\}$, then $S\inv R$ is a field, and
if $\psi$ is an embedding of $R$ in a field $K$, then there is an embedding $\tilde{\psi}$ of $S\inv R$ in $K$ such that $\tilde{\psi}\circ\phi=\psi$.
\end{compactenum}
\end{theorem}

When $S$ is the complement of a prime ideal $\primei$,\label{mathfrak} 
then $S\inv R$ is called 
the \textbf{localization}\index{local!---ization} of $R$ at $\primei$  
and can be denoted by
\begin{equation*}
  R_{\primei}.
\end{equation*}
(See Appendix \ref{app:German}, p.\ \pageref{app:German}, 
for Fraktur letters like $\mathfrak p$.)
If $R$ is an integral domain, so that $(0)$ is prime, 
then the localization $R_{(0)}$ (which is a field by the theorem)
is the \textbf{quotient-field}\label{qf}%
\index{quotient!--- field}\index{field!quotient ---} 
of $R$. 
In this case, the last part of the theorem 
describes the quotient field 
in terms of a \emph{universal property} 
in the sense of p.\ \pageref{up}.
However, it is important to note that,
if $R$ is not an integral domain,
then the homomorphism $x\mapsto x/1$ from $R$ to $R_{\primei}$
might not be an embedding.
The following will be generalized as Theorem~\ref{thm:reg-quot-loc}
(p.\ \pageref{thm:reg-quot-loc} below).

\begin{theorem}\label{thm:Br-quot-loc}
For every Boolean ring $R$,
for every prime ideal $\primei$ of $R$,
the homomorphism $x\mapsto x/1$ from $R$ to $R_{\primei}$
is surjective and has kernel $\primei$.
Thus
\begin{equation*}
\F_2\cong R/\primei\cong R_{\primei}.
\end{equation*}
\end{theorem}

A \textbf{local ring}\index{ring!local ---}\index{local!--- ring}
is a ring with a unique maximal ideal.  
The connection between localizations and local rings 
is made by the theorem below.

\begin{lemma}
An ideal $\maxi$ of a ring $R$ is a unique maximal ideal of $R$
if and only if
\begin{equation*}
 \units R=R\setminus\maxi. 
\end{equation*}
\end{lemma}

\begin{theorem}\label{thm:local-ring}
The localization $R_{\primei}$ of a ring $R$ at a prime ideal $\primei$ 
is a local ring
whose unique maximal ideal is
\begin{equation*}
\primei R_{\primei},
\end{equation*}
namely the ideal generated by the image of $\primei$.
\end{theorem}

\begin{proof}
The ideal $\primei R_{\primei}$ consists of those $a/b$ such that $a\in\primei$.  
In this case, if $c/d=a/b$, then $cb=da$, which is in $\primei$, 
so $c\in\primei$ since $\primei$ is prime and $b\notin\primei$.  
Hence for all $x/y$ in $R_{\primei}$,
\begin{align*}
x/y\notin\primei R_{\primei}
&\iff x\notin\primei\\
&\iff x/y\text{ has an inverse, namely }y/x.
\end{align*}
By the lemma, we are done.
\end{proof}



\section{*Ultraproducts of fields}\label{sect:ultra}

An \emph{ultraproduct} of fields 
is the quotient of the direct product of a family of fields
by a maximal ideal.
An algebraic investigation of this construction will involve 
maximal ideals, prime ideals, localizations,
and our theorems about them.
First we shall establish the usual tool 
by which the very existence of maximal ideals is established:

\subsection{Zorn's Lemma}

On p.\ \pageref{thm:rec} we established a Recursion Theorem 
for $\N$ as an algebra, and hence for $\upomega$.
Now we establish another such theorem,
for arbitrary ordinals, not just $\upomega$;
but the ordinals are now to be considered
as well-ordered sets,
not algebras.

\begin{theorem}[Transfinite Recursion]
For all sets $A$, for all ordinals $\alpha$, 
for all functions $f$ 
from $\bigcup\{A^{\beta}\colon\beta<\alpha\}$ to $A$,
there is a unique element
\begin{equation*}
(a_{\beta}\colon\beta<\alpha)
\end{equation*}
of $A^{\alpha}$ such that, for all $\beta$ in $\alpha$,
\begin{equation*}
f(a_{\gamma}\colon\gamma<\beta)=a_{\beta}.
\end{equation*}
\end{theorem}

\begin{proof}
We first prove uniqueness.
Suppose, if possible,
$(a'_{\beta}\colon\beta<\alpha)$ is another element of $A^{\alpha}$ as desired,
and let $\beta$ be minimal such that $a_{\beta}\neq a'_{\beta}$.
Then
\begin{equation*}
(a_{\gamma}\colon\gamma<\beta)
=(a'_{\gamma}\colon\gamma<\beta),
\end{equation*}
so by definition $a_{\beta}=a'_{\beta}$.
We now prove existence.
If the theorem fails for some $\alpha$,
let $\alpha$ be minimal such that it fails.
Say $f\colon\bigcup\{A^{\beta}\colon\beta<\alpha\}\to A$.
By hypothesis, for each $\beta$ in $\alpha$,
there is a unique element $(a_{\gamma}\colon\gamma<\beta)$ of $A^{\beta}$
such that, for all $\gamma$ in $\beta$,
\begin{equation*}
f(a_{\delta}\colon\delta<\gamma)=a_{\gamma}.
\end{equation*}
As before, $a_{\gamma}$ is independent 
of the choice of $\beta$ such that $\gamma<\beta<\alpha$.
Then for all $\beta$ in $\alpha$ we are free to define
\begin{equation*}
a_{\beta}=f(a_{\gamma}\colon\gamma<\beta).
\end{equation*}
Then the element $(a_{\beta}\colon\beta<\alpha)$ of $A^{\alpha}$
shows that the theorem does not fail for $\alpha$.
\end{proof}

Our proof used the method of the \textbf{minimal counterexample:}
we showed that there could not be such a counterexample.

We now proceed to Zorn's Lemma.
Suppose $\Omega$ is a set and $A\included\pow{\Omega}$.
Then proper inclusion ($\pincluded$)
is a transitive irreflexive relation on $A$ and on each of its subsets
(see Theorems~\ref{thm:<trans} and~\ref{thm:<irr}, p.\ \pageref{thm:<trans}).
A subset $C$ of $A$ is called a \textbf{chain} in $A$
if proper inclusion is also a total relation on $C$,
so that $C$ is linearly ordered by proper inclusion
(see Theorem~\ref{thm:<tot}).
An \textbf{upper bound} of $C$
is a set that includes each element of $C$.
In particular, $\bigcup C$ is an upper bound,
and every upper bound includes this union.
A \textbf{maximal element} of $A$ is an element 
that is not properly included in any other element.

The union of every chain of proper ideals of a ring
is itself a proper ideal of the ring.
A maximal ideal of the ring is more precisely a maximal element
of the set of proper ideals of the ring.
By the following, rings do have maximal ideals.

\begin{theorem}[Zorn's Lemma]\label{thm:ZL}
For all sets $\Omega$, for all subsets $A$ of $\pow{\Omega}$,
if $A$ contains an upper bound for each of its chains,
then $A$ has a maximal element.%%%%%
\footnote{In 1935, Zorn \cite{MR1563165} 
presented this statement
for the case where the upper bounds of the chains 
are actually the unions of the chains.
He called the statement the ``maximum principle''
and suggested that using it would make proofs more algebraic
than when the ``well-ordering theorem'' is used.
Probably this theorem is what we have called the Axiom of Choice.
Zorn promised to prove, in a later paper, 
that the maximum principle and the Axiom of Choice are equivalent;
but it seems such a paper never appeared.
Earlier, in 1922, Kuratowski \cite[(42), p.~89]{Kuratowski-Zorn}
proved ``Zorn's Lemma''
for the case where the chains in question are well-ordered.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{theorem}

\begin{proof}
By the Axiom of Choice, 
there is a bijection $\alpha\mapsto B_{\alpha}$ 
from some cardinal $\kappa$ to $A$.
By the Recursion Theorem,
there is a function $\alpha\mapsto C_{\alpha}$ from $\kappa$ to $A$
such that, for all $\alpha$ in $\kappa$, 
if $\{C_{\beta}\colon\beta<\alpha\}$ is a chain,
and if $\gamma$ is minimal such that $B_{\gamma}$ is an upper bound of this chain,
then
\begin{equation*}
C_{\alpha}=
\begin{cases}
	B_{\gamma},&\text{ if }B_{\gamma}\not\included B_{\alpha},\\
	B_{\alpha},&\text{ if }B_{\gamma}\included B_{\alpha};
\end{cases}
\end{equation*}
in particular, $\{C_{\beta}\colon\beta\leq\alpha\}$ is a chain.
If $\{C_{\beta}\colon\beta<\alpha\}$ is \emph{not} a chain,
then we can define $C_{\alpha}=B_0$.
But we never have to do this:
for all $\alpha$ in $\kappa$,
the set $\{C_{\beta}\colon\beta<\alpha\}$ \emph{is} a chain,
since there can be no minimal counterexample to this assertion.
Indeed, if $\alpha$ is minimal 
such that $\{C_{\beta}\colon\beta<\alpha\}$ is not a chain,
there must be $\beta$ and $\gamma$ in $\alpha$
such that $\gamma<\beta$
and neither of $C_{\beta}$ and $C_{\gamma}$ includes the other.
But by assumption $\{C_{\delta}\colon\delta<\beta\}$ is a chain,
and so by definition $\{C_{\delta}\colon\delta\leq\beta\}$ is a chain,
and in particular one of $C_{\beta}$ and $C_{\gamma}$ must include the other.

By a similar argument, $\{C_{\alpha}\colon\alpha<\kappa\}$ is a chain,
so it has an upper bound $D$ in $A$.
Suppose for some $\alpha$ we have $D\included B_{\alpha}$.
Then $C_{\alpha}=B_{\alpha}$,
since otherwise, by definition, $C_{\alpha}=B_{\gamma}$ for some $\gamma$
such that $B_{\gamma}\not\included B_{\alpha}$:
in this case $C_{\alpha}\not\included B_{\alpha}$,
so $C_{\alpha}\not\included D$, which is absurd.
Thus $C_{\alpha}=B_{\alpha}$,
and hence $B_{\alpha}\included D$, so $D=B_{\alpha}$.
Therefore $D$ is a maximal element of $A$.
\end{proof}

As we said, it follows that rings have maximal ideals.
We shall use Zorn's Lemma further to show that there are ideals
that are maximal with respect to having certain properties.
In our examples, these ideals will turn out to be prime.

\subsection{Boolean rings}

Recall that all rings now are commutative rings.
For every such ring $R$, the set of its prime ideals 
is called its \textbf{spectrum} and can be denoted by
\begin{equation*}
  \spec R.
\end{equation*}
If $a\in R$, let us use the notation
\begin{equation*}
  [a]=\{\primei\in\spec R\colon a\notin\primei\}.
\end{equation*}

\begin{theorem}
For every ring $R$, for all $a$ and $b$ in $R$,
\begin{equation*}
  [a]\cap[b]=[ab].
\end{equation*}
\end{theorem}

\begin{proof}
  Since every $\primei$ in $\spec R$ is prime, we have
  \begin{align*}
    \primei\in[a]\cap[b]
&\iff a\notin\primei\And b\notin\primei\\
&\iff ab\notin\primei\\
&\iff \primei\in[ab].\qedhere
  \end{align*}
\end{proof}

As a consequence of the theorem, 
the spectrum of a ring can be given the \textbf{Zariski topology,}
in which the sets $[a]$ are basic open sets.
This topology is used in algebraic geometry,
especially when the ring is one of the polynomial rings
defined below in sub-\S\ref{subsect:poly-up}.
We are now interested in the case of Boolean rings.
We showed in Theorem~\ref{thm:Br} (p.\ \pageref{thm:Br}) 
that the power set of every set can be understood as a Boolean ring
in which the operations are defined by
\begin{align*}
X\cdot Y&=X\cap Y,&X+Y&=(X\setminus Y)\cup(Y\setminus X).
\end{align*}
We may abbreviate $(X\setminus Y)\cup(Y\setminus X)$ by
\begin{equation*}
  X\symdiff Y;
\end{equation*}
it is the \textbf{symmetric difference} of $X$ and $Y$.
Immediately from the definition,
every sub-ring of a Boolean ring is a Boolean ring.
We now show that every Boolean ring embeds in a Boolean ring
whose underlying set is the power set of some set.
This is an analogue of 
Cayley's Theorem for groups (p.\ \pageref{thm:Cay}) and
Theorem~\ref{thm:x-lambda_x} for associative rings (p.\ \pageref{thm:x-lambda_x}).


\begin{theorem}[Stone \cite{MR1501865}]\label{thm:Stone}
For every Boolean ring $R$,
for all $a$ and $b$ in $R$,
\begin{equation*}
  [a]\symdiff[b]=[a+b],
\end{equation*}
and the map $x\mapsto[x]$ is an embedding of $R$ in $\pow{\spec R}$.
\end{theorem}

\begin{proof}
By Theorem~\ref{thm:Br-2} (p.\ \pageref{thm:Br-2}),
the characteristic of $R$ is at most $2$,
and so for all $a$ in $R$ we have
\begin{equation*}
a\cdot(1+a)=0.
\end{equation*}
Suppose $\primei\in\spec R$.  
Since $\primei$ is prime and (like every ideal) contains $0$, 
it must contain $a$ or $1+a$.
If $\primei$ contains neither $a$ nor $b$,
then it contains the sum of $1+a$ and $1+b$, which is $a+b$.
Since the sum of any two elements of the subset $\{a,b,a+b\}$ of $R$
is equal to the third element,
every $\primei$ in $\spec R$ contains either one element or all elements of this set.
Therefore
\begin{align*}
  \primei\in[a+b]
&\iff a+b\notin\primei\\
&\iff(a\in\primei\liff b\notin\primei)\\
&\iff(\primei\notin[a]\liff\primei\in[b])\\
&\iff\primei\in[a]\symdiff[b].
\end{align*}
By this and the previous theorem,
$x\mapsto[x]$ is a homomorphism of Boolean rings.
It remains to show that this homomorphism is injective.
Say $x\in R\setminus\{0\}$.
The union of a chain of ideals of $R$ that do not contain $x$
is an ideal of $R$ that does not contain $x$.
Therefore, by Zorn's Lemma,
there is an ideal $\mathfrak m$ of $R$
that is maximal among those ideals that do not contain $x$.
If $a$ and $b$ are not in $\mathfrak m$,
then by maximality
\begin{align*}
x&\in\mathfrak m+(a),&x&\in\mathfrak m+(b),
\end{align*}
and therefore
\begin{equation*}
x^2\in\mathfrak m+(ab).
\end{equation*}
(We made a similar computation 
in proving the Chinese Remainder Theorem, p.\ \pageref{thm:CRT}.)
Since $x^2\notin\mathfrak m$, we must have $ab\notin\mathfrak m$.
Thus $\mathfrak m$ is prime, and so $\mathfrak m\in[x]$.
In particular, $[x]\neq\emptyset$.
\end{proof}

Equipped with the Zariski topology, the spectrum of a Boolean ring
is the \textbf{Stone space} of the ring.

\subsection{Regular rings}

The Boolean rings are members of a larger class of rings
that satisfy the conclusion 
of Theorem~\ref{thm:Boole} (p.\ \pageref{thm:Boole}).
We can establish this by first noting that,
for every set $\Omega$, there is an isomorphism $U\mapsto\chi_U$ 
from the Boolean ring $\pow{\Omega}$ 
to the direct power $\F_2{}^{\Omega}$, 
where
\begin{equation*}
  \chi_U(i)=
  \begin{cases}
    1,&\text{ if }i\in U,\\
    0,&\text{ if }i\in\Omega\setminus U.
  \end{cases}
\end{equation*}
Here $\chi_U$ can be called the \textbf{characteristic function} of $U$ 
(as a subset of $\Omega$).
The power $\F_2{}^{\Omega}$ is a special case of the product $\prod_{i\in\Omega}K_i$,
where each $K_i$ is a field.
If $a\in\prod_{i\in\Omega}K_i$, 
there is an element $a^*$ of the product given by
\begin{equation*}
\uppi_i(a^*)=\begin{cases}
\uppi_i(a)\inv,&\text{ if }\uppi_i(a)\neq0,\\
0,&\text{ if }\uppi_i(a)=0.
\end{cases}
\end{equation*}
Then
\begin{equation*}
aa^*a=a.
\end{equation*}
In particular, for every $x$ in the ring $\prod_{i\in\Omega}K_i$ 
there is $y$ in the ring such that
\begin{equation*}
xyx=x.
\end{equation*}
Therefore the ring $\prod_{i\in\Omega}K_i$ is called 
a \textbf{(von Neumann) regular ring.}%%%%%
\footnote{In general, a regular ring need not be commutative; 
see \cite[IX.3, ex.~5, p.~442]{MR600654}.}
Thus Boolean rings are also regular rings in this sense, 
since $xxx=x$ in a Boolean ring.
A regular ring can also be understood as a ring in which
\begin{equation*}
  x\in(x^2)
\end{equation*}
for all $x$ in the ring.
The following generalizes Theorem~\ref{thm:Boole} (p.\ \pageref{thm:Boole}).

\begin{theorem}\label{thm:reg-pr-max}
In regular rings, all prime ideals are maximal.
\end{theorem}

\begin{proof}
If $R$ is a regular ring, and $\primei$ is a prime ideal,
then for all $x$ in $R$, for some $y$ in $R$,
\begin{equation}\label{eqn:xy-1}
(xy-1)\cdot x=0,  
\end{equation}
and so at least one of $xy-1$ and $x$ is in $\primei$.  
Hence if $x+\primei$ is not $0$ in $R/\primei$,
then $x+\primei$ has the inverse $y+\primei$. 
Thus $R/\primei$ is a field, so $\primei$ is maximal.
\end{proof}

Now we can generalize Theorem~\ref{thm:Br-quot-loc} 
(p.\ \pageref{thm:Br-quot-loc}).

\begin{theorem}\label{thm:reg-quot-loc}
  If $\primei$ is a prime ideal of a regular ring $R$, 
then there is a well-defined isomorphism
\begin{equation*}
 x+\primei\mapsto x/1 
\end{equation*}
from $R/\primei$ to $R_{\primei}$.
\end{theorem}

\begin{proof}
If $a\in R$ and $b\in R\setminus\primei$, 
and $bcb=b$,
then the elements $a/b$ and $ac/1$ of $R_{\primei}$ are equal since
\begin{equation*}
(a-bac)b=ab-abcb=ab-ab=0.
\end{equation*}
Thus the homomorphism $x\mapsto x/1$ from $R$ to $R_{\primei}$
guaranteed by Theorem~\ref{thm:loc} is surjective.
By the last theorem, $\primei$ is maximal,
and hence $R_{\primei}$ is a field.
Supposing $x\in\primei$, as in that theorem
we have \eqref{eqn:xy-1} for some $y$, but $1-xy\notin\primei$.
This shows $x/1=0/1$.
Hence, if $y+\primei=z+\primei$ for some $y$ and $z$,
so that $y-z\in\primei$, then $y/1=z/1$.
Thus the epimorphism $x+\primei\mapsto x/1$ is well-defined.
Its kernel then cannot be all of the field $R/\primei$,
so the epimorphism must also be an embedding.
\end{proof}

The foregoing two theorems turn out to \emph{characterize} regular rings.
That is, every ring of which the conclusions of these theorems hold 
must be regular.
In fact a somewhat stronger statement is true;
this is the next theorem below.
For the sake of the theorem, we make the following definitions.
An element $x$ of a ring $R$ is called \textbf{nilpotent}
if $x^n=0$ for some $n$ in $\N$.

\begin{lemma}\label{lem:nilrad}
The ideal $\bigcap\spec R$ of a ring $R$ 
is precisely the set of nilpotent elements of $R$.
\end{lemma}

\begin{proof}
  Let $N$ be the set of nilpotent elements of $R$.
Easily $N\included\bigcap\spec R$.
Now suppose $x\in R\setminus N$; we show $x\notin\bigcap\spec R$.
Using Zorn's Lemma, we may let $\primei$ be an ideal of $R$ 
that is maximal among those ideals that contain no power of $x$.
We show $\primei\in\spec R$.  Suppose neither $a$ nor $b$ is in $\primei$.
Then both $\primei+(a)$ and $\primei+(b)$ contain powers of $x$.
Hence the product $\primei+(ab)$ contains%%%%%
\footnote{A similar idea was used 
in the proof of the Chinese Remainder Theorem, p.\ \pageref{thm:CRT-R}, 
to reduce the case $n=m+1$ to the case $n=2$.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
a power of $x$.
Therefore $\primei$ is prime, although $x\notin\primei$.
\end{proof}

The ideal $\bigcap\spec R$ of a ring $R$ 
is called the \textbf{nilradical} of $R$.
A ring is \textbf{reduced} if its nilradical is $(0)$.

\begin{theorem}%\label{thm:reg-pr-max}
The following are equivalent conditions on a ring $R$.%%%%%
\footnote{The equivalence of these conditions 
is part of \cite[Thm~1.16, p.~7]{MR533669}.
This theorem adds a fourth equivalent condition:
``All simple $R$-modules are injective.''
The proofs given involve module theory,
except the proof that, 
if all prime ideals are maximal, 
and the ring is reduced, 
then each localization at a maximal ideal is a field.
That proof is reproduced below.}
%%%%%%%%%%%%%%%%%%%%
\begin{compactenum}
\item
$R$ is regular.
\item
Every prime ideal of $R$ is maximal, and $R$ is reduced.
\item
The localization $R_{\maxi}$ is a field for all maximal ideals $\maxi$ of $R$.
\end{compactenum}
\end{theorem}

\begin{proof}
  \begin{asparaenum}
    \item
In regular rings, prime ideals are maximal by Theorem~\ref{thm:reg-pr-max}.
Also, if $xyx=x$, but $x^2=0$, then $x=x^2y=0$; so regular rings are reduced.
\item
Now suppose every prime ideal of $R$ is maximal, and $R$ is reduced.
Let $\maxi$ be a maximal ideal of $R$.
By Theorem~\ref{thm:local-ring} (p.\ \pageref{thm:local-ring}),
$\maxi R_{\maxi}$ is the unique maximal ideal of $R_{\maxi}$.
By Zorn's Lemma,
every prime ideal $\mathfrak P$ of $R_{\maxi}$ 
is included in a maximal ideal,
which must be $\maxi R_{\maxi}$.
Now, the intersection $\maxi R_{\maxi}\cap R$ 
is a proper ideal of $R$ that includes $\maxi$, so it is $\maxi$.
Hence $\mathfrak P\cap R$ is a prime ideal of $R$ that is included in $\maxi$,
so it is $\maxi$, and therefore $\mathfrak P=\maxi R_{\maxi}$.
Thus this maximal ideal is the unique prime ideal of $R_{\maxi}$.
By the lemma, this ideal is the nilradical of the ring.
Thus for all $r/s$ in $\maxi R_{\maxi}$, for some $n$ in $\N$,
we have $(r/s)^n=0$, so $r^n/s^n=0$, 
and therefore $tr^n=0$ for some $t$ in $R\setminus\maxi$.
In this case, $(tr)^n=0$, so $tr=0$, and therefore $r/s=0$.
In short, $\maxi R_{\maxi}=(0)$.
Therefore $R_{\maxi}$ is a field.
\item
Finally, suppose $R_{\maxi}$ is a field for all maximal ideals $\maxi$ of $R$.
If $x\in R$, define
\begin{equation*}
  I=\{r\in R\colon rx\in(x^2)\}.
\end{equation*}
This is an ideal of $R$ containing $x$.
We shall show that it contains $1$.
We do this by showing that it is not included in any maximal ideal $\maxi$.
If $x\notin\maxi$, then $I\nincluded\maxi$.
If $x\in\maxi$, then $x/1\notin\units{(R_{\maxi})}$, 
so, since $R_{\maxi}$ is a field,
we have $x/1=0/1$, and hence
\begin{equation*}
  rx=0
\end{equation*}
for some $r$ in $R\setminus\maxi$; but $r\in I$.
Again $I\nincluded\maxi$.
Thus $I$ must be $(1)$, so $x\in(x^2)$.
Therefore $R$ is regular.\qedhere
  \end{asparaenum}
\end{proof}

We again consider the special case of a product $\prod\family K$,
where $\family K$ is an indexed family $(K_i\colon i\in\Omega)$ of fields.
Here $\prod\family K$ is a regular ring, 
and $xx^*x=x$ when $x^*$ is defined as above.
Hence every sub-ring of $\prod\family K$ 
that is closed under the operation $x\mapsto x^*$ is also a regular ring.
We now prove the converse:
every regular ring is isomorphic to such a ring.

\begin{theorem}
  For every regular ring $R$, the homomorphism
  \begin{equation*}
    x\mapsto\bigl(x+\primei\colon\primei\in\spec R\bigr)
  \end{equation*}
is an embedding of $R$ 
in the product
\begin{equation*}
 \prod_{\primei\in\spec R}R/\primei 
\end{equation*}
of fields.
The image of this embedding is closed under $x\mapsto x^*$.
\end{theorem}

\begin{proof}
The indicated map is an embedding,
just as the map $x\mapsto[x]$ 
in Stone's Theorem (p.\ \pageref{thm:Stone}) is an embedding.
Indeed, suppose $R$ is a regular ring, and $x\in R\setminus\{0\}$.
Let $\maxi$ be maximal among those ideals of $R$ that do not contain $x$.
If $a$ and $b$ are in $R\setminus\maxi$, then
\begin{gather*}
  x\in\bigl(\maxi+(a)\bigr)\cap\bigl(\maxi+(b)\bigr),\\
x^2\in\maxi+(ab),\\
x\in\maxi+(ab),
\end{gather*}
so $ab\notin\maxi$.  Thus $\maxi$ is a prime ideal, 
and $x+\maxi\neq0$ in $R/\maxi$.
Therefore the map $x\mapsto\bigl(x+\primei\colon\primei\in\spec R\bigr)$ 
is an embedding.

Let this embedding be called $f$.
Given $x$ in $R$, we have to show that $f(x)^*$ is in the image of $f$.
Now, there is $y$ in $R$ such that $xyx=x$, and therefore
\begin{equation*}
  f(x)f(y)f(x)=f(x).
\end{equation*}
For each $\primei$ in $\spec R$, 
by applying the canonical projection $\uppi_{\primei}$, we obtain
\begin{equation*}
  (x+\primei)(y+\primei)(x+\primei)=x+\primei.
\end{equation*}
If $x+\primei\neq0$, we can cancel it, obtaining
\begin{equation*}
y+\primei=(x+\primei)\inv=\uppi_{\primei}(f(x)^*).
\end{equation*}
However, possibly $x+\primei=0$, while $y+\primei\neq0$,
so that $f(y)\neq f(x)^*$.
In this case, letting $z=yxy$, we have
\begin{align*}
  xzx&=xyxyx=xyx=x,&
zxz&=yxyxyxy=yxyxy=yxy=z.
\end{align*}
In short, $xzx=x$ and $zxz=z$.
Then
\begin{align*}
x\in\primei&\iff z\in\primei,&
x\notin\primei&\implies(z+\primei)\inv=x+\primei,
\end{align*}
so $f(z)=f(x)^*$.
\end{proof}

\subsection{Ultraproducts}

If $R$ is a Boolean ring,
then by Stone's Theorem (p.\ \pageref{thm:Stone}), 
$R$ embeds in $\pow{\spec R}$.
We have also shown
\begin{equation*}
 \pow{\spec R}\cong\F_2{}^{\spec R}. 
\end{equation*}
Finally, for each $\primei$ in $\spec R$,
by Theorem~\ref{thm:Br-quot-loc} (p.\ \pageref{thm:Br-quot-loc}),
the quotient $R/\primei$ is isomorphic to $\F_2$, and so
\begin{equation*}
\F_2{}^{\spec R}\cong\prod_{\primei\in\spec R}R/\primei.
\end{equation*}
In this way, Stone's Theorem becomes a special case of the foregoing theorem.

The field $\F_2$ can be considered as a sub\emph{set} of every field,
although not a sub\emph{field} (unless the field has characteristic $2$).
This observation gives rise to the following.

\begin{theorem}
For every indexed family $(K_i\colon i\in\Omega)$ of fields,
each ideal $I$ of $\prod_{i\in\Omega}K_i$ is generated by the set
\begin{equation*}
  \{aa^*\colon a\in I\}.
\end{equation*}
This set is itself an ideal, 
when considered as a subset of $\F_2{}^{\Omega}$.
Hence the map $I\mapsto\{aa^*\colon a\in I\}$ is a bijection
from the set of ideals of $\prod_{i\in\Omega}K_i$ 
to the set of ideals of $\F_2{}^{\Omega}$.
\end{theorem}

\begin{proof}
  We need only check that $\{aa^*\colon a\in I\}$ 
is closed under addition in $\F_2{}^{\Omega}$.
If $a$ and $b$ are in $I$, 
then $aa^*=\chi_A$ and $bb^*=\chi_B$ for some subsets $A$ and $B$ of $\Omega$.
In  $\F_2{}^{\Omega}$, the sum $aa^*+bb^*$ is $\chi_{A\symdiff B}$,
which can be computed in $\prod_{i\in\Omega}K_i$ as
\begin{equation*}
  \chi_{A\symdiff B}\cdot(a+b)(a+b)^*;
\end{equation*}
and this is in $I$.
\end{proof}

If $\family K$ is an indexed family $(K_i\colon i\in\Omega)$ of fields,
let $\mathfrak P$ be a prime ideal of $\prod\family K$.  
Then the quotient $\prod\family K/\mathfrak P$ is a field, 
and this field is called an \textbf{ultraproduct} of $\family K$.
The ideal $\mathfrak P$ could be a principal ideal $(a)$.
This ideal is equal to $(aa^*)$ and therefore to $(\chi_U)$
for some subset $U$ of $\Omega$.
But $(a)$ is maximal,
and therefore $U=\Omega\setminus\{i\}$ for some $i$ in $\Omega$.
In this case,
\begin{equation*}
\prod\family K/\mathfrak P\cong K_i.
\end{equation*}
However, if $\Omega$ is infinite,
then $\pow{\Omega}$ has the proper ideal $I$ 
consisting of the finite subsets of $\Omega$.
Then $\{\chi_U\colon U\in I\}$ generates a proper ideal of $\prod\family K$.  
If $\mathfrak P$ includes this ideal,
then $\mathfrak P$ is not principal, 
and the field $\prod\family K/\mathfrak P$ is called 
a \textbf{nonprincipal ultraproduct} of $\family K$.  
Such ideals $\mathfrak P$ exist by Zorn's Lemma.\label{ac-up}

If $a\in\prod\family K$, 
the subset $\{i\in\Omega\colon a_i\neq0\}$ of $\Omega$
can be called the \textbf{support} of $a$
and be denoted by
\begin{equation*}
\supp a.
\end{equation*}
In particular, $\supp{\chi_U}=U$.
By the last theorem,
we have a bijection
\begin{equation*}
\mathfrak P\mapsto\{\supp x\colon x\in\mathfrak P\}
\end{equation*}
from $\spec{\prod\family K}$ to $\spec{\pow{\Omega}}$.
Suppose the image of $\mathfrak P$ under this map is $\primei$.
Then for all $a$ and $b$ in $\prod\family K$ we have,
\emph{modulo} $\mathfrak P$,
\begin{equation*}
a\equiv b\iff\{i\in\Omega\colon \uppi_i(a)\neq\uppi_i(b)\}\in\primei.
\end{equation*}
We may think of the elements of $\primei$ as ``small'' sets; 
their complements are ``large.''  
Then every subset of $\Omega$ is small or large.
Two elements of $\prod\family K$ are congruent \emph{modulo} $\mathfrak P$
if and only if they agree on a large set of indices in $\Omega$.
If $\mathfrak P$ is the principal ideal $(\Omega\setminus\{i\})$,
then the large subsets of $\Omega$ are just those that contain $i$.


Suppose however $\mathfrak P$ is nonprincipal.
Then all finite subsets of $\Omega$ are small, 
and all \emph{cofinite} subsets of $\Omega$ are large,
and each map $x\mapsto\upiota_i(x)+\mathfrak P$ 
from $K_i$ to $\prod\family K/\mathfrak P$ is the zero map.
Thus no one field $K_i$ affects the ultraproduct $\prod\family K/\mathfrak P$.
Rather, the ultraproduct is a kind of ``average'' of all of the fields $K_i$.
Say for example $\Omega$ is the set of prime numbers in $\N$,
and for each $p$ in $\Omega$, the field $K_p$ is $\F_p$.
Then $\prod\family K/\mathfrak P$ has characteristic $0$, 
since for each prime $p$, 
the element $p\cdot1$ of $\prod_{\ell\in\Omega}\F_{\ell}$ 
disagrees with $0$ on a large set.

Since in general an ultraproduct $\prod_{i\in\Omega}K_i/\mathfrak P$ of fields
depends only on $(K_i\colon i\in\Omega)$ and a prime ideal of $\pow{\Omega}$,
we can replace the fields $K_i$ with arbitrary structures (all having the same signature).
The notion that a nonprincipal ultraproduct is an average of the factors
is made precise by the result known as \L o\'s's Theorem,
because it can be extracted from \L o\'s's 1955 paper \cite{MR0075156}.
The proof is straightforward,
but requires careful attention to logic.

\section{Polynomial rings}\label{sect:poly-rings}

\subsection{Universal property}\label{subsect:poly-up}

Given a ring $R$, 
we defined the polynomial ring $R[X]$ on p.\ \pageref{poly-ring}
as the set of formal sums
\begin{equation*}
  \sum_{i<m}a_iX^i,
\end{equation*}
where $(a_i\colon i<m)\in R^m$ for some $m$ in $\upomega$.
This means that, assuming $m\leq n$, we have
\begin{multline*}
  \sum_{i<m}a_iX^i=\sum_{i<n}b_iX^i\\
\iff(a_i\colon i<m)=(b_i\colon i<m)\And b_m=0\And\dots\And b_{n-1}=0.
\end{multline*}
We understand $\sum_{i<1}a_iX^i$ to be $a_0$, an element of $R$.
Thus $R$ is included in $R[X]$.

We can now define recursively the family 
of polynomial rings $R[X_0,\dots,X_{n-1}]$:
\begin{equation*}
  R[X_0,\dots,X_{n-1}]=
  \begin{cases}
    R,&\text{ if }n=0,\\
R[X_0,\dots,X_{k-1}][X_k],&\text{ if }n=k+1.
  \end{cases}
\end{equation*}
These polynomial rings have a certain universal property 
in the sense of p.\ \pageref{up}:

\begin{theorem}\label{thm:poly-free}
For all rings $R$, 
for all $n$ in $\upomega$,
for all rings $S$, 
for all homomorphisms $\phi$ from $R$ to $S$, 
for all $\bm a$ in $S^n$, 
there is a unique homomorphism $H$ 
from $R[X_0,\dots,X_{n-1}]$ to $S$
such that
\begin{align*}
H\restriction R&=\phi,&
(H(X_i)\colon i<n)&=\bm a.
\end{align*}
\end{theorem}

\begin{proof}
We use induction.
  The claim is trivially true when $n=0$.
When $n=1$, given $a$ in $S$,
we must have $H\restriction R=\phi$
and $H(X)=a$ and therefore
\begin{equation*}
  H\Biggl(\sum_{k<m}b_kX^k\Biggr)=\sum_{k<m}\phi(b_k)\cdot a^k
\end{equation*}
for all $(b_i\colon i<m)$ in $R^m$, for all $m$ in $\upomega$.
Thus $H$ is determined on all of $R[X]$.
The general inductive step follows in the same way.
\end{proof}

In the notation of the theorem,
if $f\in R[X_0,\dots,X_{n-1}]$,
then we may denote $H(f)$ by
\begin{equation*}
  f^{\phi}(\bm a).
\end{equation*}
If also $\phi=\id_R$,
then $H(f)$ is just
\begin{equation*}
  f(\bm a).
\end{equation*}
Given a ring $R$, we can define a category
(in the sense of \S\ref{sect:category}, p.\ \pageref{sect:category})
whose objects are pairs $(S,\phi)$,
where $S$ is a ring and $\phi$ is a homomorphism from $R$ to $S$.
If $(T,\psi)$ is also in the category,
then a morphism from $(S,\phi)$ to $(T,\psi)$ 
is a homomorphism $h$ from $S$ to $T$ such that $h\circ\phi=\psi$.
\begin{equation*}
\xymatrix@R=3ex{
&S\ar[dd]^h\\
R\ar[ur]^{\phi}\ar[dr]_{\psi}&\\
&T}
\end{equation*}
Then for each $n$ in $\upomega$,
the pair $(R[X_0,\dots,X_{n-1}],\id_R)$ is an object in this category,
and by the last theorem,
in the sense of sub-\S\ref{subsect:free} (p.\ \pageref{subsect:free}), 
it is a \emph{free object} on $n$,
with respect to the map $i\mapsto X_i$ on $n$.
Then $R[X_0,\dots,X_{n-1}]$ is uniquely determined 
(up to isomorphism) by this property, by Theorem~\ref{thm:free}.

\subsection{Division}

If $R$ is a ring, and $f$ is the element $\sum_{i=0}^na_iX^i$ of $R[X]$,
and $a_n\neq0$, then:
\begin{compactitem}
  \item
$n$ is called the \textbf{degree}\index{degree} of $f$,
and we may write
\begin{equation*}
  \deg(f)=n;
\end{equation*}
\item
each $a_i$ is a \textbf{coefficient} of $f$ and is \emph{the} coefficient of $X^i$;
\item
$a_n$ is the 
\textbf{leading coefficient}\index{leading coefficient} of $f$;
\item
if this leading coefficient is $1$, then $f$ is called \textbf{monic.}
\end{compactitem}
We define also
\begin{equation*}
  \deg(0)=-\infty,
\end{equation*}
and for all $k$ in $\upomega$,
\begin{equation*}
  -\infty+k=-\infty=k-\infty,
\end{equation*}
so that the next lemma makes sense in all cases.
We said in \S\ref{sect:int-dom} (p.\ \pageref{deg}) that, 
if $K$ is a field,
then $f\mapsto\deg(f)$ is a Euclidean function on $K[X]$.
We now prove this.

\begin{lemma}
Suppose $f$ and $g$ are polynomials in one variable $X$ over a ring $R$.
Then
\begin{equation*}
  \deg (f+g)\leq\max(\deg f, \deg g), 
\end{equation*}
with equality if $\deg(f)\neq\deg(g)$.
Also
\begin{equation*}
  \deg (f\cdot g)\leq\deg f+\deg g,
\end{equation*}
with equality 
if the product of the leading coefficients of $f$ and $g$ is not $0$.
In particular,
if $R$ is an integral domain, then so is $R[X]$.  
\end{lemma}

\begin{theorem}[Division Algorithm]\index{Division Algorithm}
  \index{theorem!Division Algorithm} \index{algorithm!Division A---}
If $f$ and $g$ are polynomials in $X$ over a ring $R$, 
and the leading coefficient of $g$ is $1$, then
  \begin{equation}\label{eqn:f=qg+r}
    f=q\cdot g+r
  \end{equation}
for some unique $q$ and $r$ in $R[X]$ such that $\deg(r)<\deg(g)$.
\end{theorem}

\begin{proof}
To prove uniqueness,
we note that if for each $i$ in $2$ we have
\begin{equation*}
f_i=q_i\cdot g+r_i,
\end{equation*}
where $q_0\neq q_1$, and $\deg(r_0)$ and $\deg(r_1)$ are less than $\deg(g)$,
then by the lemma
\begin{equation*}
\deg(f_0-f_1)
=\deg\bigl((q_0-q_1)\cdot g+r_0-r_1\bigr)\geq\deg g\geq0,
\end{equation*}
so $f_0\neq f_1$.
To prove existence, if $\deg(f)<\deg(g)$, we let $q=0$.
Suppose $\deg(g)\leq \deg(f)$.
Given an arbitrary polynomial $h$ over $R$ with leading coefficient $a$
such that $\deg(g)\leq\deg(h)$,
we define
\begin{equation*}
  h^*=h-aX^{\deg(h)-\deg(g)}\cdot g.
\end{equation*}
Then $\deg(h^*)<\deg(h)$ and
\begin{equation*}
  h=aX^{\deg(h)-\deg(g)}\cdot g+h^*.
\end{equation*}
Now define $f_0=f$, and $f_1=f_0{}^*$, and so on until $\deg(f_k)<\deg(g)$.
Let $a_i$ be the leading coefficient of $f_i$, 
and let $n_i=\deg(f_i)-\deg(g)$.
Then \eqref{eqn:f=qg+r} holds when $r=f_k$ and
\begin{equation*}
  q=a_0X^{n_0}+\dots+a_{k-1}X^{n_{k-1}}.\qedhere
\end{equation*}
\end{proof}

\begin{corollary}[Remainder Theorem]\index{Remainder Theorem}
  \index{theorem!Remainder Th---}
If $c\in R$ and $f\in R[X]$, then
\begin{equation*}
f= q\cdot (X-c)+f(c)
\end{equation*}
for some unique $q$ in $R[X]$.
\end{corollary}

\begin{proof}
By the Division Algorithm,
  $f=q\cdot (X-c)+d$ for some unique $q$ in $R[X]$ and $d$ in $R$.
Then $f(c)=q(c)\cdot(c-c)+d=d$.
\end{proof}

If $f(c)=0$, then $c$ is a \textbf{zero} of $f$.

\begin{theorem}
For every polynomial $f$ over a ring, for every $c$ in the ring,
\begin{equation*}
  f(c)=0\iff(X-c)\divides f.
\end{equation*}
If the ring is an integral domain, and $f\neq0$,
then the number of distinct zeros of $f$ is at most $\deg(f)$.
\end{theorem}

\begin{proof}
By the Remainder Theorem, 
$c$ is a zero of $f$ if and only if $f=q\cdot(X-c)$ for some $q$.  
In this case, if the ring is an integral domain, 
and $d$ is another zero of $f$, 
then, since $d-c\neq0$, we must have that $d$ is a zero of $q$.  
Hence, if $\deg(f)=n$, 
and $f$ has the distinct zeros $r_0$, \dots, $r_{m-1}$, 
then repeated application of the Remainder Theorem yields
\begin{equation*}
f=q\cdot(X-r_0)\dotsm(X-r_{m-1})
\end{equation*}
for some $q$.
If $f\neq0$, then $q\neq0$, and $\deg(f)\geq m$.
\end{proof}

Recall however 
from the proof of Theorem~\ref{thm:Boole} (p.\ \pageref{thm:Boole}) 
that every element of a Boolean ring is a zero of $X\cdot(1+X)$, that is, $X+X^2$; 
but some Boolean rings have more than two elements.  
In $\Zmod 6$, the same polynomial $X+X^2$ has the zeros $0$, $2$, $3$, and $5$.

\begin{theorem}\label{thm:deg-euc}
  If $K$ is a field, then $f\mapsto\deg(f)$ is a Euclidean function on $K[X]$.
  \begin{comment}
  
  
and
\begin{equation*}
  \units{K[X]}=\units K=K\setminus\{0\}.
\end{equation*}


\end{comment}
\end{theorem}

\begin{proof}
Over a field, 
the Division Algorithm does not require 
the leading coefficient of the divisor to be $1$.
\end{proof}

Thus for all fields $K$, the ring $K[X]$ is a \ed,
therefore a \pid,
therefore a \ufd.

\subsection{*Multiple zeros}


A zero $c$ of a polynomial over an integral domain 
has \textbf{multiplicity} $m$ 
if the polynomial is a product $g\cdot(X-c)^m$, 
where $c$ is not a zero of $g$.  
A zero with multiplicity greater than $1$ is a \textbf{multiple} zero.
Derivations were defined on p.\ \pageref{derivation}; 
they will be useful for recognizing the existence of multiple roots.

\begin{lemma}
  If $\delta$ is a derivation of a ring $R$, 
then for all $x$ in $R$ and $n$ in $\upomega$,
  \begin{equation*}  
   \delta(x^n)=nx^{n-1}\cdot\delta(x).
   \end{equation*}
\end{lemma}

\begin{proof}
Since
\begin{equation*}
\delta(1)=\delta(1\cdot1)=\delta(1)\cdot 1+1\cdot\delta(1)=2\cdot\delta(1), 
\end{equation*}
we have $\delta(1)=0$, so the claim holds when $n=0$.  
If it holds when $n=k$, then
\begin{multline*}
\delta(x^{k+1})
=\delta(x)\cdot x^k+x\cdot\delta(x^k)\\
=\delta(x)\cdot x^k+kx^k\cdot\delta(x)
=(k+1)\cdot x^k\cdot\delta(x),
\end{multline*}
so the claim holds when $n=k+1$.
\end{proof}

\begin{theorem}
  On a polynomial ring $R[X]$, there is a
  unique derivation $f\mapsto
  f'$ such that 
  \begin{align*}
    X'&=1,&c'&=0
  \end{align*}
for all $c$ in $R$.
   This derivation is given by
   \begin{equation}\label{eqn:der}
\Biggl(\sum_{k=0}^na_kX^k\Biggr)'=
\sum_{k=0}^{n-1}(k+1)\cdot a_{k+1}X^k.
\end{equation}
\end{theorem}

\begin{proof}
Let $\delta$ be the operation $f\mapsto f'$ on $R[X]$ defined by~\eqref{eqn:der}.
By the lemma and the definition of a derivation,
$\delta$ is the only operation that can meet the desired conditions.
It remains to show that $\delta$ is indeed a derivation.
We have
\begin{equation*}
\delta\Biggl(\sum_{k=0}^na_kX^k\Biggr)=\sum_{k=0}^na_k\cdot\delta(X^k).
\end{equation*}
Also
\begin{multline*}
	\delta(X^kX^{\ell})
	=\delta(X^{k+\ell})
	=(k+\ell)\cdot X^{k+\ell-1}\\
	=kX^{k-1}X^{\ell}+\ell X^kX^{\ell-1}
	=\delta(X^k)\cdot X^{\ell}+X^k\cdot\delta(X^{\ell}).
\end{multline*}
Therefore $\delta$ is indeed a derivation:
\begin{align*}
&\phantom{{}={}}\delta\Biggl(\sum_{k<m}a_kX^k\cdot\sum_{\ell<n}b_{\ell}X^{\ell}\Biggr)\\
&=\delta\Biggl(\sum_{k<m}\sum_{\ell<n}a_kX^k\cdot b_{\ell}X^{\ell}\Biggr)\\
&=\sum_{k<m}\sum_{\ell<n}a_kb_{\ell}\cdot\delta(X^kX^{\ell})\\
&=\sum_{k<m}\sum_{\ell<n}a_kb_{\ell} \cdot\bigl(\delta(X^k)
\cdot X^{\ell}+X^k\cdot\delta(X^{\ell})\bigr)\\
&=\sum_{k<m}\sum_{\ell<n}\bigl(a_k\cdot\delta(X^k)
\cdot b_{\ell}X^{\ell}+a_kX^k\cdot b_{\ell}\cdot\delta(X^{\ell})\bigr)\\
&=\sum_{k<m}a_k\cdot\delta(X^k)\cdot\sum_{\ell<n} b_{\ell}X^{\ell}
+\sum_{k<m}a_kX^k\cdot\sum_{\ell<n} b_{\ell}\cdot\delta(X^{\ell})\\
&=\delta\Biggl(\sum_{k<m}a_kX^k\Biggr)\cdot\sum_{\ell<n}b_{\ell}X^{\ell}
+\sum_{k<m}a_kX^k\cdot\delta\Biggl(\sum_{\ell<n}b_{\ell}X^{\ell}\Biggr).\qedhere
\end{align*}
\end{proof}

In the notation of the theorem, $f'$ is the \textbf{derivative} of $f$.

\begin{lemma}
Let $R$ be an integral domain,
and suppose $f\in R[X]$ and $f(c)=0$.  
Then $c$ is a multiple zero of $f$ if and only if
  \begin{equation*}
    f'(c)=0.
  \end{equation*}
\end{lemma}

\begin{proof}
  Write $f$ as $(X-c)^m\cdot g$, where $g(c)\neq0$.  Then $m\geq1$, so 
  \begin{equation*}
    f'=m\cdot(X-c)^{m-1}\cdot g+(X-c)^m\cdot g'.
  \end{equation*}
If $m>1$, then $f'(c)=0$.  
If $f'(c)=0$, then $m\cdot 0^{m-1}\cdot g(c)=0$, 
so $0^{m-1}=0$ and hence $m>1$.
\end{proof}

If $L$ is a field with subfield $K$, 
then a polynomial over $K$ may be irreducible over $K$, but not over $L$.  
For example, $X^2+1$ is irreducible over $\Q$, but not over $\Q(\mi)$.  
Likewise, the polynomial may have zeros from $L$, but not $K$.  
Hence it makes sense to speak of zeros of an irreducible polynomial.

\begin{theorem}
If $f$ is an irreducible polynomial with multiple zeros over a field $K$,
then $K$ has characteristic $p$ for some prime number $p$,
and
\begin{equation*}
f=g(X^p)
\end{equation*}
for some polynomial $g$ over $K$.
\end{theorem}

\begin{proof}
If $f$ has the multiple zero $c$,
then by the lemma $X-c$ is a common factor of $f$ and $f'$.
Since $f$ is irreducible,
$f$ itself must be a common factor of $f$ and $f'$,
so $f'$ can only be $0$, since $\deg(f')<\deg(f)$.
Say $f=\sum_{k=0}^na_kX^k$, so
$f'=\sum_{k=0}^{n-1}(k+1)\cdot a_{k+1}X^k$.
If $f'=0$, but $a_{k+1}\neq0$,
then $k+1$ must be $0$ in $K$,
that is, its image under the homomorphism from $\Z$ to $K$ must be $0$.
Then this homomorphism has a kernel $\gpgen p$ for some prime number $p$.
Hence $a_k=0$ whenever $p\ndivides k$,
so $f$ can be written as $\sum_{j=0}^ma_{pj}X^{pj}$,
which is $g(X^p)$, where $g=\sum_{j=0}^ma_{pj}X^j$.
\end{proof}

\subsection{Factorization}

Throughout this subsection, $R$ is a \ufd\ with quotient field $K$.
We know from Theorem~\ref{thm:deg-euc} that $K[X]$ is a Euclidean domain
and therefore a \ufd.
Now we shall show that $R[X]$ too is a \ufd.
It will then follow that each of the polynomial rings $R[X_0,\dots,X_{n-1}]$ is a \ufd.

A polynomial over $R$ is called \textbf{primitive} 
if $1$ is a greatest common divisor of its coefficients.
Gauss proved a version of the following
for the case where $R$ is $\Z$ \cite[\P42]{Gauss}.

\begin{lemma}[Gauss]
The product of primitive polynomials over $R$ is primitive.
\end{lemma}

\begin{proof}
Let $f=\sum_{k=0}^ma_kX^k$ and $g=\sum_{k=0}^nb_kX^k$.  Then
\begin{equation*}
fg=\sum_{k=0}^{m+n}c_kX^k,
\end{equation*}
where
\begin{equation*}
c_k=\sum_{i+j=k}a_ib_j=a_0b_k+a_1b_{k-1}+\dotsb+a_kb_0.
\end{equation*}
Suppose $f$ is primitive, but $fg$ is not,
so the coefficients $c_k$ have a common prime factor $\pi$.  
There is some $\ell$ such that $\pi\divides a_i$ when $i<\ell$, 
but $\pi\ndivides a_{\ell}$.
Then $\pi$ divides
\begin{equation*}
c_{\ell}-(a_0b_{\ell}+\dots+a_{\ell-1}b_1),
\end{equation*}
which is $a_{\ell}b_0$, so $\pi\divides b_0$.
Hence $\pi$ divides
\begin{equation*}
c_{\ell+1}-(a_0b_{\ell+1}+\dots+a_{\ell-1}b_2)-a_{\ell+1}b_0,
\end{equation*}
which is $a_{\ell}b_1$, so $\pi\divides b_1$, and so on.
Thus $g$ is not primitive.
\end{proof}

\begin{lemma}\label{lem:poly-ass}
Primitive polynomials over $R$ that are associates over $K$ 
are associates over~$R$.
\end{lemma}

\begin{proof}
Suppose $f$ and $g$ are polynomials that are defined over $R$
and are associates over $K$.
Then $uf=g$ for some $u$ in $\units K$,
and consequently $bu=a$ for some $a$ and $b$ in $R$, so $af=bg$.  
If $f$ and $g$ are primitive, then $a$ and $b$ must be associates in $R$, 
and therefore $u\in\units R$,
so $f$ and $g$ are associates over $R$.
\end{proof}

\begin{lemma}
Primitive polynomials over $R$ are irreducible over $R$ 
if and only if they are irreducible over~$K$. 
\end{lemma}

\begin{proof}
Suppose $f$ and $g$ are polynomials over $K$
such that the product $fg$ is a primitive polynomial over $R$. 
For some $a$ and $b$ in $K$,
the polynomials $af$ and $bg$ have coefficients in $R$ 
and are primitive over $R$.
By Gauss's Lemma, $abfg$ is primitive.
Since $fg$ is already primitive,
$ab$ must be a unit in $R$.
In particular, $abu=1$ for some $u$ in $\units R$.
Then $af$ and $bug$ are primitive polynomials over $R$
whose product is $fg$.  

Now, the units of $K[X]$ are just the polynomials of degree $0$,
that is, the elements of $\units K$.
In particular,
\begin{equation*}
f\in\units{K[X]}\iff af\in\units{K[X]}.
\end{equation*}
The unit \emph{primitive} elements of $R[X]$ 
are the elements of $\units R$.
Thus
\begin{equation*}
  af\in\units{K[X]}\iff af\in\units{R[X]}.
\end{equation*}
Therefore $fg$ is irreducible over $K$ if and only if over $R$.
\end{proof}

Note however that if $f$ is primitive and irreducible over $R$, 
and $a$ in $R$ is not a unit or $0$, 
then $af$ is still irreducible over $K$ 
(since $a$ is a unit in $K$) but not over $R$.

\begin{theorem}\label{thm:R[X]-ufd}
$R[X]$ is a \ufd.
\end{theorem}

\begin{proof}
Every nonzero element of $R[X]$ can be written as $af$, 
where $a\in R\setminus\{0\}$ and $f$ is primitive.  
Then $f$ has a prime factorization over $K$ 
(since $K[X]$ is a Euclidean domain): 
say $f=f_0\dotsm f_{n-1}$.  
There are $b_k$ in $K$ such that $b_kf_k$ is a primitive polynomial over $R$.  
The product of these is still primitive by Gauss's Lemma, 
so the product of the $b_k$ must be a unit in $R$.
We may assume this unit is $1$.
Thus $f$ has an irreducible factorization
\begin{equation*}
  (b_0f_0)\dotsm(b_{n-1}f_{n-1})
\end{equation*}
over $R$.  
Its uniqueness follows from its uniqueness over $K$ 
and Lemma~\ref{lem:poly-ass}.
Since $a$ has a unique irreducible factorization,
%$a_0\cdots a_{m-1}$,
we obtain a unique irreducible factorization of $af$.
\end{proof}

We end with a test for irreducibility.

\begin{theorem}[Eisenstein's Criterion]
If $\pi$ is an irreducible element of $R$
and $f$ is a polynomial
\begin{equation*}
a_0+a_1X+\dots+a_nX^n
\end{equation*}
over $R$ such that
\begin{align*}
\pi^2&\ndivides a_0,&
\pi&\divides a_0,&
\pi&\divides a_1,&
&\dots,&
\pi&\divides a_{n-1},&
\pi&\ndivides a_n,
\end{align*}
then $f$ is irreducible over $K$ and, if primitive, over $R$.
\end{theorem}

\begin{proof}
Suppose $f=gh$, where
\begin{align*}
g&=\sum_{k=0}^nb_kX^k,&h&=\sum_{k=0}^nc_kX^k,
\end{align*}
all coefficients being from $R$.  
We may assume $f$ is primitive, so $g$ and $h$ must be primitive.  
We may assume $\pi$ divides $b_0$, but not $c_0$.  
Let $\ell$ be such that $\pi\divides b_k$ when $k<\ell$.  
If $\ell=n$, then (since $g$ is primitive) 
we must have $b_n\neq0$, so $\deg(g)=n$. 
In this case $\deg(h)=0$, so $h$ is a unit.  
If $\ell<n$, then, since $\pi\divides a_{\ell}$, but
\begin{equation*}
a_{\ell}=b_0c_{\ell}+b_1c_{\ell-1}+\dotsb+b_{\ell}c_0,
\end{equation*}
we have $\pi\divides b_{\ell}$.  
By induction, $\pi\divides b_k$ whenever $k<n$, so as before $\deg(g)=n$.
\end{proof}

An application is the following.

\begin{theorem}
If $p$ is a prime number, then the polynomial
\begin{equation*}
1+X+\dots+X^{p-1}
\end{equation*}
is irreducible.
\end{theorem}

\begin{proof}
It is enough to establish the irreducibility of $\sum_{k=0}^{p-1}(X+1)^k$.
We have
\begin{align*}
\sum_{k=0}^{p-1}(X+1)^k
&=\sum_{k=0}^{p-1}\sum_{j=0}^k\binom kjX^j\\
&=\sum_{j=0}^{p-1}X^j\sum_{k=j}^{p-1}\binom kj
=\sum_{j=0}^{p-1}X^j\binom p{j+1},
\end{align*}
which meets the Eisenstein Criterion since
\begin{align*}
\binom p1&=p,&
\binom p{j+1}&=\frac{p!}{(p-j-1)!(j+1)!},
\end{align*}
which is divisible by $p$ if and only if $j<p-1$.
\end{proof}













%\newpage
%\part{Extras}%{Appendices}

%\part*{Appendices}

\appendix

\chapter{The Greek alphabet}\label{app:Greek}

In Figure \ref{fig:Greek} are the Greek letters available
in \LaTeX,
\begin{compactitem}
\item 
in math mode (that is, in \url{$...$}),
possibly with the \url{upgreek} package, and 
\item
in text mode with the \url{gfsporson} package
of the Greek Font Society.
\end{compactitem}
\begin{figure}
  \begin{center}
  \begin{tabular}{cccccclcc}
\multicolumn4c{math mode}&\multicolumn2c{text mode}&\\
\multicolumn2c{normal}&var.
&\url{upgreek}&\multicolumn2c{\url{gfsporson}}&name&value&code\\\hline
&$\alpha$&&$\upalpha$&\gk A&\gk a&alpha&a\\
&$\beta$&&$\upbeta$&\gk B&\gk b&beta&b\\
$\Gamma$&$\gamma$&&$\upgamma$&\gk G&\gk g&gamma&g\\
$\Delta$&$\delta$&&$\updelta$&\gk D&\gk d&delta&d\\
&$\standardepsilon$&$\epsilon$&$\upepsilon$&\gk E&\gk e&epsilon&e\\
&$\zeta$&&$\upzeta$&\gk Z&\gk z&zeta&z\\
&$\eta$&&$\upeta$&\gk H&\gk h&eta&\^e&h\\
$\Theta$&$\theta$&$\vartheta$&$\uptheta$&\gk J&\gk j&theta&th&j\\
&$\iota$&&$\upiota$&\gk I&\gk i&iota&i\\
&$\kappa$&&$\upkappa$&\gk K&\gk k&kappa&k\\
$\Lambda$&$\lambda$&&$\uplambda$&\gk L&\gk l&lambda&l\\
&$\mu$&&$\upmu$&\gk M&\gk m&mu&m\\
&$\nu$&&$\upnu$&\gk N&\gk n&nu&n\\
$\Xi$&$\xi$&&$\upxi$&\gk X&\gk x&xi&x\\
&&&&\gk O&\gk o&omicron&o\\
$\Pi$&$\pi$&$\varpi$&$\uppi$&\gk P&\gk p&pi&p\\
&$\rho$&$\varrho$&$\uprho$&\gk R&\gk r&rho&r(h)\\
$\Sigma$&$\sigma$&$\varsigma$&$\upsigma$&\gk S&\gk{sv, s}&sigma&s\\
&$\tau$&&$\uptau$&\gk T&\gk t&tau&t\\
$\Upsilon$&$\upsilon$&&$\upupsilon$&\gk U&\gk u&upsilon&u, y&u\\
$\Phi$&$\standardphi$&$\phi$&$\upphi$&\gk F&\gk f&phi&ph&f\\
&$\chi$&&$\upchi$&\gk Q&\gk q&chi&ch&q\\
$\Psi$&$\psi$&&$\uppsi$&\gk Y&\gk y&psi&ps&y\\
$\Omega$&$\omega$&&$\upomega$&\gk W&\gk w&omega&\^o&w\\\hline
  \end{tabular}
\end{center}

  \caption{Greek letters}\label{fig:Greek}
  
\end{figure}
A number of the Greek capitals, and one of the minuscules,
are not provided in math mode,
because of their similarity to (or identity with) Latin letters.
As discussed in note \ref{fn:Greek}, p.\ \pageref{fn:Greek},
one might use the standard, slanted letters as variables,
and the upright letters as constants.

An example will show how the letters are obtained in the underlying 
\LaTeX\ file.
The code for the line of forms of pi is
\begin{center}
\verb+$\Pi$ & $\pi$ & $\varpi$ & $\uppi$ & \gk P & \gk p & pi+
\end{center}
where the command \url{\gk} is defined in the preamble by means of
\begin{center}
  \begin{minipage}{0.8\textwidth}
\verb+\usepackage[polutonikogreek,english]{babel}+\\
\verb+\usepackage{gfsporson}+\\
\verb+\newcommand{\gk}[1]{%+\\
\verb+     \foreignlanguage{polutonikogreek}{%+\\
\verb+          \textporson{#1}}}+
  \end{minipage}
\end{center}
In Figure \ref{fig:Greek},
the ``value'' of a letter 
is the Latin letter or letters usually used to transcribe it.
This is usually the code for the letter in \LaTeX;
if it is not, the code is given separately in the table.
In text, the two minuscule forms of sigma
are used, 
within a word and at the end of a word, respectively;
for standalone \gk{sv}, \verb+\gk{sv}+ can be used.


\chapter{The German script}\label{app:German}

In his encyclopedic \emph{Model Theory} of 1993, Wilfrid Hodges observes \cite[Ch.~1, p.~21]{MR94e:03002}:
\begin{quotation}
  Until about a dozen years ago, most model theorists named structures
  in horrible Fraktur lettering.  Recent writers sometimes adopt a
  notation according to which all structures are named $M$, $M'$,
  $M^*$, $\bar M$, $M_0$, $M_i$ or occasionally $N$.  
I hope I cause no offence by using a more freewheeling notation.
\end{quotation}
For Hodges, \emph{structures} 
(as defined in \S\ref{sect:structures} on p.\ \pageref{sect:structures} above) 
are denoted by the letters $A$, $B$, $C$, and so forth; 
he refers to their universes as
\textbf{domains}\index{domains}
and denotes these by $\operatorname{dom}(A)$ and so forth.  
%\begin{comment}
This practice is convenient if one is using a typewriter 
(as in the preparation of another of Hodges's books \cite{Hodges-Building}, 
from 1985).
In his \emph{Model Theory:  An Introduction} of 2002, 
David Marker \cite{MR1924282} uses ``calligraphic'' letters 
to denote structures, as distinct from their universes:
so $M$ is the universe of~$\mathcal M$, and $N$ of $\mathcal N$.
%\end{comment}
I still prefer the older practice 
of using capital Fraktur letters for structures:
%In \AmS\ \LaTeX\ (by which these notes are typeset) these letters are:
\begin{equation*}
\begin{array}{*{13}{c}}
\mathfrak A&\mathfrak B&\mathfrak C&\mathfrak D&\mathfrak E&\mathfrak F&\mathfrak G&\mathfrak H&\mathfrak I&\mathfrak J&\mathfrak K&\mathfrak L&\mathfrak M\\\mathfrak N&\mathfrak O&\mathfrak P&\mathfrak Q&\mathfrak R&\mathfrak S&\mathfrak T&\mathfrak U&\mathfrak V&\mathfrak W&\mathfrak X&\mathfrak Y&\mathfrak Z
  \end{array}
\end{equation*}
Here are the minuscule Fraktur letters, 
which are used in this text,
starting on p.\ \pageref{mathfrak}, for denoting ideals:
\begin{equation*}
\begin{array}{*{13}{c}}
\mathfrak a&\mathfrak b&\mathfrak c&\mathfrak d&\mathfrak e&\mathfrak f&\mathfrak g&\mathfrak h&\mathfrak i&\mathfrak j&\mathfrak k&\mathfrak l&\mathfrak m\\\mathfrak n&\mathfrak o&\mathfrak p&\mathfrak q&\mathfrak r&\mathfrak s&\mathfrak t&\mathfrak u&\mathfrak v&\mathfrak w&\mathfrak x&\mathfrak y&\mathfrak z
  \end{array}
\end{equation*}
A way to write these letters by hand is seen
on the page reproduced in Figure~\ref{fig:Ger}
from a 1931 textbook \cite{German}
on the German language:
\begin{figure}[ht]
%  \begin{sideways}
\centering
\includegraphics[width=\textwidth]%[width=350pt]%
{../german-script-cropped.eps}
%  \end{sideways}
\caption{The German alphabet}\label{fig:Ger}
\end{figure}

\begin{comment}





\chapter{Categories}

For any two groups $G$ and $H$ there is a set
\begin{equation*}
  \Hom{G,H}
\end{equation*}
comprising the homomorphisms from $G$ to $H$.  There is a map
\begin{equation*}
  (g,f)\mapsto g\circ f
\end{equation*}
from $\Hom{H,K}\times\Hom{G,H}$ to $\Hom{G,K}$,
and there is an element $\id_H$ of $\Hom {H,H}$, such that
\begin{equation*}
  \id_H\circ f=f,\quad g\circ\id_H=g,\quad k\circ(g\circ f)=(k\circ
  g)\circ f
\end{equation*}
whenever $f\in\Hom{G,H}$, $g\in\Hom{H,K}$, and $k\in\Hom{K,L}$.
Understood in this way, groups with their 
homomorphisms compose a prototypical example of a \emph{category.}

A \textbf{directed graph}\index{directed graph} is a certain kind of quadruple 
\begin{equation*}
  (\class C_0,\class C_1,t,h),
\end{equation*}
where $\class C_0$ and $\class C_1$ are classes, and $t$
and $h$ are 
functions from $\class C_1$ to $\class C_0$.  We may refer to each
element of $\class C_0$ as a \textbf{node,}\index{node} and to each element of
$\class C_1$ as an \textbf{arrow.}\index{arrow}  If $a$ is an arrow, then $t(a)$ is its
\textbf{tail,}\index{tail} and $h(a)$ is its \textbf{head,}\index{head} and $a$ is an
arrow \textbf{from}
$t(a)$ \textbf{to} $h(a)$.  If $f$ is an arrow from $A$ to $B$, we
may express this by writing 
\begin{align*}
f&\colon A\longrightarrow B&
&\text{ or }&
&A\overset{f}{\longrightarrow}B.  
\end{align*}
We require the arrows from $A$ to $B$ to compose a \emph{set} (as
opposed to a proper class, like the class of all sets that do not
contain themselves).  We can define
\begin{equation*}
  \class C_2=\{(f,g)\in\class C_1{}^2\colon t(f)=h(g)\};
\end{equation*}
this is the class of paths of length $2$.
More generally,
\begin{equation*}
\class C_{n+1}=\Bigl\{(f_0,\dots,f_n)\in\class
C_1{}^{n+1}\colon \bigwedge_{i<n}t(f_i)=h(f_{i+1})\Bigr\}. 
\end{equation*}
The graph above is a \textbf{category}\index{category}
if there are
\begin{compactenum}
  \item
a function
$A\mapsto\id_A$ from $\class C_0$ to $\class C_1$, and
\item
a function $(f,g)\mapsto f\circ g$ from $\class C_2$ to $\class C_1$,
\end{compactenum}
such that
\begin{align*}
t(\id_A)&=A=h(\id_A),&
t(f\circ g)&=t(g),&
h(f\circ g)&=h(f),
\end{align*}
and also
\begin{align}\label{eqn:cat}
  f\circ\id_{t(f)}&=f,&
  \id_{h(g)}\circ g&=g,&
h\circ(g\circ f)&=(h\circ g)\circ f
\end{align}
whenever these are defined.  In particular then, the category is a
sextuple
\begin{equation}\label{cat}
  (\class C_0,\class C_1,t,h,\id,\circ),
\end{equation}
meeting the conditions that we have discussed.  In this case, the nodes---the elements of $\class C_0$---are called
\textbf{objects.}%
\index{object}
Conditions~\eqref{eqn:cat} can be
diagrammed as follows.
\begin{align*}
  &\xymatrix{
A \ar[r]^f \ar[d]_f & B \ar[d]^g\\
B \ar[ur]|{\id_B} \ar[r]_g & C}&
&\xymatrix{
B \ar[d]_g & A \ar[l]_f \ar[dl]|{g\circ f} \ar[d]%|{h\circ g\circ f} 
\ar[r]^f & B \ar[dl]|{h\circ g} \ar[d]^g\\
C \ar[r]_h & D & C \ar[l]^h}
\end{align*}
These are \textbf{commutative diagrams}\index{commutative diagram}%
\index{diagram!commutative ---}
in the sense that any two paths from one node to another represent
the same arrow.\footnote{One can define commutative diagrams
    formally.  A \textbf{diagram}\index{diagram} is a
homo\-morph\-ism from a directed graph to a category.  One then thinks of the
diagram as the graph with its nodes and arrows labelled with their
images in the category.  The diagram is
\textbf{commutative} if every 
path in the graph with the same tail and head is sent to the same
arrow in the category.}
The
arrows of a category are also called \textbf{morphisms.}\index{morphism}  
The class of morphisms from $A$ to $B$ can be denoted by
\begin{equation*}
  \Hom{A,B}.
\end{equation*}
The morphism
$f\circ g$ is the \textbf{composite}\index{composite} of $f$ and $g$.

A
category is \textbf{concrete}\index{concrete} if each of its objects has an underlying
set and the morphisms are functions in the way suggested by the notation.
For example,
  the class of sets, with the class of functions, is a concrete
  category; likewise the class of groups, with homomorphisms, and the
  class of topological spaces, with continuous functions. 
However, not all categories are concrete.  For example,
if $G$ is a group, then its elements can be considered as objects of
  a category in which $\Hom{a,b}=\{ba\inv\}$, $\id_a=1$, and $c\circ
  d=cd$.


In a category, a morphism $f$ is an \textbf{isomorphism}\index{isomorphism} if
\begin{equation*}
  g\circ f=\id_{t(f)}\quad\text{ and }\quad f\circ g=\id_{h(f)}
\end{equation*}
for some morphism $g$; then $g$ is an \textbf{inverse}\index{inverse} of $f$.

\begin{theorem}
  In a category, inverses are unique.
\end{theorem}

\begin{proof}
  If $g$ and $h$ are inverses of $f$, then
  $g=g\circ\id_{h(f)}=g\circ(f\circ h)=(g\circ f)\circ h=
  \id_{t(f)}\circ h=h$.
\end{proof}

If it exists, then the inverse of $f$ is $f\inv$.  It is immediate
then that $(f\inv)\inv=f$.




\end{comment}

\begin{comment}





\chapter{Group-actions}\label{App:ga}

%This chapter is a suggested reference from p.\ \pageref{app-ref}.
The chapter is partially inspired by an expository article
\cite{MR1997347} by Serre.
Suppose a group $G$ acts on a set $\setactedon$ by $(g,x)\mapsto gx$.
Just as, for an
element $a$ of $\setactedon$, we define
\begin{equation*}
  G_a=\{g\in G\colon ga=a\},
\end{equation*}
so, for an element $g$ of $G$, we may define
\begin{equation*}
  \setactedon^g=\{x\in\setactedon\colon gx=x\}:
\end{equation*}
this is the set of \textbf{fixed points}\index{fixed point} of $g$.
The orbit of $a$ under the action of $G$ is defined by
\begin{equation*}
  Ga=\{ga\colon g\in G\}.
\end{equation*}
Then $ga=ha\iff gG_a=hG_a$, and therefore
\begin{equation*}
  \size{Ga}=[G:G_a],
\end{equation*}
and the sets $Ga$ partition ${\setactedon}$.  We may define
\begin{equation*}
  {\setactedon}/G=\{Gx\colon x\in {\setactedon}\}.
\end{equation*}
Assume $G$ is finite.
For any function $\phi$ from $G$ to $\R$ and subset $X$ of $G$, we
define 
\begin{align*}
  \int_X\phi&=\sum_{g\in X}\frac{\phi(g)}{\size
  G},&
\int\phi&=\int_G\phi. 
\end{align*}
Assume ${\setactedon}$ is also finite, and
let $\chi$ be the function
\begin{equation*}
  g\mapsto\size{{\setactedon}^g}
\end{equation*}
from $G$ to $\upomega$.

\begin{lemma}[Burnside]\index{Burnside Lemma} \index{theorem!Burnside
    Lemma} 
  $\size{{\setactedon}/G}=\int\chi$.
\end{lemma}

\begin{proof}
Letting $R=\{(g,x)\in G\times\setactedon\colon gx=x\}$, we define $\pi_G$ as
$(g,x)\mapsto g$ from $R$ to $G$, and $\pi_{\setactedon}$ as
$(g,x)\mapsto x$ from $R$ to $\setactedon$.
Then
\begin{equation*}
  \size R=\sum_{g\in G}\size{\pi_G{}\inv(g)}=\sum_{g\in G}\chi(g),
\end{equation*}
but also
\begin{equation*}
\size{R}
=\sum_{x\in {\setactedon}}\size{G_x}
= \sum_{C\in {\setactedon}/G}\sum_{x\in C}\size{G_x}.
\end{equation*}
But if $C\in {\setactedon}/G$ and $a\in C$, then $\size C=[G:G_a]$.  Hence
\begin{equation*}
 \sum_{C\in {\setactedon}/G}\sum_{x\in C}\size{G_x}
=  \sum_{C\in {\setactedon}/G}\sum_{x\in C}\frac{\size G}{\size C}
=  \sum_{C\in {\setactedon}/G}\size G
=\size{{\setactedon}/G}\cdot\size G.\qedhere
\end{equation*}
\end{proof}
Now define
\begin{equation*}
  G_0=\{g\in G\colon {\setactedon}^g=\emptyset\},
\end{equation*}
the set of elements of $G$ with no fixed points.

\begin{theorem}[Jordan]\index{Jordan Theorem} \index{theorem!Jordan Th---}
  If $\size{{\setactedon}/G}=1$ and $\size {\setactedon}\geq 2$, then 
  \begin{equation*}
      G_0\neq\emptyset.
  \end{equation*}
\end{theorem}

\begin{proof}
  By the Burnside Lemma, the average size of ${\setactedon}^g$ is $1$.  Since
  ${\setactedon}^1={\setactedon}$, and $\size {\setactedon}\geq 2$, we must have $\size {\setactedon}^g<1$ for some
  $g$ in $G$.
\end{proof}

A stronger result is the following:

\begin{theorem}[Cameron--Cohen]\index{Cameron--Cohen Theorem}
  If $\size{{\setactedon}/G}=1$ and $\size {\setactedon}\geq 2$, then 
  \begin{equation*}
    \size{G_0}\cdot\size
  {\setactedon}\geq\size G.
  \end{equation*}
\end{theorem}

\begin{proof}
  The action of $G$ on ${\setactedon}$ induces an action on ${\setactedon}\times {\setactedon}$, and
  $\size{({\setactedon}\times {\setactedon})^g}=\chi(g)^2$.  Now, $({\setactedon}\times {\setactedon})/G$ contains the
  diagonal $G(1,1)$ and at least one other element, so
  \begin{equation*}
    \int\chi^2\geq 2
  \end{equation*}
by Burnside's Lemma.  Let $n=\size {\setactedon}$.  Then for all $g$
in $G\setminus G_0$, we have $1\leq\chi(g)\leq n$ and therefore
\begin{equation*}
(\chi(g)-1)(\chi(g)-n)\leq0;
\end{equation*}
but $(\chi(g)-1)(\chi(g)-n)=n$ when $g\in G_0$.
Consequently,
\begin{align*}
  \frac{\size{G_0}\cdot\size {\setactedon}}{\size G}=n\int_{G_0}1
  &=     \int_{G_0}(\chi-1)(\chi-n) \\
  &\geq  \int_G(\chi-1)(\chi-n)=
  \int_G(\chi^2-1)\geq1.\qedhere 
\end{align*}
\end{proof}

Serre's article gives applications to topology and number-theory.




\end{comment}

\begin{comment}



\chapter{Extra theorems}

Here are some results that used to be in the main text.

The first three theorems below fit after \S\ref{sect:cosets} (p.~\pageref{sect:cosets}).
Here $HK$ has the obvious meaning of $\{xy\colon x\in H\land y\in K\}$.  It need not be a group.  For example, in $\Sym 3$, if $H=\gpgen{(0\cdiv 1)}$ and $K=\gpgen{(0\cdiv 2)}$, then $HK=\{\gid,(0\cdiv 1),(0\cdiv 2),(0\cdiv 2\cdiv 1)\}$, which is not a group.

\begin{theorem}
  If $H$ and $K$ are finite subgroups of some group, then
  \begin{equation*}
    \order{HK}=\frac{\order H\order K}{\order{H\cap K}}.
  \end{equation*}
\end{theorem}

\begin{proof}
Since $H\cap K$ is a group by Theorem~\ref{thm:subgroups}, and $H\cap K\included H$, we have $H\cap K\subgp H$.  By Theorem~\ref{thm:cosets}, for some $n$ in $\N$, for some $a_0$, \dots, $a_{n-1}$ in $H$, we now have
\begin{equation*}
H=a_0(H\cap K)\cup\dotsb\cup a_{n-1}(H\cap K),
\end{equation*}
the union being disjoint.  Then
  $\order H=n\order{H\cap K}$.  Also, immediately
  \begin{equation*}
a_0K\cup\dotsb\cup a_{n-1}K\included HK.
\end{equation*}
We have also the reverse inclusion, since if $h\in H$ and $k\in K$, then $h=a_ik_1$ for some $i$ in $n$ and some $k_1$ in $H\cap K$, so that $hk=a_ik_1k$, which is in $a_iK$.  Thus
  \begin{equation*}
    a_0K\cup\dotsb\cup a_{n-1}K=HK.
  \end{equation*}
This union is disjoint.  For, suppose $a_ik_i=a_jk_j$, where $k_i$ and
$k_j$ are in $K$.
Then $a_j{}\inv a_i=k_jk_i{}\inv$, which belongs both to $H$ and to $K$.  Thus $a_j{}\inv a_i\in H\cap K$.  Hence we must have $a_i(H\cap K)=a_j(H\cap
K)$, so that $a_i=a_j$.  So the union above is disjoint, and therefore $\order{HK}=n\order K$.
\end{proof}

Note that in the foregoing theorem and proof, we have no need to name the group of which $H$ and $K$ are subgroups.  If this group is $G$, then we have $\size G\geq\size{HK}$, and so
\begin{equation*}
[G:H]=\frac{\size G}{\size H}\geq\frac{\size K}{\size{H\cap K}}=[K\colon H\cap K].
\end{equation*}
We proved this under the assumption that $H$ and $K$ are finite; but we can do without this assumption as follows:

\begin{theorem}\label{thm:HHK}
  Suppose $H$ and $K$ are subgroups of a group $G$.
  Then
  \begin{equation}\label{eqn:KHK}
    [H:H\cap K]\leq[G:K].
  \end{equation}
  If $[G:K]$ is finite, then it is equal to $[H:H\cap K]$ if and only if $G=HK$.
\end{theorem}

\begin{proof}
  In the proof of the last theorem, we showed in effect that the function
  $x(H\cap K)\mapsto xK$ from $H/(H\cap K)$ to $G/K$ is injective.  This gives \eqref{eqn:KHK}.  The function is
  surjective if and only if $G=HK$.
\end{proof}

\begin{theorem}
Suppose $H$ and $K$ are subgroups of a group $G$.
Then
  \begin{equation*}
    [G:H\cap K]\leq[G:H][G:K].
  \end{equation*}
If $[G:H]$ and $[G:K]$ are finite, then their product is equal to $[G:H\cap K]$
if and only if $G=HK$.
\end{theorem}

\begin{proof}
  By Theorems~\ref{thm:KHG} and~\ref{thm:HHK},
  \begin{equation*}
[G:H\cap
    K]=[G:H][H:H\cap K]\leq [G:H][G:K].
\end{equation*}
Similarly the rest follows.
\end{proof}

The following is another corollary of Theorem~\ref{thm:hom-n} in \S\ref{sect:normal} (p.~\pageref{thm:hom-n}).

\begin{corollary}
  If $f$ is a homomorphism from $G$ to $H$, and $N$ is a normal subgroup
  of $G$, and $M\nsubgp H$, and $f[N]\subgp M$, then there is a
  homomorphism $\tilde f$ from $G/N$ to $H/M$ such that the following
  diagram commutes:
  \begin{equation*}
    \xymatrix{
G \ar[r]^{\uppi}\ar[d]_f & G/N\ar[d]^{\tilde f}\\
H \ar[r]_{\uppi}         & H/M
}
  \end{equation*}
\end{corollary}

\begin{proof}
$N<\Ker{\uppi\circ f}$.
\end{proof}

The next lemma is from the end of \S\ref{sect:normal} (p.~\pageref{sect:fin}).
The lemma was used immediately for a theorem,
which is now Theorem~\ref{thm:GNKN} (p.~\pageref{thm:GNKN}).

\begin{lemma}
  If $f$ is an epimorphism from $G$ onto $H$, then there is a
  one-to-one correspondence $K\mapsto f[K]$ between subgroups of $G$
  that include 
  $\Ker f$ and subgroups of $H$; under this, normal subgroups
  correspond.
  \begin{equation*}
    \xymatrix{
K  \ar[r]\ar[d] & G \ar@{>>}[d]^f\\
f[K] \ar[r] & H
}
  \end{equation*}
\end{lemma}

The next theorem is a refinement of Theorem~\ref{thm:3-cycles} (p.~\pageref{thm:3-cycles}).

\begin{theorem}
  $\Alt n$ is generated by the $3$-cycles 
$\begin{pmatrix}
    0 & 1 & k
  \end{pmatrix}$,
where $1<k<n$.
\end{theorem}

\begin{proof}
If $a$, $b$, and $c$ are distinct elements of $n\setminus\{0,1\}$, then
\begin{align*}
  \begin{pmatrix}
    0 & a & b
  \end{pmatrix}
&= \begin{pmatrix}
    0 & 1 & b
  \end{pmatrix}
  \begin{pmatrix}
    a & 1 & 0
  \end{pmatrix}
= \begin{pmatrix}
    0 & 1 & b
  \end{pmatrix}
  \begin{pmatrix}
    0 & 1 & a
  \end{pmatrix}\inv,\\
\begin{pmatrix}
1 & a & b
\end{pmatrix}
&= \begin{pmatrix}
    1 & 0 & b
  \end{pmatrix}
  \begin{pmatrix}
    a & 0 & 1
  \end{pmatrix}
= \begin{pmatrix}
    0 & 1 & b
  \end{pmatrix}\inv
  \begin{pmatrix}
    0 & 1 & a
  \end{pmatrix},\\
\begin{pmatrix}
a & b & c  
\end{pmatrix}
&=
\begin{pmatrix}
  c&1&0
\end{pmatrix}
\begin{pmatrix}
  0 & a & b
\end{pmatrix}
\begin{pmatrix}
  0 & 1 & c
\end{pmatrix}.\qedhere
\end{align*}
\end{proof}

The following gives an 
equivalent formulation of prime ideals (the original definition being given on p.\ \pageref{prime}).

\begin{theorem}
  A proper ideal $P$ of a ring is prime if and only if, for all ideals $I$
  and $J$ of the ring,
  \begin{equation}\label{eqn:IJ}
    IJ\included P\iff I\included P\lor J\included P.
  \end{equation}
\end{theorem}

\begin{proof}
  The given condition has~\eqref{eqn:p-ideal} as a special case, since
  the latter can be written as
  \begin{equation*}
    (a)(b)\included P\implies(a)\included P\lor (b)\included P.
  \end{equation*}
Also, if~\eqref{eqn:IJ} fails, so that $IJ\included P$, but
$I\setminus P$ contains some $a$, and $J\setminus P$ contains some
$b$, then $ab\in P$, so~\eqref{eqn:p-ideal} fails.
\end{proof}


Here is a more direct proof of Theorem~\ref{thm:PID->UFD} 
(p.\ \pageref{thm:PID->UFD}),
that every \pid\ is a \ufd.


\begin{proof}
They do exist, because by the last theorem,
a tree of factorizations can have no infinite branches.  
That is, let $N$ be the set of non-zero non-irreducible non-units in some \pid,
and suppose $a\in N$.
Then $a$ is a product $a_{(0)}\cdot a_{(1)}$ of non-units.
If one of these $a_{(i)}$ is in $N$,
it is a product $a_{(i,0)}\cdot a_{(i,1)}$ of non-units.
Otherwise, we let $a_{(i,0)}=a_{(i)}$, but $a_{(i,1)}$ is undefined.
In general, if $a_{(n_0,\dots,n_{k-1})}$ has been defined and is in $N$, we let
\begin{equation*}
a_{(n_0,\dots,n_{k-1})}=a_{(n_0,\dots,n_{k-1},0)}\cdot a_{(n_0,\dots,n_{k-1},1)},
\end{equation*}
where each of the $a_{(n_0,\dots,n_{k-1},i)}$ is a non-unit;
but if $a_{(n_0,\dots,n_{k-1})}$ is irreducible, we let
\begin{equation*}
a_{(n_0,\dots,n_{k-1},0)}=a_{(n_0,\dots,n_{k-1})},
\end{equation*}
while $a_{(n_0,\dots,n_{k-1},1)}$ is undefined.
Strictly we need to use the Axiom of Choice here:
assuming we have well-ordered the ring, we let $a_{(n_0,\dots,n_{k-1},0)}$ be the \emph{least} possibility that meets our conditions.
Thus we obtain a non-zero non-unit $a_{\sigma}$ 
for each $\sigma$ in a subset $B$ 
of the set $\bigcup_{k\in\upomega}2^k$ of finite binary sequences.
Moreover, for each $k$ in $\upomega$,
\begin{equation*}
a=\prod_{\sigma\in 2^k\cap B}a_{\sigma}.
\end{equation*}
We claim that, for some $k$ in $\upomega$, for each $\sigma$ in $2^k\cap B$,
the factor $a_{\sigma}$ is irreducible.
For, let
\begin{equation*}
A=\{\emptyset\}
\cup\bigcup_{k\in\upomega}
\{\sigma\in 2^{k+1}\cap B\colon a_{\sigma\restriction k}\in N\}.
\end{equation*}
Then for all $k$ in $\upomega$, for all $\sigma$ in $2^{k+1}$,
\begin{equation*}
\sigma\in A\implies\sigma\restriction k\in A.
\end{equation*}
If $A$ is finite, then for some $n$ in $\upomega$, 
we have $A\included\bigcup_{k<n}2^k$,
and then $a_{\sigma}$ is irreducible for each $\sigma$ in $2^n\cap B$.
Moreover, $A$ \emph{is} finite.
For, if $A$ is infinite, then, by the result known as \emph{K\"onig's Lemma,}
$A$ has an infinite \emph{branch,} that is, there is $\tau$ in $2^{\upomega}$ such that
\begin{equation}\label{eqn:branch}
\{\tau\restriction k\colon k\in\upomega\}\included A.
\end{equation}
Indeed, suppose $A$ is infinite.
We can define $\tau$ recursively as follows.
Suppose $\tau\restriction n$ has been defined,
and the set $\{\sigma\in A\colon\sigma\restriction n=\tau\restriction n\}$ is infinite.
(The notation $\sigma\restriction n$ is meaningful 
only if the domain of $\sigma$ includes $n$.)
Then this set contains $\tau\restriction n$, and every other element is an element of one of the two sets
\begin{equation*}
\{\sigma\in A\colon\sigma\restriction n=\tau\restriction n\And\sigma(n)=i\},
\end{equation*}
where $i\in 2$.
One of these two sets must then be infinite.
If it is infinite when $i=0$, we let $\tau(n)=0$; otherwise $\tau(n)=1$.
Then \eqref{eqn:branch} is satisfied.
But then
\begin{equation*}
(a)=(a_{\tau\restriction 0})
\pincluded(a_{\tau\restriction 1})
\pincluded(a_{\tau\restriction 2})\pincluded\cdots
\end{equation*}
which contradicts the last theorem. 
\end{proof}




\end{comment}

%\bibliographystyle{plain}
%\bibliography{../../references}
%\bibliography{../references}

\def\rasp{\leavevmode\raise.45ex\hbox{$\rhook$}} \def\cprime{$'$}
  \def\cprime{$'$} \def\cprime{$'$} \def\cprime{$'$}
\begin{thebibliography}{10}

\bibitem{Burali-Forti}
Cesare Burali-Forti.
\newblock A question on transfinite numbers.
\newblock In van Heijenoort \cite{MR1890980}, pages 104--12.
\newblock First published 1897.

\bibitem{MR1517828}
Josephine~E. Burns.
\newblock The {F}oundation {P}eriod in the {H}istory of {G}roup {T}heory.
\newblock {\em Amer. Math. Monthly}, 20(5):141--148, 1913.

\bibitem{MR0103812}
Chen~Chung Chang.
\newblock On unions of chains of models.
\newblock {\em Proc. Amer. Math. Soc.}, 10:120--127, 1959.

\bibitem{Cohn-ANT}
Harvey Cohn.
\newblock {\em Advanced Number Theory}.
\newblock Dover, New York, 1980.
\newblock Corrected republication of 1962 edition.

\bibitem{MR0159773}
Richard Dedekind.
\newblock {\em Essays on the theory of numbers. {I}: {C}ontinuity and
  irrational numbers. {II}: {T}he nature and meaning of numbers}.
\newblock authorized translation by Wooster Woodruff Beman. Dover Publications
  Inc., New York, 1963.

\bibitem{Euclid-Heiberg}
Euclid.
\newblock {\em Euclidis {E}lementa}, volume~I of {\em Euclidis Opera Omnia}.
\newblock Teubner, 1883.
\newblock Edidit et Latine interpretatvs est I. L. Heiberg.

\bibitem{MR17:814b}
Euclid.
\newblock {\em The thirteen books of {E}uclid's {E}lements translated from the
  text of {H}eiberg. {V}ol. {I}: {I}ntroduction and {B}ooks {I}, {I}{I}. {V}ol.
  {I}{I}: {B}ooks {I}{I}{I}--{I}{X}. {V}ol. {I}{I}{I}: {B}ooks
  {X}--{X}{I}{I}{I} and {A}ppendix}.
\newblock Dover Publications Inc., New York, 1956.
\newblock Translated with introduction and commentary by Thomas L. Heath, 2nd
  ed.

\bibitem{MR1932864}
Euclid.
\newblock {\em Euclid's {E}lements}.
\newblock Green Lion Press, Santa Fe, NM, 2002.
\newblock All thirteen books complete in one volume. The Thomas L. Heath
  translation, edited by Dana Densmore.

\bibitem{Gauss}
Carl~Friedrich Gauss.
\newblock {\em Disquisitiones Arithmeticae}.
\newblock Springer-Verlag, New York, 1986.
\newblock Translated into English by Arthur A. Clarke, revised by William C.
  Waterhouse.

\bibitem{Gauss-Latin}
Carolo~Friderico Gauss.
\newblock {\em Disquisitiones Arithmeticae}.
\newblock Gerh.\ Fleischer Jun., Lipsiae, 1801.
\newblock Electronic version of the original Latin text from Goettingen State
  and University Library.

\bibitem{MR533669}
K.~R. Goodearl.
\newblock {\em von {N}eumann regular rings}, volume~4 of {\em Monographs and
  Studies in Mathematics}.
\newblock Pitman (Advanced Publishing Program), Boston, Mass., 1979.

\bibitem{MR2467561}
Timothy Gowers, June Barrow-Green, and Imre Leader, editors.
\newblock {\em The {P}rinceton companion to mathematics}.
\newblock Princeton University Press, Princeton, NJ, 2008.

\bibitem{MR1487370}
Joel~David Hamkins.
\newblock Every group has a terminating transfinite automorphism tower.
\newblock {\em Proc. Amer. Math. Soc.}, 126(11):3223--3226, 1998.

\bibitem{MR568909}
G.~H. Hardy and E.~M. Wright.
\newblock {\em An introduction to the theory of numbers}.
\newblock The Clarendon Press Oxford University Press, New York, fifth edition,
  1979.

\bibitem{German}
Roe-Merrill~S. Heffner.
\newblock {\em Brief {G}erman Grammar}.
\newblock D. C. Heath and Company, Boston, 1931.

\bibitem{MR0120156}
Leon Henkin.
\newblock On mathematical induction.
\newblock {\em Amer. Math. Monthly}, 67:323--338, 1960.

\bibitem{MR94e:03002}
Wilfrid Hodges.
\newblock {\em Model theory}, volume~42 of {\em Encyclopedia of Mathematics and
  its Applications}.
\newblock Cambridge University Press, Cambridge, 1993.

\bibitem{Hodges-Building}
Wilfrid Hodges.
\newblock {\em Building models by games}.
\newblock Dover Publications, Mineola, New York, 2006.
\newblock original publication, 1985.

\bibitem{MR600654}
Thomas~W. Hungerford.
\newblock {\em Algebra}, volume~73 of {\em Graduate Texts in Mathematics}.
\newblock Springer-Verlag, New York, 1980.
\newblock Reprint of the 1974 original.

\bibitem{MR0472307}
Morris Kline.
\newblock {\em Mathematical thought from ancient to modern times}.
\newblock Oxford University Press, New York, 1972.

\bibitem{Kuratowski-Zorn}
Casimir Kuratowski.
\newblock Une m{\'e}thode d'{\'e}limination des nombres transfinis des
  raisonnements math{\'e}matiques.
\newblock {\em Fundamenta Mathematicae}, 3(1):76--108, 1922.

\bibitem{MR12:397m}
Edmund Landau.
\newblock {\em Foundations of Analysis. {T}he Arithmetic of Whole, Rational,
  Irrational and Complex Numbers}.
\newblock Chelsea Publishing Company, New York, N.Y., third edition, 1966.
\newblock translated by F. Steinhardt; first edition 1951; first German
  publication, 1929.

\bibitem{Lang-alg}
Serge Lang.
\newblock {\em Algebra}.
\newblock Addison-Wesley, Reading, Massachusetts, third edition, 1993.
\newblock reprinted with corrections, 1997.

\bibitem{MR0075156}
Jerzy {\L}o{\'s}.
\newblock Quelques remarques, th\'eor\`emes et probl\`emes sur les classes
  d\'efinissables d'alg\`ebres.
\newblock In {\em Mathematical interpretation of formal systems}, pages
  98--113. North-Holland Publishing Co., Amsterdam, 1955.

\bibitem{MR0089813}
Jerzy {\L}o{\'s} and Roman Suszko.
\newblock On the extending of models ({IV}): {I}nfinite sums of models.
\newblock {\em Fund. Math.}, 44:52--60, 1957.

\bibitem{MR1924282}
David Marker.
\newblock {\em Model theory: an introduction}, volume 217 of {\em Graduate
  Texts in Mathematics}.
\newblock Springer-Verlag, New York, 2002.

\bibitem{MR0098777}
James~H. McKay.
\newblock Another proof of {C}auchy's group theorem.
\newblock {\em Amer. Math. Monthly}, 66:119, 1959.

\bibitem{LaTeX-Comp}
Frank Mittelbach and Michel Goossens.
\newblock {\em The {\LaTeX} Companion}.
\newblock Addison Wesley, Boston, second edition, August 2004.
\newblock With Johannes Braams, David Carlisle, and Chris Rowley; second
  printing (with corrections).

\bibitem{Peano}
Giuseppe Peano.
\newblock The principles of arithmetic, presented by a new method.
\newblock In van Heijenoort \cite{MR1890980}, pages 83--97.
\newblock first published 1889.

\bibitem{MR1200456}
Proclus.
\newblock {\em A commentary on the first book of {E}uclid's \emph{{E}lements}}.
\newblock Princeton Paperbacks. Princeton University Press, Princeton, NJ,
  1992.
\newblock Translated from the Greek and with an introduction and notes by Glenn
  R. Morrow, reprint of the 1970 edition, with a foreword by Ian Mueller.

\bibitem{Russell-letter}
Bertrand Russell.
\newblock Letter to {F}rege.
\newblock In van Heijenoort \cite{MR1890980}, pages 124--5.
\newblock First published 1902.

\bibitem{Skolem-some-remarks}
Thoralf Skolem.
\newblock Some remarks on axiomatized set theory.
\newblock In van Heijenoort \cite{MR1890980}, pages 290--301.
\newblock First published 1922.

\bibitem{MR1501865}
M.~H. Stone.
\newblock The theory of representations for {B}oolean algebras.
\newblock {\em Trans. Amer. Math. Soc.}, 40(1):37--111, 1936.

\bibitem{MR13:419b}
Ivor Thomas, editor.
\newblock {\em Selections illustrating the history of {G}reek mathematics.
  {V}ol. {II}. {F}rom {A}ristarchus to {P}appus}, volume 362 of {\em Loeb
  Classical Library}.
\newblock Harvard University Press, Cambridge, Mass, 1951.
\newblock With an English translation by the editor.

\bibitem{MR801316}
Simon Thomas.
\newblock The automorphism tower problem.
\newblock {\em Proc. Amer. Math. Soc.}, 95(2):166--168, 1985.

\bibitem{MR1890980}
Jean van Heijenoort, editor.
\newblock {\em From {F}rege to {G}\"odel: {A} source book in mathematical
  logic, 1879--1931}.
\newblock Harvard University Press, Cambridge, MA, 2002.

\bibitem{von-Neumann-ax}
John von Neumann.
\newblock An axiomatization of set theory.
\newblock In van Heijenoort \cite{MR1890980}, pages 393--413.
\newblock First published 1925.

\bibitem{von-Neumann}
John von Neumann.
\newblock On the introduction of transfinite numbers.
\newblock In van Heijenoort \cite{MR1890980}, pages 346--354.
\newblock First published 1923.

\bibitem{Zermelo-invest}
Ernst Zermelo.
\newblock Investigations in the foundations of set theory {I}.
\newblock In van Heijenoort \cite{MR1890980}, pages 199--215.
\newblock First published 1908.

\bibitem{MR1563165}
Max Zorn.
\newblock A remark on method in transfinite algebra.
\newblock {\em Bull. Amer. Math. Soc.}, 41(10):667--670, 1935.

\end{thebibliography}



%\printindex

\end{document}



 
