\documentclass[%
version=last,%
a5paper,
11pt,%
headings=small,%
bibliography=totoc,%
index=totoc,%
twoside,%
reqno,%
cleardoublepage=empty,%
open=any,%
%parskip=half,%
draft=true,%
%DIV=classic,%
DIV=12,%
%headinclude=true,%
pagesize]
{scrbook}
%\usepackage[notref,notcite]{showkeys}
%\usepackage[headsepline]{scrpage2}
% Page style: KOMA running headings, with the page number in the outer
% footer and the running head in the inner footer.
% NOTE(review): scrpage2 is obsolete; its successor scrlayer-scrpage
% provides the same commands -- consider migrating.
\usepackage{scrpage2}
\pagestyle{scrheadings}
\clearscrheadings% clear all predefined header/footer fields first
\ofoot{\pagemark}% page number, outer footer
\ifoot{\headmark}% chapter/section mark, inner footer
\usepackage{pstricks}
\renewcommand{\captionformat}{\ }% KOMA: caption label separated from text by a space, not a colon
\usepackage{cclicenses}% Creative Commons symbols (\cc, \ccby, \ccnc, \ccsa)
\usepackage{url}
%%%%%%%%%%%%%% TIME %%%%%%%%%%%%%%%
\usepackage{calc}
%% \printtime: print the current clock time in 12-hour a.m./p.m. form.
%% \time is the number of minutes since midnight; 720 minutes = 12 hours.
%% Counter arithmetic uses the calc package (integer division truncates).
\newcounter{ampm}\newcounter{hours}\newcounter{minutes}
% NOTE(review): \ifthenelse needs the ifthen package, which is not
% visibly loaded in this preamble -- confirm it is provided elsewhere.
% NOTE(review): exactly at noon or midnight the hour prints as 0
% (e.g. "0:30 p.m." for 12:30) -- apparently a limitation inherited
% from the Companion code this is adapted from.
\newcommand{\printtime}{%
\setcounter{ampm}{\time/720}% 0 = a.m., 1 = p.m.
\setcounter{hours}{(\time-\value{ampm}*720)/60}% hours past 12
\setcounter{minutes}{\time-\value{ampm}*720-\value{hours}*60}%
\ifthenelse{\value{ampm}=0}%
{\ifthenelse{\value{minutes}>9}%
{\thehours:\theminutes~a.m.}%
{\thehours:0\theminutes~a.m.}}% pad single-digit minutes with a 0
{\ifthenelse{\value{minutes}>9}%
{\thehours:\theminutes~p.m.}%
{\thehours:0\theminutes~p.m.}}} %
% code adapted from the
% LaTeX Companion (2d ed), p. 871
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{relsize} % Here \smaller scales by 1/1.2; \relscale{X} scales by X
% Block quotations: slightly smaller type (relsize's \relscale), indented
% by 5% of the text width on both sides; replaces the standard quote env.
\renewenvironment{quote}{\begin{list}{}
{\relscale{.90}\setlength{\leftmargin}{0.05\textwidth}
\setlength{\rightmargin}{\leftmargin}}
\item[]}
{\end{list}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage[polutonikogreek,english]{babel}
% From the Greek Text Society
%\usepackage{gfsneohellenic}
\usepackage{gfsporson}
% \gk{...}: typeset polytonic Greek (entered in babel's Latin
% transliteration) in the GFS Porson font.
\newcommand{\gk}[1]{\foreignlanguage{polutonikogreek}{%
%\relscale{0.8}
\textporson{%
#1}}%
}% Greek text
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\bce}{\textsc{b.c.e.}}% "before the Common Era," in small caps
\newcommand{\ce}{\textsc{c.e.}}% "of the Common Era," in small caps
\usepackage{amsmath,amssymb,amsthm,amscd}
\allowdisplaybreaks
\usepackage[mathscr]{euscript}
\usepackage{upgreek}
\usepackage{multicol}
\usepackage{stmaryrd} % \triangle{left,right}eqslant
\usepackage[matrix,arrow]{xy}
\usepackage{hfoldsty}
\usepackage{verbatim}
\usepackage[neverdecrease]{paralist}
\usepackage{graphicx,rotating} % for the German script picture
%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Theorems
%
%%%%%%%%%%%%%%%%%%%%%%%%
%\swapnumbers
% Propositions bear one of four titles (cf. the Introduction):
% theorems and lemmas are numbered sequentially and independently of
% one another; porisms and corollaries are numbered within the theorem
% they follow.
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{porism}{Porism}
\newtheorem{corollary}{Corollary}
\numberwithin{porism}{theorem}
\numberwithin{corollary}{theorem}
\theoremstyle{definition}% upright body text for the environments below
\newtheorem{remark}[theorem]{Remark}% remarks and examples share
\newtheorem{example}[theorem]{Example}% the theorem counter
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Index, and notation for logic, set theory, and the standard
%% number systems.
\usepackage{makeidx}
\makeindex
\newcommand{\zfc}{\mathrm{ZFC}}% Zermelo--Fraenkel axioms with Choice
\newcommand{\zf}{\mathrm{ZF}}% Zermelo--Fraenkel axioms
\newcommand{\lto}{\Rightarrow}% implication
\newcommand{\liff}{\Leftrightarrow}% equivalence ("if and only if")
%\renewcommand{\land}{\mathrel{\&}}
\usepackage{bm}
\newcommand{\tuple}[1]{\bm{#1}}% tuple, set in bold
\newcommand{\included}{\subseteq} % [the name suggests the meaning here]
\newcommand{\nincluded}{\nsubseteq} % [the name suggests the meaning here]
\newcommand{\pincluded}{\subset} % [the name suggests the meaning here]
\renewcommand{\leq}{\leqslant}% prefer the slanted forms of the
\renewcommand{\geq}{\geqslant}% order symbols throughout
\renewcommand{\nleq}{\nleqslant}
\renewcommand{\emptyset}{\varnothing}
\renewcommand{\setminus}{\smallsetminus}
\let\standardphi\phi% keep the original glyphs available,
\renewcommand{\phi}{\varphi}% but default to the variant forms
\let\standardepsilon\epsilon
\renewcommand{\epsilon}{\varepsilon}
\newcommand{\stnd}[1]{\mathbb{#1}}% blackboard bold for standard structures
\newcommand{\N}{\stnd{N}}% natural numbers (without 0; cf. \upomega)
\newcommand{\Z}{\stnd{Z}} % integers
\newcommand{\Q}{\stnd{Q}} % rationals
\newcommand{\Qp}{\Q^+} % positive rationals
\newcommand{\rc}[1]{#1^{\mathrm{rc}}} % real closure
\newcommand{\C}{\stnd{C}} % complex numbers
\newcommand{\R}{\stnd{R}} % real numbers
\newcommand{\Rp}{\R^+} % positive real numbers
\newcommand{\diff}[1][\R]{\textrm C_{\infty}(#1)}% C-infinity functions on #1 (default \R)
\newcommand{\F}{\stnd{F}} %
\newcommand{\Ham}{\stnd{H}} % quaternions
\newcommand{\Oct}{\stnd{O}} % octonions
%\newcommand{\primes}{\stnd{P}} % prime numbers
\newcommand{\on}{\mathbf{ON}} % ordinals
\newcommand{\cn}{\mathbf{CN}} % cardinals
\DeclareMathOperator{\id}{id} % identity-map
%\newcommand{\id}{\operatorname{id}} % identity-map
\newcommand{\gid}{\mathrm e} % identity of group
\newcommand{\bv}{\mathbf e} % "basis vector"
\newcommand{\mE}{\mathrm E} % matrix with these as rows
\newcommand{\inv}{^{-1}} % mult. inverse
\newcommand{\It}{\mathbf{I}}
\newcommand{\Mat}[2][n\times n]{\operatorname M_{#1}(#2)}% matrices of shape #1 (default n x n) over #2
\newcommand{\MatR}[1][n\times n]{\Mat[#1]{R}}% matrices over a ring R
\newcommand{\MatZ}[1][n\times n]{\Mat[#1]{\Z}}% matrices over \Z
\newcommand{\GL}[2][n]{\operatorname{GL}_{#1}(#2)}% general linear group
\newcommand{\GLZ}[1][n]{\GL[#1]{\Z}}
\newcommand{\GLR}[1][n]{\GL[#1]{R}}
\newcommand{\IM}{\mathrm I}
\newcommand{\Kfg}{\mathrm V_4} % Klein four group
\newcommand{\quat}{\mathrm Q_8} % Quaternion group
\newcommand{\str}[1]{\mathfrak{#1}} % structure
\newcommand{\qsep}{\;} % follows a quantified variable
\newcommand{\Forall}[1]{\forall{#1}\qsep }% universally quantified #1, then \qsep
\newcommand{\Exists}[1]{\exists{#1}\qsep }% existentially quantified #1, then \qsep
%% Notation for equivalence relations, divisibility, and group theory.
\newcommand{\modsim}{/\mathord{\sim}} % modulo the eq-ren \sim
\newcommand{\eqc}[1]{[#1]} % equivalence-class
\newcommand{\divides}{\mathrel{|}}% divisibility, as a relation symbol
\newcommand{\ndivides}{\mathrel{\nmid}}
\newcommand{\order}[1]{\lvert#1\rvert}% order of a group or element
\newcommand{\gpgen}[1]{\langle#1\rangle}% subgroup generated by #1
\newcommand{\trivgp}{\{\gid\}} % trivial group
\newcommand{\nsubgen}[1]{\langle\langle#1\rangle\rangle}% normal subgroup generated by #1
\newcommand{\unordered}[2]{[#2]^{#1}} % unordered #1-tuples from #2
\newcommand{\free}[1]{\operatorname{F}(#1)} % free group on #1
\newcommand{\fggen}{I} % generating set of a free group
\newcommand{\gprels}{B} % relations
\newcommand{\setactedon}{A} % set acted on by a group
\newcommand{\setcolon}{\colon}
\newcommand{\subgp}{<} % subgroup
\newcommand{\nsubgp}{\vartriangleleft} % normal subgroup
\newcommand{\nnsubgp}{\ntriangleleft} % not a normal subgroup
\newcommand{\nsupgp}{\vartriangleright} % normal supergroup
%\newcommand{\nnsupgp}{\ntriangleright} % normal supergroup
\newcommand{\psubgp}{\lneqq}% proper subgroup
\newcommand{\psupgp}{\gneqq}% proper supergroup
\newcommand{\Ker}[1]{\ker(#1)}% kernel
%\DeclareMathOperator{\im}{im} % image
\newcommand{\im}[1]{\operatorname{im}(#1)}% image
\newcommand{\congruence}{\equiv}
\newcommand{\siml}{\congruence_{\ell}^H}% left congruence modulo H
\newcommand{\simr}{\congruence_{\mathrm r}^H}% right congruence modulo H
\newcommand{\weakprod}{\sideset{}{^{\mathrm{w}}}\prod}% weak direct product (display)
\newcommand{\textweakprod}{\prod^{\mathrm w}}% weak direct product (inline)
\newcommand{\freeprod}{\sideset{}{^*}\prod}% free product (display)
\newcommand{\textfreeprod}{\prod^*}% free product (inline)
\newcommand{\gpres}[2]{\gpgen{#1\mid#2}}% group on #1 with rel'ns #2
\newcommand{\centr}[1]{\operatorname{C}(#1)} % center
\newcommand{\cseries}[2]{\operatorname{C}_{#1}(#2)} % central series
%\newcommand{\cseriesplain}[1]{\operatorname{C}_{#1}} % central series
\newcommand{\centralizer}[2]{\operatorname{C}_{#2}(#1)} % centralizer
\newcommand{\normalizer}[2]{\operatorname{N}_{#2}(#1)}% normalizer of #1 in #2
\newcommand{\dsubgp}[2]{{#2}^{(#1)}} % n-th derived subgroup of #2, where n=#1.
\newcommand{\tsubgp}[1]{#1_{\mathrm{t}}} % torsion sub-group
\newcommand{\family}[1]{\mathcal{#1}} % family (of sets)
\newcommand{\class}[1]{\mathbf{#1}} % class
\newcommand{\units}[1]{{#1}^{\times}} % group of units of a ring
\newcommand{\Zmod}[1]{\Z_{#1}}% integers modulo #1
\newcommand{\Zmodu}[1]{\units{\Zmod{#1}}}% units of the integers modulo #1
\DeclareMathOperator{\lcm}{lcm}
\newcommand{\rest}[1]{\restriction{#1}}% restriction of function to #1
\newcommand{\modulo}{\emph{modulo}}
\newcommand{\bracket}{\operatorname b} % (Lie) bracket
% Concerning permutations:
\newenvironment{cycle}{(}{)}% a cycle, in parentheses
\newcommand{\cdiv}{\;\,} % space between terms in a cycle
\newcommand{\sgn}{\operatorname{sgn}}% sign of a permutation
\newcommand{\sq}[2][\sigma]{q_{#1}(#2)} % used to define sgn
\newcommand{\Sym}[1]{\operatorname{Sym}(#1)}% symmetric group on #1
\newcommand{\Alt}[1]{\operatorname{Alt}(#1)} % alternating group
%\newcommand{\Dih}[1]{D_{#1}} % dihedral group
\newcommand{\Dih}[1]{\operatorname{Dih}(#1)} % dihedral group
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Upright quaternion units with thin-space padding:
\newcommand{\mi}{\mspace{2mu}\mathrm i\mspace{2mu}}
\newcommand{\mj}{\mathrm j\mspace{2mu}}
\newcommand{\mk}{\mathrm k}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\newcommand{\setimb}[1]{[#1]} % image of a set, using brackets
\newcommand{\abs}[1]{\left\lvert#1\right\rvert} % absolute value
\newcommand{\size}[1]{\lvert#1\rvert} % cardinality
\newcommand{\so}[1]{\operatorname{E}(#1)}% prints E(#1)
\newcommand{\End}[1]{\operatorname{End}(#1)}% endomorphisms
\newcommand{\Aut}[1]{\operatorname{Aut}(#1)}% automorphisms
\newcommand{\Hom}[1]{\operatorname{Hom}(#1)}% homomorphisms
\newcommand{\Inn}[1]{\operatorname{Inn}(#1)}% inner automorphisms
\newcommand{\Der}[1]{\operatorname{Der}(#1)}% derivations
% Abbreviations for kinds of rings, in small caps:
\newcommand{\pid}{\textsc{pid}}% principal ideal domain
\newcommand{\ufd}{\textsc{ufd}}% unique factorization domain
\newcommand{\ed}{\textsc{ed}}% Euclidean domain
%\newcommand{\pid}{PID}
%\newcommand{\Pid}{PID}
%\newcommand{\ufd}{UFD}
%\newcommand{\ed}{ED}
\newcommand{\primei}{\mathfrak{p}} % a prime ideal
\newcommand{\maxi}{\mathfrak{m}} % a maximal ideal
\newcommand{\supp}[1]{\operatorname{supp}(#1)}% support, with parentheses
\newcommand{\Supp}[1]{\operatorname{supp}[#1]}% support, with brackets
\newcommand{\symdiff}{\mathbin{\vartriangle}}% symmetric difference
\newcommand{\spec}[1]{\operatorname{Spec}(#1)}% spectrum of a ring
%\newcommand{\lang}{\mathcal{L}} % a language or signature
\usepackage{mathrsfs}
\newcommand{\pow}[1]{\mathscr{P}(#1)} % power set
\let\oldsqrt\sqrt
% NOTE(review): as written, only the \vphantom sits under the radical,
% and {#2} follows outside it, so the radical's bar does not extend
% over the argument -- confirm this (sign-plus-argument) styling is
% intended rather than \oldsqrt{\vphantom{#1}#2}.
\renewcommand{\sqrt}[2][1]{\oldsqrt{\vphantom{#1}}{#2}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\renewcommand{\theequation}{\roman{equation}}
%\renewcommand{\theequation}{\fnsymbol{equation}}
\renewcommand{\thepart}{\Roman{part}}% parts numbered with capital Roman numerals
\begin{document}
\title{Groups and Rings}
\author{David Pierce}
\date{%December 17, 2014\\
%July 22, 2015}
October 1, 2015}
\publishers{Matematik B\"ol\"um\"u\\
Mimar Sinan G\"uzel Sanatlar \"Universitesi\\
\url{dpierce@msgsu.edu.tr}\\
\url{http://mat.msgsu.edu.tr/~dpierce/}}
\uppertitleback{\centering
\emph{Groups and Rings}\\
\mbox{}\\
This work is licensed under the\\
Creative Commons Attribution--Noncommercial--Share-Alike
License.\\
To view a copy of this license, visit\\
\url{http://creativecommons.org/licenses/by-nc-sa/3.0/}\\
\mbox{}\\
\cc \ccby David Pierce \ccnc \ccsa\\
\mbox{}\\
Mathematics Department\\
Mimar Sinan Fine Arts University\\
Istanbul, Turkey\\
\url{http://mat.msgsu.edu.tr/~dpierce/}\\
\url{dpierce@msgsu.edu.tr}
}
%\frontmatter
\maketitle
\chapter*{Preface}
There have been several versions of the present text.
\begin{asparaenum}
\item
The first draft was my record of the first semester
of the graduate course in algebra
given at Middle East Technical University in Ankara in 2008--9.
I had taught the same course also in 2003--4.
The main reference for the course was Hungerford's \emph{Algebra} \cite{MR600654}.
\item
I revised my notes when teaching algebra a third time, in 2009--10.
Here I started making some attempt to indicate how theorems were going to be used later.
What is now \S\ref{sect:N}
(the development of the natural numbers from the Peano Axioms)
was originally prepared for a course called Non-Standard Analysis,
given at the Nesin Mathematics Village,
\c Sirince, in the summer of 2009.
I built up the foundational Chapter~\ref{ch:N} around this section.
\item
Another revision, but only partial,
came in preparation for a course
at Mimar Sinan Fine Arts University in Istanbul in 2013--4.
I expanded Chapter \ref{ch:N},
out of a desire to give some indication of how
mathematics, and especially algebra, could be built up from some simple axioms
about the relation of membership---that is, from set theory.
This building up, however, is not part of the course proper.
\item
The present version of the notes represents a more thorough-going revision,
made during and after the course at Mimar Sinan.
I try to make more use of examples, introducing them as early as possible.
The number theory that has always been in the background has been integrated more explicitly into the text (see page~\pageref{no-th}).
I have tried to distinguish more clearly
between what is essential to the course and what is not;
the starred sections comprise most of what is not essential.
\end{asparaenum}
All along, I have treated groups,
not merely as structures satisfying certain axioms, but
as structures isomorphic to groups of symmetries of sets.
The equivalence of the two points of view has been established
in the theorem named for Cayley
(in \S\ref{sect:groups}, on page~\pageref{thm:Cay}).
Now it is pointed out (in that section) that
standard structures like
$(\Qp,1,{}\inv,\cdot)$
and
$(\Q,0,-,+)$,
are also groups,
even though they are not obviously symmetry groups.
Several of these structures
are constructed in Chapter \ref{ch:N}.
(In earlier editions they were constructed later.)
Symmetry groups as such are investigated more thoroughly now,
in \S\S\ref{sect:sym} and \ref{sect:ms},
\emph{before} the group axioms are simplified in \S\ref{sect:simp}.
Rings are defined in Part~\ref{part:groups}, on groups,
so that their groups of units are available as examples of groups,
especially in \S\ref{sect:semidirect} on semidirect products (page~\pageref{sect:semidirect}).
Also rings are needed to produce rings of matrices and their groups of units,
as in \S\ref{sect:gl} (page~\pageref{sect:gl}).
I give many page-number references,
first of all for my own convenience in the composition of the text at the computer.
Thus the capabilities of Leslie Lamport's \LaTeX\ program
in automating such references are invaluable.
Writing the text could hardly have been contemplated in the first place
without Donald Knuth's original \TeX\ program.
I now use the \texttt{scrbook} document class of \textsf{KOMA-Script,}
``developed by Markus Kohm and based on earlier work by Frank Neukam'' \cite[p.~236]{LaTeX-Comp}.
Ideally every theorem would have an historical reference.
This is a distant goal, but I have made some moves in this direction.
The only exercises in the text are the theorems whose proofs are not already supplied.
Ideally more exercises would be supplied, perhaps in the same manner.
%\newpage
\tableofcontents
\listoffigures
%\setcounter{chapter}{-1}
\part{Preliminaries}
\chapter{Introduction}
Published around 300 \bce,
the \emph{Elements} of Euclid
is a model of mathematical exposition.
Each of its thirteen books
consists mainly of statements followed by proofs.
The statements are usually called \textbf{Propositions}
today~\cite{MR17:814b,MR1932864},
although they have no particular title
in the original text \cite{Euclid-Heiberg}.
By their content, they can be understood as \emph{theorems} or \emph{problems.}
Writing six hundred years after Euclid,
Pappus of Alexandria explains the difference \cite[p.~566]{MR13:419b}:
\begin{quote}
Those who wish to make more skilful distinctions in geometry find it worthwhile to call
\begin{compactitem}
\item
a \textbf{problem} (\gk{pr'oblhma}),
that in which it is \emph{proposed}
%\linebreak
(\gk{pro\-b'al\-letai}) to do or construct something;
\item
a \textbf{theorem} (\gk{je'wrhma}),
that in which the consequences and necessary implications
of certain hypotheses \emph{are investigated} (\gk{jewre~itai}).
\end{compactitem}
%But among the ancients some described them all as problems, some as theorems.
\end{quote}
(The Greek letters are listed and discussed
in Appendix \ref{app:Greek}, p.\ \pageref{app:Greek}.)
For example, Euclid's first proposition
is the problem of constructing an equilateral triangle.
His fifth proposition is the theorem
that the base angles of an isosceles triangle are equal to one another.
Each proposition of the present notes has one of four titles:
\textbf{Lemma, Theorem, Corollary,} or \textbf{Porism.}
Each proposition may be followed by an explicitly labelled proof,
which is terminated with a box $\qedsymbol$.
\emph{If there is no proof, the reader is expected to supply her or his own proof,
as an exercise.}
No propositions are to be accepted on faith.
Nonetheless, for an algebra course, some propositions are more important than others.
The full development of the foundational Chapter~\ref{ch:N} below
would take a course in itself,
but is not required for algebra as such.
In these notes, a proposition may be called a lemma
if it will be used to prove a theorem,
but then never used again.
Lemmas in these notes are numbered sequentially.
Theorems are also numbered sequentially,
independently from the lemmas.
A statement that can be proved easily from a theorem
is called a corollary and is numbered with the theorem.
So for example Theorem~\ref{thm:rec} on page~\pageref{thm:rec}
is followed by Corollary~\ref{cor:rec}.
Some propositions can be obtained easily,
not from a preceding theorem itself,
but from its proof.
Such propositions are called \emph{porisms}
and, like corollaries, are numbered with the theorems from whose proofs they are derived.
So for example Porism~\ref{por:prod} on p.\ \pageref{por:prod}
follows Theorem~\ref{thm:prod}.
The word \emph{porism} and its meaning are explained, in the 5th century \ce,
by Proclus in his commentary
on the first book of Euclid's \emph{Elements}~\cite[p.~212]{MR1200456}:
\begin{quote}
``Porism'' is a term applied to a certain kind of problem,
such as those in the \emph{Porisms} of Euclid.
But it is used in its special sense
when as a result of what is demonstrated
some other theorem comes to light without our propounding it.
Such a theorem is therefore called a ``porism,''
as being a kind of incidental gain resulting from the scientific demonstration.
\end{quote}
The translator explains that the word \emph{porism} comes from the verb \gk{por'izw},
meaning to furnish or provide.
The original source for much of the material of these notes
is Hungerford's \emph{Algebra} \cite{MR600654},
or sometimes Lang's \emph{Algebra} \cite{Lang-alg},
but there are various rearrangements and additions.
The back cover of Hungerford's book quotes a review:
\begin{quote}
Hungerford's exposition is clear enough that an average graduate student can read the text on his own and understand most of it.
\end{quote}
I myself aim for logical clarity;
but I do not intend for these notes to be a replacement for lectures in a classroom.
Such lectures may amplify some parts, while glossing over others.
As a graduate student myself,
I understood a course to consist of the teacher's lectures,
and the most useful reference was not a printed book,
but the notes that I took in my own hand.
I still occasionally refer to those notes today.
Hungerford is inspired by category theory,
of which his teacher Saunders Mac Lane was one of the creators.
Categories are defined in the present text
in \S\ref{sect:category} (page~\pageref{sect:category}).
The spirit of category theory is seen
at the beginning of Hungerford's Chapter I, ``Groups'':
\begin{quote}
There is a basic truth
that applies not only to groups
but also to many other algebraic objects
(for example, rings, modules, vector spaces, fields):
in order to study effectively an object
with a given algebraic structure,
it is necessary to study as well the functions
that preserve the given algebraic structure
(such functions are called homomorphisms).
\end{quote}
Hungerford's term \emph{object} here reflects the usage of category theory.
Taking inspiration from model theory,
the present notes will often use the term \emph{structure} instead.
Structures are defined in \S\ref{sect:structures} (page~\pageref{sect:structures}).
The examples of objects named by Hungerford are all structures
in the sense of model theory,
although not every object in a category is a structure in this sense.
When a word is printed in \textbf{boldface} in these notes,
the word is a technical term whose meaning can be inferred from the surrounding text.
\chapter{Mathematical foundations}\label{ch:N}%\label{part:N}
%\setcounter{section}{-1}
%\setchapterpreamble{
As suggested in the Introduction,
the full details of this chapter are not strictly part of an algebra course,
but are logically presupposed by the course.
One purpose of the chapter is to establish the notation whereby
\begin{align*}
\N&=\{1,2,3,\dots\},&
\upomega&=\{0,1,2,\dots\}.
\end{align*}
%$\N=\{1,2,3,\dots\}$ and $\upomega=\{0,1,2,\dots\}$.
The elements of $\upomega$ are the von Neumann natural numbers,%%%%%
\footnote{\label{fn:Greek}%%%%%
The letter $\upomega$
is not the minuscule English letter called \emph{double u,}
but the minuscule Greek \emph{omega,}
which is probably in origin a double o.
Obtained with the control sequence \url{\upomega}
from the \url{upgreek} package
for \LaTeX,
the $\upomega$ used here is upright,
unlike the standard slanted $\omega$ (obtained with \url{\omega};
see Appendix \ref{app:Greek}, p.\ \pageref{app:Greek}).
The slanted $\omega$ might be used as a variable
(as for example on page~\pageref{omega}).
We shall similarly distinguish between the constant $\uppi$
(used for the ratio of the circumference to the diameter of a circle,
as well as for the \emph{canonical projection}
defined on page~\pageref{can-proj}
and the \emph{coordinate projections}
defined on pages~\pageref{coord-proj} and \pageref{coord-proj-2})
and the variable $\pi$ (pages~\pageref{pi} and \pageref{pi-2}).}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
so that if $n\in\upomega$, then
\begin{equation*}
n=\{0,\dots,n-1\}.
\end{equation*}
In particular, $n$ is itself a set with $n$ elements.
When $n=0$, this means $n$ is the empty set.
A cartesian power $A^n$ can be understood as the set of functions from $n$ to $A$.
Then a typical element of $A^n$ can be written as $(a_0,\dots,a_{n-1})$.
Most people write $(a_1,\dots,a_n)$ instead;
and when they want an $n$-element set, they use $\{1,\dots,n\}$.
This is a needless complication,
since it leaves us with no simple abbreviation for an $n$-element set.
Another purpose of this chapter is to review the construction,
not only of the sets $\N$ and $\upomega$,
but the sets $\Qp$, $\Q$, $\Z$, $\Rp$, and $\R$ derived from them.
We ultimately have certain \emph{structures,} namely:
\begin{compactitem}
\item
the \emph{semigroup} $(\N,+)$;
\item
\emph{monoids} $(\upomega,0,+)$ and $(\N,1,\cdot)$;
\item
\emph{groups} $(\Qp,1,{}\inv,\cdot)$, $(\Q,0,-,+)$, $(\Z,0,-,+)$, $(\Rp,1,{}\inv,\cdot)$, and $(\R,0,-,+)$;
\item
\emph{rings} $(\Z,0,-,+,1,\cdot)$, $(\Q,0,-,+,1,\cdot)$, and $(\R,0,-,+,1,\cdot)$.
\end{compactitem}
\section{Sets and geometry}\label{sect:sets}
Most objects of mathematical study can be understood as \emph{sets.}
A set is a special kind of \emph{collection.}
A \textbf{collection} is many things, considered as one. Those many
things are the
\textbf{members}%
\index{member}
or
\textbf{elements}%
\index{element}
of the collection. The members \textbf{compose} the collection, and the collection \textbf{comprises} them.\footnote{Thus the relations named by the verbs \emph{compose} and \emph{comprise} are converses of one another; but native English speakers often confuse these two verbs.} Each member \textbf{belongs} to the collection and is \textbf{in} the collection, and the collection \textbf{contains} the member.
Designating certain collections as sets,
we shall identify some properties of them
that will allow us to do the mathematics that we want.
These properties will be expressed by \emph{axioms.}
We shall use versions of the so-called Zermelo--Fraenkel Axioms
with the Axiom of Choice.
The collection of these axioms is denoted by $\zfc$.
Most of these axioms were described by Zermelo in 1908 \cite{Zermelo-invest}.
We study sets axiomatically,
because a na\"\i ve approach can lead to contradictions.
For example, one might think na\"\i vely
that there was a collection of all collections.
But there can be no such collection,
because if there were, then there would be
a collection of all collections that did not contain themselves,
and \emph{this} collection would contain itself if and only if it did not.
This result is the \textbf{Russell Paradox,}\label{Russell}
described in a letter \cite{Russell-letter} from Russell to Frege in 1902.
The propositions of Euclid's \emph{Elements}
concern points and lines\label{line} in a plane and in space.
Some of these lines are \emph{straight} lines,
and some are circles.
Two straight lines that meet at a point make an \emph{angle.}
Unless otherwise stated, straight lines have endpoints.
It is possible to compare two straight lines, or two angles:
if they can be made to coincide, they are \emph{equal} to one another.
This is one of Euclid's so-called \emph{common notions.}
If a straight line has an endpoint on another straight line,
two angles are created.
If they are equal to one another, then they are called \emph{right angles.}
One of Euclid's \emph{postulates} is that all right angles are equal to one another.
The other postulates tell us things that we can do:
Given a center and radius, we can draw a circle.
From any given point to another, we can draw a straight line,
and we can extend an existing straight line beyond its endpoints;
indeed, given \emph{two} straight lines,
with another straight line cutting them
so as to make the interior angles on the same side together less than two right angles,
we can extend the first two straight lines so far that they will intersect one another.
Using the common notions and the postulates,
Euclid proves propositions:
the problems and theorems discussed in the Introduction above.
The common notions and the postulates
do not \emph{create} the plane or the space
in which the propositions are set.
The plane or the space exists already.
The Greek word \gk{gewmetr'ia} has the original meaning of \emph{earth measurement,}
that is, surveying.
People knew how to measure the earth long before Euclid's \emph{Elements} was written.
Similarly, people were doing mathematics long before set theory was developed.
Accordingly, the set theory presented here
will assume that sets already exist.
Where Euclid has postulates, we shall have axioms.
Where Euclid has definitions and common notions and certain unstated assumptions,
we shall have definitions and certain logical principles.
It is said of the \emph{Elements,}
\begin{quote}
A critical study of Euclid, with, of course, the advantage of present insights,
shows that he uses dozens of assumptions that he never states and undoubtedly did not recognize.\hfill\cite[p.~87]{MR0472307}
\end{quote}
One of these assumptions is that two circles will intersect
if each of them passes through the center of the other.
(This assumption is used to construct an equilateral triangle.)
But it is impossible to state \emph{all} of one's assumptions.
We shall assume, for example, that if a formal sentence $\Forall x\phi(x)$ is true,
what this means is that $\phi(a)$ is true for arbitrary $a$.\label{a}
\emph{This} means $\phi(b)$ is true, and $\phi(c)$ is true, and so on.
However, there is nothing at the moment called $a$ or $b$ or $c$ or whatever.
For that matter, we have no actual formula called $\phi$.
There is nothing called $x$,
and moreover there will never be anything called $x$
in the way that there might be something called $a$.
Nonetheless, we assume that everything we have said
about $\phi$, $x$, $a$, $b$, and $c$ makes sense.
The elements of every set will be sets themselves.
By definition, two sets will be
\emph{equal}\label{equal}%
\index{equal}
if they have the same elements.
There will be an \emph{empty set,}\label{empty} denoted by
\begin{equation*}
\emptyset;
\end{equation*}
this will have no elements.
If $a$ is a set, then there will be a set denoted by
\begin{equation*}
\{a\},
\end{equation*}
with the unique element $a$. If $b$ is also a set, then there will be a set denoted by
\begin{equation*}
a\cup b,
\end{equation*}
whose members are precisely the members of $a$ and the members of $b$.
Thus there will be sets $a\cup\{b\}$ and $\{a\}\cup\{b\}$;
the latter is usually written as
\begin{equation*}
\{a,b\}.
\end{equation*}
If $c$ is another set, we can form the set $\{a,b\}\cup\{c\}$,
which we write as
\begin{equation*}
\{a,b,c\},
\end{equation*}
and so forth.
This will allow us to build up the following infinite sequence:
\begin{align*}
&\emptyset,&
&\{\emptyset\},&
&\bigl\{\emptyset,\{\emptyset\}\bigr\},&
&\Bigl\{\emptyset,\{\emptyset\},\bigl\{\emptyset,\{\emptyset\}\bigr\}\Bigr\},&
&\dots
\end{align*}
By definition,\label{nat} these sets will be the natural numbers $0$, $1$, $2$, $3$, \dots
To be more precise, they are the \textbf{von Neumann natural numbers} \cite{von-Neumann}.
\section{Set theory}
\subsection{Notation}
Our formal axioms for set theory will be written in a certain logic,
whose symbols are:
\begin{compactenum}[1)]
\item
variables, as $x$, $y$, and $z$;
\item
the symbol $\in$ denoting the membership relation;
\item
the Boolean connectives of propositional logic:
\begin{compactenum}
\item
the singulary connective $\lnot$ (``not''), and
\item
the binary connectives $\lor$ (``or''), $\land$ (``and''), $\lto$ (``implies''),
and $\liff$ (``if and only if'');
\end{compactenum}
\item
parentheses;
\item
quantification symbols $\exists$ (``there exists'') and $\forall$ (``for all'').
\end{compactenum}
We may also introduce constants, as $a$, $b$, and $c$, or $A$, $B$, and $C$, to stand for particular sets. A variable or a constant is called a \emph{term.} If $t$ and $u$ are terms, then the expression
\begin{equation*}
t\in u
\end{equation*}
is called an \textbf{atomic formula.}
It means $t$ is a member of $u$.
From atomic formulas, other formulas are built up \emph{recursively}
by use of the symbols above, according to certain rules, namely,
\begin{compactenum}[1)]
\item
if $\phi$ is a formula, then so is $\lnot\phi$;
\item
if $\phi$ and $\psi$ are formulas, then so is $(\phi*\psi)$,
where $*$ is one of the binary Boolean connectives;
\item
if $\phi$ is a formula and $x$ is a variable,
then $\Exists x\phi$ and $\Forall x\phi$ are formulas.
\end{compactenum}
The formula $\lnot\;t\in u$ says $t$ is \emph{not} a member of $u$.
We usually abbreviate the formula by
\begin{equation*}
t\notin u.
\end{equation*}
The expression $\Forall z(z\in x\lto z\in y)$ is the formula saying
that every element of $x$ is an element of $y$. Another way to say
this is that $x$ is a
\textbf{subset}%
\index{subset}
of $y$, or that $y$
\textbf{includes}%
\index{include}
$x$. We abbreviate this formula by%%%%%
\footnote{The relation $\included$ of being included
is completely different from the relation $\in$ of being contained.
However, many mathematicians confuse these relations in words,
using the word \emph{contained} to describe both.}
\begin{equation*}
x\included y.
\end{equation*}
The expression $x\included y\land y\included x$ is the formula
saying that $x$ and $y$ have the same members,
so that they are \textbf{equal} by the definition foretold above
(page~\pageref{empty}); in this case we use the abbreviation
\begin{equation*}
x=y.
\end{equation*}
All occurrences of $x$
in the formulas $\Exists x\phi$ and $\Forall x\phi$ are \textbf{bound,}%%%%%
\footnote{The word \emph{bound} here
is the past participle of the verb \emph{to bind.}
There is another verb, \emph{to bound,}
which is also used in mathematics,
but its past participle is \emph{bounded.}
The two verbs \emph{to bind} and \emph{to bound} are apparently unrelated.
The verb \emph{to bind} has been part of English
since the beginning of that language in the tenth century.
The verb \emph{to bound} is based on the noun \emph{bound,}
which entered Middle English in the 12th century from the Old French noun
that became the modern \emph{borne.}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
and they remain bound when other formulas are built up from these formulas.
Occurrences of a variable that are not bound are \textbf{free.}
\subsection{Classes and equality}
A \textbf{singulary}%%%%%
\footnote{The word
\textbf{unary}\index{unary} is
more common, but less etymologically correct.}
%%%%%
formula is a formula in which only one variable occurs freely.
If $\phi$ is a singulary formula with free variable $x$, we may write $\phi$ as
\begin{equation*}
\phi(x).
\end{equation*}
If $a$ is a set, then by replacing every free occurrence of $x$ in $\phi$ with $a$,
we obtain the formula
\begin{equation*}
\phi(a),
\end{equation*}
which is called a \textbf{sentence} because it has no free variables.
This sentence is true or false, depending on which set $a$ is.
If the sentence is true, then $a$ can be said to \textbf{satisfy} the formula $\phi$.
There is a collection of all sets that satisfy $\phi$:
we denote this collection by
\begin{equation*}
\{x\colon\phi(x)\}.
\end{equation*}
Such a collection is called a
\textbf{class.}%
\index{class}\label{class}
In particular, it is the class \textbf{defined} by the formula $\phi$.
If we give this class the name $\bm C$, then the expression
\begin{equation*}
x\in\bm C
\end{equation*}
means just $\phi(x)$.
A formula in which only two variables occur freely is \textbf{binary.}
If $\psi$ is such a formula, with free variables $x$ and $y$,
then we may write $\psi$ as
\begin{equation*}
\psi(x,y).
\end{equation*}
We shall want this notation for proving Theorem~\ref{thm:=} below.
If needed, we can talk about ternary formulas $\chi(x,y,z)$, and so on.
The definition of equality of sets can be expressed by the sentences
\begin{gather}\label{eqn:=}
\Forall x\Forall y\bigl(x=y\lto(a\in x\liff a\in y)\bigr),\\\label{eqn:>=}
\Forall x\Forall y\Exists z\bigl(\lnot(z\in x\liff z\in y)\lor x=y\bigr),
\end{gather}
where $a$ is an arbitrary set.
The \textbf{Equality Axiom} is that equal sets belong to the same sets:
\begin{equation}\label{eqn:=2}
\Forall x\Forall y\bigl(x=y\lto(x\in a\liff y\in a)\bigr).
\end{equation}
The meaning of the sentences \eqref{eqn:=} and \eqref{eqn:=2}
is that equal sets satisfy the same atomic formulas $a\in x$ and $x\in a$.
\begin{theorem}\label{thm:=}
Equal sets satisfy the same formulas:
\begin{equation}\label{eqn:=thm}
\Forall x\Forall y\Bigl(x=y\lto\bigl(\phi(x)\liff\phi(y)\bigr)\Bigr).
\end{equation}
\end{theorem}
\begin{proof}
Suppose $a$ and $b$ are equal sets.
By symmetry, it is enough to show
\begin{equation}\label{eqn:ab}
\phi(a)\lto\phi(b)
\end{equation}
for all singulary formulas $\phi(x)$.
As noted, we have \eqref{eqn:ab}
whenever $\phi(x)$ is an atomic formula $x\in c$ or $c\in x$.
We also have it when $\phi$ is $x\in x$.
If we have \eqref{eqn:ab} when $\phi$ is $\psi$,
then we have it when $\phi$ is $\lnot\psi$.
If we have \eqref{eqn:ab} when $\phi$ is $\psi$ or $\chi$,
then we have it when $\phi$ is $(\psi*\chi)$,
where $*$ is one of the binary connectives.
If, for some binary formula $\psi$,
we have \eqref{eqn:ab} whenever $\phi(x)$ is of the form $\psi(x,c)$,
then we have it when $\phi(x)$ is $\Forall y\psi(x,y)$ or $\Exists y\psi(x,y)$.
Therefore we do have \eqref{eqn:ab} in all cases.
\end{proof}
The foregoing is a proof by \textbf{induction.}
Such a proof is possible because formulas are defined recursively.
See \S\ref{sect:N} below (page~\pageref{sect:N}).
Actually we have glossed over some details.
This may cause confusion;
but then the details themselves could cause confusion.
What we are really proving is all of the sentences of one of the infinitely many forms
\begin{equation}\label{eqn:''}
\left.
\begin{gathered}
\Forall x\Forall y\Bigl(x=y\lto\bigl(\phi(x)\liff\phi(y)\bigr)\Bigr),\\
\Forall x\Forall y\Forall z\Bigl(x=y\lto\bigl(\phi(x,z)\liff\phi(y,z)\bigr)\Bigr),\\
\Forall x\Forall y\Forall z\Forall{z'}
\Bigl(x=y\lto\bigl(\phi(x,z,z')\liff\phi(y,z,z')\bigr)\Bigr),\\
\makebox[8.8cm]{\dotfill,}
%\Forall x\Forall y\Forall z\Forall{z'}\Forall{z''}\Bigl(x=y\lto\bigl(\phi(x,z,z',z'')\liff\phi(y,z,z',z'')\bigr)\Bigr),
\end{gathered}
\right\}
\end{equation}
%and so on,
where no constant occurs in any of the formulas $\phi$.
Assuming $a=b$, it is enough to prove every sentence of one of the forms
\begin{gather*}
\phi(a)\liff\phi(b),\\
\phi(a,c)\liff\phi(b,c),\\
\phi(a,c,c')\liff\phi(b,c,c'),\\
\makebox[4cm]{\dotfill}
%\phi(a,c,c',c'')=\phi(b,c,c',c''),
\end{gather*}
%and so on.
We have tried to avoid writing all of this out,
by allowing constants to occur implicitly in formulas,
and by understanding $\Forall x\phi(x)$ to mean $\phi(a)$ for arbitrary $a$,
as suggested above (page~\pageref{a}).
We could abbreviate the sentences in \eqref{eqn:''} as
\begin{multline}\label{eqn:...}
\Forall x\Forall y\Forall{z_1} \dots\Forall{z_n}
\Bigl(x=y\lto\\
\bigl(\phi(x,z_1,\dots,z_n)\liff\phi(y,z_1,\dots,z_n)\bigr)\Bigr).
\end{multline}
However, we would have to explain what $n$ was and what the dots of ellipsis meant.
The expression in \eqref{eqn:...}
means one of the formulas in the infinite list suggested in \eqref{eqn:''},
and there does not seem to be a better way to say it than that.
The sentence \eqref{eqn:=thm} is usually taken as a logical axiom,
like one of Euclid's common notions.
Then \eqref{eqn:=} and \eqref{eqn:=2} are special cases of this axiom,
but \eqref{eqn:>=} is no longer true, either by definition or by proof.
So this too must be taken as an axiom, which is called the \textbf{Extension Axiom.}
%The idea behind the name is that having the same members
%means having the same \emph{extension.}
In any case, all of the sentences \eqref{eqn:=}, \eqref{eqn:>=}, \eqref{eqn:=2}, and \eqref{eqn:=thm} end up being true. They tell us that equal sets are precisely those sets that are logically indistinguishable.
We customarily treat equality as \emph{identity.} We consider equal sets to be the \emph{same} set. If $a=b$, we may say simply that $a$ is $b$.
Similarly, in ordinary mathematics,
since $1/2=2/4$,\label{1/2} we consider $1/2$ and $2/4$ to be the same.
In ordinary \emph{life} they are distinct:
$1/2$ is one thing, namely one half,
while $2/4$ is two things, namely two quarters.
In mathematics, we ignore this distinction.
As with sets, so with classes,
one \textbf{includes} another
if every element of the latter belongs to the former.
Hence if formulas $\phi(x)$ and $\psi(y)$
define classes $\class C$ and $\class D$ respectively,
and if
\begin{equation*}
\Forall x\bigl(\phi(x)\lto\psi(x)\bigr),
\end{equation*}
this means $\class D$ includes $\class C$, and we write
\begin{equation*}
\class C\included\class D.
\end{equation*}
If also $\class C$ includes $\class D$,
then the two classes are \textbf{equal,}
and we write
\begin{equation*}
\class C=\class D;
\end{equation*}
this means $\Forall x\bigl(\phi(x)\liff\psi(x)\bigr)$.
Likewise a set and a class can be considered as \textbf{equal}
if they have the same members.
Thus if again $\bm C$ is defined by $\phi(x)$,
then the expression
\begin{equation*}
a=\bm C
\end{equation*}
means $\Forall x\bigl(x\in a\liff\phi(x)\bigr)$.
\begin{theorem}
Every set is a class.
\end{theorem}
\begin{proof}
The set $a$ is the class $\{x\colon x\in a\}$.
\end{proof}
However, there is no reason to expect the converse to be true.
\begin{theorem}\label{thm:RP}
Not every class is a set.
\end{theorem}
\begin{proof}
There are formulas $\phi(x)$ such that
\begin{equation*}
\Forall y\lnot\Forall x\bigl(x\in y\liff\phi(x)\bigr).
\end{equation*}
Indeed, let $\phi(x)$ be the formula $x\notin x$.
Then
\begin{equation*}
\Forall y\lnot\bigl(y\in y\liff\phi(y)\bigr).\qedhere
\end{equation*}
\end{proof}
More informally, the argument is that the class $\{x\colon x\notin x\}$ is not a set,
because if it were a set $a$, then $a\in a\liff a\notin a$,
which is a contradiction.
This is what was given above as the Russell Paradox (page~\pageref{Russell}).
Another example of a class that is not a set
is given by the \emph{Burali-Forti Paradox} on page~\pageref{BF} below.
\subsection{Construction of sets}
We have established what it means for sets to be equal.
We have established that sets are examples,
but not the only examples,
of the collections called classes.
However, we have not officially exhibited any sets.
We do this now.
The \textbf{Empty Set Axiom} is
\begin{equation*}
\Exists x\Forall yy\notin x.
\end{equation*}
As noted above (page~\pageref{empty}),
the set whose existence is asserted by this axiom is denoted by $\emptyset$.
This set is the class $\{x\colon x\neq x\}$.
We now obtain the sequence $0$, $1$, $2$, \dots, described above
(p.\ \pageref{nat}).
We use the Empty Set Axiom to start the sequence.
We continue by means of the \textbf{Adjunction Axiom:}
if $a$ and $b$ are sets, then the set denoted by $a\cup\{b\}$ exists.
Formally, the axiom is
\begin{equation*}
\Forall x\Forall y\Exists z\Forall w(w\in z\liff w\in x\lor w=y).
\end{equation*}
In writing this sentence,
we follow the convention whereby
the connectives $\lor$ and $\land$ are more binding than $\lto$ and $\liff$,
so that, for example, the expression
\begin{equation*}
(w\in z\liff w\in x\lor w=y)
\end{equation*}
means the formula $\bigl(w\in z\liff (w\in x\lor w=y)\bigr)$.
We can understand the Adjunction Axiom as saying that, for all sets $a$ and $b$,
the class $\{x\colon x\in a\lor x=b\}$ is actually a set.
Adjunction is not one of Zermelo's original axioms of 1908;
but the following is Zermelo's \textbf{Pairing Axiom:}
\begin{theorem}
For any two sets $a$ and $b$, the set $\{a,b\}$ exists:
\begin{equation*}
\Forall x\Forall y\Exists z\Forall w(w\in z\liff w=x\lor w=y).
\end{equation*}
\end{theorem}
\begin{proof}
By Empty Set and Adjunction, $\emptyset\cup\{a\}$ exists, but this is just $\{a\}$.
Then $\{a\}\cup\{b\}$ exists by Adjunction again.
\end{proof}
The theorem is that the class $\{x\colon x=a\lor x=b\}$ is always a set.
Actually Zermelo does not have a Pairing Axiom as such,
but he has an \textbf{Elementary Sets Axiom,}
which consists of what we have called the Empty Set Axiom and the Pairing Axiom.%%%%%
\footnote{Zermelo also requires that for every set $a$ there be a set $\{a\}$;
but this can be understood as a special case of pairing.}
Every class $\bm C$ has a \textbf{union,}
which is the class
\begin{equation*}
\{x\colon\Exists y(x\in y\land y\in\bm C)\}.
\end{equation*}
This class is denoted by
\begin{equation*}
\bigcup\bm C.
\end{equation*}
This notation is related as follows
with the notation for the classes involved in the Adjunction Axiom:
\begin{theorem}
For all sets $a$ and $b$, $a\cup\{b\}=\bigcup\bigl\{a,\{b\}\bigr\}$.
\end{theorem}
We can now use the more general notation
\begin{equation*}
a\cup b=\bigcup\{a,b\}.
\end{equation*}
The \textbf{Union Axiom} is that the union of a \emph{set} is always a set:
\begin{equation*}
\Forall x\Exists yy=\bigcup x.
\end{equation*}
The Adjunction Axiom is
a consequence of the Empty-Set, Pairing, and Union Axioms.
This is why Zermelo did not need Adjunction as an axiom.
We state it as an axiom,
because we can do a lot of mathematics with it
that does not require the full force of the Union Axiom.
We shall however use the Union Axiom
when considering unions of chains of structures
(as on p.\ \pageref{chains} below).
Suppose $A$ is a set and $\bm C$ is the class $\{x\colon\phi(x)\}$.
Then we can form the class
\begin{equation*}
A\cap\bm C,
\end{equation*}
which is defined by the formula $x\in A\land\phi(x)$.
The \textbf{Separation Axiom} is that this class is a set.
Standard notation for this set is
\begin{equation}\label{eqn:xinA}
\{x\in A\colon\phi(x)\}.
\end{equation}
However, this notation is unfortunate.
Normally the formula $x\in A$ is read as a sentence of ordinary language,
namely ``$x$ belongs to $A$'' or ``$x$ is in $A$.''
However, the expression in \eqref{eqn:xinA} is read
as ``the set of $x$ in $A$ such that $\phi$ holds of $x$'';
in particular, $x\in A$ here is read as the noun phrase ``$x$ in $A$''
(or ``$x$ belonging to $A$,'' or ``$x$ that are in $A$,''
or something like that).%%%%%
\footnote{Ambiguity of expressions like $x\in A$ (is it a noun or a sentence?)
is common in mathematical writing, as for example in the abbreviation of $\Forall{\varepsilon}(\varepsilon>0\lto\phi)$ as $(\forall{\varepsilon>0})\;\phi$.
Such ambiguity is avoided in these notes.
However, certain ambiguities are tolerated:
letters like $a$ and $A$ stand sometimes for sets,
sometimes for \emph{names} for sets.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Actually Separation is a \emph{scheme} of axioms, one for each singulary formula $\phi$:
\begin{equation*}
\Forall x\Exists y\Forall z\bigl(z\in y\liff z\in x\land\phi(z)\bigr).
\end{equation*}
In most of mathematics, and in particular in the other sections of these notes,
one need not worry too much about the distinction between sets and classes.
But it is logically important.
It turns out that the objects of interest in mathematics can be understood as sets.
Indeed, we have already defined natural numbers as sets.
We can talk about sets by means of formulas.
Formulas define classes of sets, as we have said.
Some of these classes turn out to be sets themselves;
but again, there is no reason to expect all of them to be sets,
and indeed by Theorem~\ref{thm:RP} (p.\ \pageref{thm:RP})
some of them are not sets.
\emph{Sub-classes} of sets are sets, by the Separation Axiom;
but some classes are too big to be sets.
The class $\{x\colon x=x\}$ of all sets is not a set,
since if it were, then the sub-class $\{x\colon x\notin x\}$ would be a set, and it is not.
Every set $a$ has a \emph{power class,}
namely the class $\{x\colon x\included a\}$ of all subsets of $a$.
This class is denoted by
\begin{equation*}
\pow a.
\end{equation*}
The \textbf{Power Set Axiom} is that this class is a set:
\begin{equation*}
\Forall x\Exists yy=\pow x.
\end{equation*}
Then $\pow a$ can be called the \textbf{power set} of $a$.
In the main text, after this chapter,
we shall not explicitly mention power sets until p.\ \pageref{pow}.
However, the Power Set Axiom is of fundamental importance
for allowing us to prove Theorem~\ref{thm:prod-set} on p.\ \pageref{thm:prod-set} below.
We want the \textbf{Axiom of Infinity} to be simply that
the collection $\{0,1,2,\dots\}$ of natural numbers
as defined on p.\ \pageref{nat} is a set.
It is not obvious how to formulate this as a sentence of our logic.
However, the indicated collection contains $0$, which by definition is the empty set;
also, for each of its elements $n$,
the collection contains also $n\cup\{n\}$.
Let $\It$ be the class of all \emph{sets} with these properties: that is,
\begin{equation*}
\It=\bigl\{x\colon0\in x\land\Forall y(y\in x\lto y\cup\{y\}\in x)\bigr\}.
\end{equation*}
Thus, if it exists, the set of natural numbers will belong to $\It$.
Furthermore, the set of natural numbers
will be the \emph{smallest} element of $\It$.
But we still must make this precise.
For an arbitrary class $\bm C$, we define
\begin{equation*}
\bigcap\bm C=\{x\colon\Forall y(y\in\bm C\lto x\in y)\}.
\end{equation*}
This class is the \textbf{intersection} of $\bm C$.
\begin{theorem}
If $a$ and $b$ are two sets, then
\begin{equation*}
a\cap b=\bigcap\{a,b\}.
\end{equation*}
If $a\in\bm C$, then
\begin{equation*}
\bigcap\bm C\included a,
\end{equation*}
so in particular $\bigcap\bm C$ is a set.
However, $\bigcap\emptyset$ is the class of all sets, which is not a set.
\end{theorem}
We can now define%%%%%
\footnote{Some writers define $\bigcap\bm C$ only when $\bm C$ is a nonempty set.
This would make our definition of $\upomega$ invalid without the Axiom of Infinity.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{equation}\label{eqn:upomega-defn}
\upomega=\bigcap\It.
\end{equation}
\begin{theorem}\label{thm:AI}
The following conditions are equivalent.
\begin{compactenum}
\item
$\It\neq\emptyset$.
\item
$\upomega$ is a set.
\item
$\upomega\in\It$.
\end{compactenum}
\end{theorem}
Any of the equivalent conditions in the theorem can be taken as the Axiom of Infinity.
This does not by itself establish that
$\upomega$ has the properties we expect of the natural numbers;
we still have to do some work.
We shall do this in \S\ref{sect:omega} (p.~\pageref{sect:omega}).
The \textbf{Axiom of Choice} can be stated
in any of several equivalent versions.
One of these versions is that every set can be \textbf{well-ordered:}
that is, the set can be given a linear ordering
(as defined on p.\ \pageref{lo} below)
so that every nonempty subset has a least element
(as in Theorem~\ref{thm:wo} on p.\ \pageref{thm:wo}).
However, we have not yet got a way to understand an ordering as a set.
An ordering is a kind of binary relation,
and a binary formula can be understood to define a binary relation.
But we cannot yet use our logical symbolism
to say that such a relation \emph{exists.}
We shall be able to do so in the next section.
We shall use the Axiom of Choice:
\begin{compactitem}
\item
to establish
that every set has a \emph{cardinality} (p.\ \pageref{cardinality});
\item
to prove Theorem~\ref{thm:PID->UFD},
that every \pid\ is a \ufd\ (p.\ \pageref{thm:PID->UFD});
\item
to prove Zorn's Lemma (p.\ \pageref{thm:ZL});
\item
hence to prove Stone's theorem on representations of Boolean rings
(p.\ \pageref{thm:Stone}).
\end{compactitem}
The Axiom can also be used to show:
\begin{compactitem}
\item
that direct sums are not always the same as direct products (p.\ \pageref{ac});
\item
that nonprincipal ultraproducts of fields exist (p.\ \pageref{ac-up}).
\end{compactitem}
For the record, we have now named all of the axioms given by Zermelo in 1908:
\begin{inparaenum}[(I)]
\item
Extension,
\item
Elementary Sets,
\item
Separation,
\item
Power Set,
\item
Union,
\item
Choice, and
\item
Infinity.
\end{inparaenum}
Zermelo assumes that equality is identity:
but his assumption is our Theorem~\ref{thm:=}.
In fact Zermelo does not use logical formalism as we have.
We prefer to define equality with \eqref{eqn:=} and \eqref{eqn:>=}
and then use the Axioms of
\begin{inparaenum}[(i)]
\item
the Empty Set,
\item
Equality,
\item
Adjunction,
\item
Separation,
\item
Union,
\item
Power Set,
\item
Infinity, and
\item
Choice.
\end{inparaenum}
But these two collections of definitions and axioms are logically equivalent.
Apparently Zermelo overlooked one axiom, the \emph{Replacement Axiom,}
which was supplied in 1922 by Skolem \cite{Skolem-some-remarks} and by Fraenkel.%%%%%
\footnote{I have not been able to consult Fraenkel's original papers.
However, according to van Heijenoort \cite[p.~291]{MR1890980},
Lennes also suggested something like the Replacement Axiom at around the same time (1922) as Skolem and Fraenkel; but Cantor had suggested such an axiom in 1899.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We shall give this axiom in the next section.
An axiom never needed in ordinary mathematics is the \emph{Foundation Axiom.} Stated originally by von Neumann \cite{von-Neumann-ax}, it ensures that certain pathological situations, like a set containing itself, are impossible. It does this by declaring that every nonempty set has an element that is disjoint from it: $\Forall x\Exists y(x\neq\emptyset\lto y\in x\land x\cap y=\emptyset)$. We shall never use this.
The collection called $\zfc$ is Zermelo's axioms, along with Replacement and Foundation. If we leave out Choice, we have what is called $\zf$.
\section{Functions and relations}\label{sect:f}
Given two sets $a$ and $b$, we define
\begin{equation*}
(a,b)=\bigl\{\{a\},\{a,b\}\bigr\}.
\end{equation*}
This set is the \textbf{ordered pair}
whose first entry is $a$ and whose second entry is $b$.
The purpose of the definition is to make the following theorem true.
\begin{theorem}
Two ordered pairs are equal if and only if
their first entries are equal and their second entries are equal:
\begin{equation*}\label{eqn:op}
(a,b)=(x,y)\liff a=x\land b=y.
\end{equation*}
\end{theorem}
If $A$ and $B$ are sets, then we define
\begin{equation*}
A\times B=\{z\colon\Exists x\Exists y(z=(x,y)\land x\in A\land y\in B)\}.
\end{equation*}
This is the \textbf{cartesian
product}\index{cartesian product}
of $A$ and $B$.
\begin{theorem}\label{thm:prod-set}
The cartesian product of two sets is a set.
\end{theorem}
\begin{proof}
If $a\in A$ and $b\in B$,
then $\{a\}$ and $\{a,b\}$ are elements of $\pow{A\cup B}$,
so $(a,b)\in\pow{\pow{A\cup B}}$, and therefore
\begin{equation*}
A\times B\included\pow{\pow{A\cup B}}.\qedhere
\end{equation*}
\end{proof}
An \textbf{ordered triple}\index{ordered triple} $(x,y,z)$
can be defined as $\bigl((x,y),z\bigr)$, and so forth.
A \textbf{function}\index{function} or \textbf{map}\index{map}
from $A$ to $B$
is a subset $f$ of $A\times B$ such that,
for each $a$ in $A$,
there is exactly one $b$ in $B$ such that $(a,b)\in f$.
Then instead of $(a,b)\in f$, we write
\begin{equation}\label{eqn:f}
f(a)=b.
\end{equation}
We have then
\begin{equation*}
A=\{x\colon\Exists yf(x)=y\},
\end{equation*}
that is, $A=\{x\colon\Exists y(x,y)\in f\}$.
The set $A$ is called the \textbf{domain} of $f$.
A function is sometimes said to be a function \textbf{on} its domain.
For example, the function $f$ here is a function on $A$.
The \textbf{range} of $f$ is the subset
\begin{equation*}
\{y\colon\Exists xf(x)=y\}
\end{equation*}
of $B$.
If this range is actually equal to $B$,
then we say that $f$ is \textbf{surjective onto} $B$,
or simply that $f$ is \textbf{onto} $B$.
Strictly speaking, it would not make sense to say $f$ was surjective or onto, simply.
A function $f$ is
\textbf{injective} or \textbf{one-to-one,} if
\begin{equation*}
\Forall x\Forall z(f(x)=f(z)\lto x=z).
\end{equation*}
The expression $f(x)=f(z)$ is an abbreviation of $\Exists y(f(x)=y\land f(z)=y)$,
which is another way of writing $\Exists y\bigl((x,y)\in f\land(z,y)\in f\bigr)$.
An injective function from $A$ \emph{onto} $B$ is a \textbf{bijection} from $A$ to $B$.
If it is not convenient to name a function with a single letter like $f$,
we may write the function as
\begin{equation*}
x\mapsto f(x),
\end{equation*}
where the expression $f(x)$ would be replaced by some particular expression involving $x$.
As an abbreviation of the statement that $f$ is a function from $A$ to $B$,
we may write
\begin{equation}\label{eqn:f:B->A}
f\colon A\to B.
\end{equation}
Thus, while the symbol $f$ can be understood as a \emph{noun,}
the expression $f\colon A\to B$ is a complete \emph{sentence.}
If we say, ``Let $f\colon A\to B$,'' we mean
let $f$ be a function from $A$ to $B$.
If $f\colon A\to B$ and $D\included A$, then the subset $\{y\colon\Exists x(x\in D\land y=f(x))\}$ of $B$
can be written as one of%%%%%
\footnote{The notation $f(D)$ is also used, but the ambiguity is dangerous,
at least in set theory as such.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{align*}
&\{f(x)\colon x\in D\},&
&f[D].
\end{align*}
This set is the \textbf{image} of $D$ under $f$.
Similarly, we can write
\begin{equation*}
A\times B=\{(x,y)\colon x\in A\land y\in B\}.
\end{equation*}
Then variations on this notation are possible.
For example, if $f\colon A\to B$ and $D\included A$, we can define
\begin{equation*}
f\restriction D=\{(x,y)\in f\colon x\in D\}.
\end{equation*}
\begin{theorem}
If $f\colon A\to B$ and $D\included A$, then
\begin{equation*}
f\restriction D\colon D\to B
\end{equation*}
and, for all $x$ in $D$, $(f\restriction D)(x)=f(x)$.
\end{theorem}
If $f\colon A\to B$ and $g\colon B\to C$,
then we can define
\begin{equation*}
g\circ f=\{(x,z)\colon\Exists y(f(x)=y\land g(y)=z)\};
\end{equation*}
this is called the \textbf{composite} of $(g,f)$.
\begin{theorem}\label{thm:composite}
If $f\colon A\to B$ and $g\colon B\to C$, then
\begin{equation*}
g\circ f\colon A\to C.
\end{equation*}
If also $h\colon C\to D$, then
\begin{equation*}
h\circ(g\circ f)=(h\circ g)\circ f.
\end{equation*}
\end{theorem}
We define
\begin{equation*}
\id_A=\{(x,x)\colon x\in A\};
\end{equation*}
this is the \textbf{identity} on $A$.
\begin{theorem}\label{thm:id}
$\id_A$ is a bijection from $A$ to itself.
If $f\colon A\to B$, then
\begin{align*}
f\circ\id_A&=f,&
\id_B\circ f&=f.
\end{align*}
\end{theorem}
If $f$ is a bijection from $A$ to $B$, we define
\begin{equation*}
f\inv=\{(y,x)\colon f(x)=y\};
\end{equation*}
this is the \textbf{inverse} of $f$.
\begin{theorem}\label{thm:inverses}
\mbox{}
\begin{compactenum}
\item
The inverse of a bijection from $A$ to $B$ is a bijection from $B$ to $A$.
\item
Suppose $f:A\to B$ and $g:B\to A$. Then $f$ is a bijection from $A$ to $B$ whose inverse is $g$ if and only if
\begin{align*}
g\circ f&=\id_A,&f\circ g&=\id_B.
\end{align*}
\end{compactenum}
\end{theorem}
In the definition of the cartesian product $A\times B$
and of a function from $A$ to $B$,
we may replace the sets $A$ and $B$ with classes.
For example, we may speak of the function $x\mapsto\{x\}$ on the class of all sets.
If $\bm F$ is a function on some class $\bm C$,
and $A$ is a \emph{subset} of $\bm C$,
then by the \textbf{Replacement Axiom,}
the image $\bm F[A]$ is also a set.
For example, if we are given a function $n\mapsto G_n$ on $\upomega$,
then by Replacement the class $\{G_n\colon n\in\upomega\}$ is a set.
Then the union of this class is a set, which we denote by
\begin{equation*}
\bigcup_{n\in\upomega}G_n.
\end{equation*}
A \textbf{singulary operation}\index{singulary} on $A$ is a function
from $A$ to itself; a \textbf{binary}\index{binary operation} on $A$
is a function
from $A\times A$ to $A$. A \textbf{binary relation} on $A$ is a
subset of $A\times A$; if $R$ is such, and $(a,b)\in R$, we often
write
\begin{equation*}\label{mathrel}
a\mathrel Rb.
\end{equation*}
A singulary operation on $A$ is a particular kind of binary
relation on $A$; for such a relation, we already have the
special notation in~\eqref{eqn:f}.
The reader will be familiar
with other kinds of binary relations, such as \emph{orderings.}
We are going to define a particular binary relation on p.\ \pageref{<} below
and prove that it is an ordering.
\section{An axiomatic development of the natural numbers}\label{sect:N}
In the preceding sections, we sketched an axiomatic approach to set theory.
Now we start over with an axiomatic approach to the natural numbers alone.
In the section after this,
we shall show that the set $\upomega$
does actually provide
a \emph{model} of the axioms for natural numbers
developed in the present section.
For the moment though, we forget the definition of $\upomega$.
We forget about starting the natural numbers with $0$.
Children learn to count starting with $1$, not $0$.
Let us understand the natural numbers to compose \emph{some} set called $\N$.
This set has
a distinguished \textbf{initial element,}\index{initial element}
which we call \textbf{one}\index{one} and denote by
\begin{equation*}
1.
\end{equation*}
On the set $\N$ there is also
a distinguished singulary operation of
\textbf{succession,}\index{succession, successor}
namely the operation
\begin{equation*}
n\mapsto n+1,
\end{equation*}
where $n+1$ is called the \textbf{successor} of $n$.
Note that some other expression like $S(n)$ might be used for this successor.
For the moment, we have no binary operation called $+$ on $\N$.
I propose to refer to the ordered triple $(\N,1,n\mapsto n+1)$ as an
\emph{iterative structure.}
In general, by an \textbf{iterative structure,}\index{iterative}
I mean any set that has a distinguished element and a distinguished singulary operation.
Here the underlying set can be called
the \textbf{universe}\index{universe} of the structure.
For a simple notational distinction between a structure and its universe,
if the universe is $A$,
the structure itself might be denoted by a fancier version of this letter,
such as the Fraktur version $\str A$.
See Appendix~\ref{app:German} (p.~\pageref{app:German}) for Fraktur versions,
and their handwritten forms, for all of the Latin letters.
The
iterative structure $(\N,1,n\mapsto n+1)$ is
distinguished among iterative structures by satisfying the
following axioms.
\begin{compactenum}
\item\label{ax:0}
$1$ is not a successor: $1\neq n+1$.
\item\label{ax:inj}
Succession is injective: if $m+1=n+1$, then $m=n$.
\item\label{ax:ind}
The structure admits \textbf{proof by induction,}\index{induction} in
the following sense.
Every subset $A$ of the universe must be the whole universe,
provided $A$ has the following two closure properties:
\begin{compactenum}
\item
$1\in A$, and
\item
for all $n$, if $n\in A$, then $n+1\in A$.
\end{compactenum}
\end{compactenum}
These axioms were discovered originally by
Dedekind~\cite[II, VI (71), p.~67]{MR0159773};
but they were written down also by Peano~\cite{Peano},
and they are often known as the \textbf{Peano axioms.}\index{Peano}
Suppose $(A,b,f)$ is an iterative structure.
If we successively compute $b$, $f(b)$, $f(f(b))$, $f(f(f(b)))$, and so on,
either we always get a new element of $A$,
or we reach an element that we have already seen.
In the latter case,
if the first repeated element is $b$,
then the first Peano axiom fails.
If it is not $b$, then the second Peano axiom fails.
The last Peano axiom, the Induction Axiom,
would ensure that every element of $A$ was reached by our computations.
None of the three axioms implies the others,
although the Induction Axiom implies
that exactly one of the other two axioms holds \cite{MR0120156}.
The following theorem will allow us to define all of the usual operations on $\N$.
The theorem is difficult to prove.
Not the least difficulty is seeing that the theorem \emph{needs} to be proved.%%%%%
\footnote{Peano did not see this need, but Dedekind did.
Landau discusses the matter \cite[pp.~ix--x]{MR12:397m}.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\emph{Homomorphisms} will be defined generally on p.\ \pageref{hom},
but meanwhile we need a special case.
A \textbf{homomorphism} from $(\N,1,n\mapsto n+1)$ to an iterative structure $(A,b,f)$
is a function $h$ from $\N$ to $A$ such that
\begin{compactenum}[1)]
\item
$h(1)=b$, and
\item
$h(n+1)=f(h(n))$ for all $n$ in $\N$.
\end{compactenum}
\begin{theorem}[Recursion]\label{thm:rec}
For every iterative structure, there is exactly one
homomorphism from
$(\N,1,n\mapsto n+1)$ to this structure.
\end{theorem}
\begin{proof}
Given an iterative structure $(A,b,f)$,
we seek a homomorphism $h$ from $(\N,1,n\mapsto n+1)$ to $(A,b,f)$.
Then $h$ will be a particular subset of $\N\times A$.
Let $B$ be the set whose elements are the subsets $C$ of $\N\times
A$ such that, if $(n,y)\in C$, then either
\begin{compactenum}[1)]
\item
$(n,y)=(1,b)$ or else
\item $C$ has an element
$(m,x)$ such that $(n,y)=(m+1,f(x))$.
\end{compactenum}
In particular, $\{(1,b)\}\in B$.
Also, if $C\in B$ and $(m,x)\in C$, then
\begin{equation*}
C\cup\{(m+1,f(x))\}\in B.
\end{equation*}
Let $R=\bigcup B$; so $R$ is a subset of $\N\times A$.
We may say $R$ is a \emph{relation from $\N$ to $A$}.
If $(n,y)\in R$, then (as suggested on p.\ \pageref{mathrel} above)
we may write also
\begin{equation*}
n\mathrel Ry.
\end{equation*}
Since $\{(1,b)\}\in
B$, we have $1\mathrel Rb$.
Also, if $m\mathrel Rx$, then $(m,x)\in C$ for some $C$ in $B$,
so $C\cup\{(m+1,f(x))\}\in B$,
and therefore $(m+1)\mathrel R f(x)$.
Thus $R$ is the desired function $h$,
provided $R$ is actually a \emph{function} from $\N$ to $A$.
Proving that $R$ is a function from $\N$ to $A$ has two stages.
\begin{asparaenum}[1.]
\item
Let $D$ be the set of all $n$ in $\N$
for which there is $y$ in $A$ such that $n\mathrel Ry$.
Then we have just seen that $1\in D$, and if $n\in D$, then $n+1\in D$.
By induction, $D=\N$.
Thus if $R$ is a function, its domain is $\N$.
\item
Let $E$ be the set of all $n$ in $\N$ such that,
for all $y$ in $A$,
if $n\mathrel Ry$ and $n\mathrel Rz$, then $y=z$.
Suppose $1\mathrel R y$.
Then $(1,y)\in C$ for some $C$ in $B$.
Since $1$ is not a successor, we must have $y=b$, by definition of $B$.
Therefore $1\in E$.
Suppose $n\in E$, and $(n+1)\mathrel Ry$.
Then $(n+1,y)\in C$ for some $C$ in $B$.
Again since $1$ is not a successor,
we must have
\begin{equation*}
(n+1,y)=(m+1,f(x))
\end{equation*}
for some $(m,x)$ in $C$.
Since succession is injective, we must have $m=n$.
Thus, $y=f(x)$ for some $x$ in $A$ such that $n\mathrel Rx$.
Since $n\in E$, we know $x$ is \emph{unique} such that $n\mathrel Rx$.
Therefore $y$ is unique such that $(n+1)\mathrel Ry$.
Thus $n+1\in E$.
By induction, $E=\N$.
\end{asparaenum}
So $R$ is the desired function $h$.
Finally, $h$ is unique by induction.
\end{proof}
Note well that the proof uses all three of the Peano Axioms.
The Recursion Theorem is often used in the following form.
\begin{corollary}\label{cor:rec}
For every set $A$ with a distinguished element $b$, and for every function
$F$ from $\N\times A$ to $A$, there is a unique function $H$ from $\N$ to
$A$ such that
\begin{compactenum}[1)]
\item
$H(1)=b$, and
\item
$H(n+1)=F(n,H(n))$ for all $n$ in $\N$.
\end{compactenum}
\end{corollary}
\begin{proof}
Let $h$ be the unique homomorphism from $(\N,1,n\mapsto n+1)$ to
$(\N\times A,(1,b),f)$, where $f$ is the operation
$(n,x)\mapsto(n+1,F(n,x))$. In particular, $h(n)$ is always an
ordered pair. By induction, the
first entry of $h(n)$ is always $n$; so there is a function $H$ from
$\N$ to $A$ such that $h(n)=(n,H(n))$. Then $H$ is as desired. By
induction, $H$ is unique.
\end{proof}
We can now use recursion to define, on $\N$,
%\begin{compactenum}[1)]
% \item
the binary operation
\begin{equation*}
(x,y)\mapsto x+y
\end{equation*}
of \textbf{addition,}\index{addition} and
%\item
the binary operation
\begin{equation*}
(x,y)\mapsto x\cdot y
\end{equation*}
of \textbf{multiplication.}\index{multiplication}
%\end{compactenum}
More precisely, for each $n$ in $\N$,
we recursively define the operations $x\mapsto n+x$ and $x\mapsto n\cdot x$.
The definitions are:
\begin{align}\label{eqn:+.}
& \begin{gathered}
n+1=n+1,\\
n\cdot1=n,
\end{gathered}&
& \begin{gathered}
n+(m+1)=(n+m)+1,\\
n\cdot(m+1)=n\cdot m+n.
\end{gathered}
\end{align}
The definition of addition might also be written as $n+1=S(n)$ and $n+S(m)=S(n+m)$.
In place of $x\cdot y$, we often write $xy$.
\begin{lemma}\label{lem:+}
For all $n$ and $m$ in $\N$,
\begin{align*}
1+n&=n+1,&(m+1)+n&=(m+n)+1.
\end{align*}
\end{lemma}
\begin{proof}
Induction.
\end{proof}
\begin{theorem}\label{thm:N-comm}
Addition on $\N$ is
\begin{compactenum}[1)]
\item
\textbf{commutative:}\index{commutative} $n+m=m+n$; and
\item
\textbf{associative:}\index{associative} $n+(m+k)=(n+m)+k$.
\end{compactenum}
\end{theorem}
\begin{proof}
Induction and the lemma.
\end{proof}
\begin{theorem}\label{thm:cancel}
Addition on $\N$ allows \textbf{cancellation:}\index{cancellation}
if $n+x=n+y$, then $x=y$.
\end{theorem}
\begin{proof}
Induction, and injectivity of succession.
\end{proof}
The analogous proposition for multiplication is Corollary~\ref{cor:mulcan} below.
\begin{lemma}\label{lem:.}
For all $n$ and $m$ in $\N$,
\begin{align*}
1\cdot n&=n,&(m+1)\cdot n&=m\cdot n+n.
\end{align*}
\end{lemma}
\begin{proof}
Induction.
\end{proof}
\begin{theorem}\label{thm:mult-comm}
Multiplication on $\N$ is
\begin{compactenum}[1)]
\item
commutative: $nm=mn$;
\item
\textbf{distributive}\index{distributive} over addition: $n(m+k)=nm+nk$; and
\item
associative: $n(mk)=(nm)k$.
\end{compactenum}
\end{theorem}
\begin{proof}
Induction and the lemma.
\end{proof}
Landau \cite{MR12:397m} proves \emph{using induction alone}
that $+$ and $\cdot$ exist
as given by the recursive definitions above.
However, Theorem~\ref{thm:cancel} needs more than induction.
So does the existence of the \textbf{factorial}\label{factorial} function
defined by
\begin{align*}
1!&=1,&(n+1)!&=n!\cdot(n+1).
\end{align*}
So does \textbf{exponentiation,}\index{exponentiation} defined by
\begin{align*}
n^1&=n,&n^{m+1}&=n^m\cdot n.
\end{align*}
The usual ordering $<$ of $\N$ is defined recursively as follows.
First note that $m\leq n$ means simply $m<n$ or $m=n$.
% NOTE(review): a large span of the original text appears to be missing here,
% apparently lost in extraction (text between a `<' and a later `>' was
% dropped). The document resumes in the middle of a theorem about $\Z$ and $\Q$.
\begin{theorem}\label{thm:Z-in-Q}
The structure $(\Z,0,-,+,1,\cdot,<)$ is a well-defined substructure of
$(\Q,0,-,+,1,\cdot,<)$.
The structure $(\Z,0,-,+,<)$ is an ordered group.
\end{theorem}
We can also think of $\Q$ as arising from $\Z$
by the same construction that gives us $\Qp$ from $\N$.
This gives us the following.
\begin{theorem}
There is a surjective function $(x,y)\mapsto x/y$
from the product $\Z\times(\Z\setminus\{0\})$ to $\Q$ such that
\begin{gather*}
\frac ab+\frac xy=\frac{ay+bx}{by},\\
1=\frac11,\\
\frac ab\cdot\frac xy=\frac{ax}{by}.
\end{gather*}
Then
\begin{equation*}
\frac ab<\frac xy\liff ay<bx.
\end{equation*}
% NOTE(review): a large span of the original text appears to be missing here
% (extraction loss). The document resumes in a discussion of Eudoxian
% proportion of lengths, with the condition
\begin{equation*}
kA>mB\liff kC>mD.
\end{equation*}
In this case, the four lengths $A$, $B$, $C$, and $D$ are \emph{proportional,}
and we may write
\begin{equation*}
A:B:\;:C:D.
\end{equation*}
We can write the condition for this proportionality as
\begin{equation*}
\left\{\frac xy\in\Qp\colon xB<yA\right\}.
\end{equation*}
% NOTE(review): a large span of the original text appears to be missing here
% (extraction loss); the following sentence resumes mid-discussion.
For each $b$ in $\R$ such that $b>1$,
there is an isomorphism taking $1$ to $b$.
This isomorphism is denoted by
\begin{equation*}
x\mapsto b^x,
\end{equation*}
and its inverse is
\begin{equation*}
x\mapsto\log_bx.
\end{equation*}
\begin{theorem}[Intermediate Value Theorem]
If $f$ is a continuous singulary operation on $\R$, and $f(a)\cdot f(b)<0$,
then $f$ has a zero between $a$ and $b$.
\end{theorem}
Hence for example the function $x\mapsto x^2-2$
must have a zero in $\R$ between $1$ and $2$.
More generally,
if $A\included\R$, then the set of \emph{polynomial functions over $A$}
is obtained from the set of constant functions taking values in $A$,
along with $-$, $+$, $\cdot$, and the projections $(x_0,\dots,x_{n-1})\mapsto x_i$,
by closing under taking composites.
Then all polynomial functions over $\R$ are continuous,
and so the Intermediate Value Theorem applies to the singulary polynomial functions.
Therefore the ordered field $\R$ is said to be \textbf{real-closed.}
However, there are smaller real-closed ordered fields:
we establish this in the next section.
\section{Countability}\label{sect:count}
A set is \textbf{countable} if it embeds in $\upomega$;
otherwise the set is \textbf{uncountable.}
\begin{theorem}
The sets $\N$, $\Z$, and $\Q$ are all countable.
\end{theorem}
\begin{theorem}\label{thm:pow-un}
$\pow{\upomega}$ is uncountable.
\end{theorem}
\begin{proof}
Suppose $f$ is an injection from $\upomega$ to $\pow{\upomega}$.
Then the subset $\{x\colon x\notin f(x)\}$ of $\upomega$ is not in the range of $f$,
by a variant of the Russell Paradox:
if $\{x\colon x\notin f(x)\}=f(a)$, then $a\in f(a)\liff a\notin f(a)$.
\end{proof}
\begin{theorem}
The set $\R$ is uncountable.
\end{theorem}
\begin{proof}
We shall use the notation whose properties
will be established in sub-\S\ref{subsect:PS} (p.~\pageref{subsect:PS}).
For every subset $A$ of $\upomega$,
let $g(A)$ be the set of rational numbers $x$ such that,
for some $n$ in $\upomega$,
\begin{equation*}
x<\sum_{k\in A\cap n}\frac2{3^k}.
\end{equation*}
Then $g(A)$ is a real number by the original definition.
The function $A\mapsto g(A)$ from $\pow{\upomega}$ to $\R$ is injective.
\end{proof}
However, suppose we let $\rc A$ be the smallest field
that contains all zeros from $\R$ of singulary polynomial functions over $A$.
If we define $A_0=\Q$ and $A_{n+1}=\rc{{A_n}}$,
then $\bigcup_{n\in\upomega}A_n$ will contain
all zeros from $\R$ of singulary polynomial functions over itself.
In fact $\bigcup_{n\in\upomega}A_n$ will be $\rc{\Q}$.
But this field is countable.
We can say more about a set than whether it is countable or uncountable.
The main reason for doing this here is that it provides a good example of a \emph{classification}: see \S\ref{sect:fin} on p.\ \pageref{sect:fin} below.
A class is \textbf{transitive} if it properly includes all of its elements.
A transitive \emph{set} is an \textbf{ordinal}
if it is well-ordered by the relation of membership.
Then all of the elements of $\upomega$ are ordinals, and so is $\upomega$ itself.
The class of all ordinals can be denoted by
\begin{equation*}
\on.
\end{equation*}
\begin{theorem}
The class $\on$ is transitive and is well-ordered by membership.
\end{theorem}
In particular, $\on$ cannot contain itself;
so it is not a set.
This result is the \textbf{Burali-Forti Paradox}\label{BF}~\cite{Burali-Forti}.
\begin{theorem}
Every well-ordered set $(A,<)$ is isomorphic to a unique ordinal.
The isomorphism is a certain function $f$ on $A$,
and this function is determined by the rule
\begin{equation*}
f(b)=\{f(x)\colon x<b\}.
\end{equation*}
\end{theorem}
% NOTE(review): a large span of the original text appears to be missing here
% (extraction loss); the following resumes mid-argument about residues
% modulo $n$.
Then $nx>j-i$ for all $x$ in $\N$.
By Theorem~\ref{thm:N<} (p.\ \pageref{thm:N<}),
\begin{equation*}
i\not\equiv j\pmod n.
\end{equation*}
Thus the given map is injective.
If $k\in\Z$, let $a$ be its least nonnegative residue
(which exists by Theorem~\ref{thm:wo}).
Then $a<n$.
% NOTE(review): a large span of the original text appears to be missing here
% (extraction loss); the following resumes mid-discussion of a set $O$ of
% operations.
Then $O$ is closed under
composition, so $(O,\circ)$ is a semigroup; but it has no identity.
The structure $(\Q,0,-,+,1,\cdot)$
is an example of a \emph{ring} (or more precisely associative ring);
in fact it is a \emph{field,}
and it embeds in the field $(\R,0,-,+,1,\cdot)$ of real numbers,
as follows from Theorem~\ref{thm:R-complete} on p.\ \pageref{thm:R-complete}.
Rings and fields as such will be defined formally
in \S\ref{sect:rings}, beginning on p.\ \pageref{sect:rings}.
\subsection{Some homomorphisms}\label{subsect:homs}
We defined powers of symmetries on p.\ \pageref{sym-pow}.
By the same definition, we obtain at least the \emph{positive} powers
of elements of semigroups:
\begin{align*}
a^1&=a,&a^{n+1}&=a\cdot a^n.
\end{align*}
\begin{theorem}
Suppose $(S,\cdot)$ is a semigroup, and $m$ and $n$ range over~$\N$.
\begin{compactenum}
\item
For all $a$ in $S$,
\begin{equation*}
a^{m+n}=a^ma^n.
\end{equation*}
That is, if $a\in S$, then
\begin{equation*}
n\mapsto a^n\colon(\N,+)\to(S,\cdot).
\end{equation*}
%$x\mapsto a^x$ is a homomorphism from $(\N,+)$ to $(S,\cdot)$.
\item
For all $a$ in $S$,
\begin{equation}\label{eqn:xmn}
a^{mn}=(a^m)^n.
\end{equation}
That is,
\begin{equation}\label{eqn:fmn}
n\mapsto(a\mapsto a^n)\colon(\N,1,\cdot)\to(S^S,\id_S,\circ).
\end{equation}
\end{compactenum}
\end{theorem}
\begin{proof}
We use induction.
The first part is proved like Theorem~\ref{thm:s^n}.
For the second part, we have
$a^{n\cdot 1}=a^n=(a^n)^1$, and if $a^{nm}=(a^n)^m$, then
\begin{equation*}
a^{n(m+1)}=a^{nm+n}=a^{nm}a^n=(a^n)^ma^n=(a^n)^{m+1}.
\end{equation*}
This establishes \eqref{eqn:xmn}. If we write $f_x(y)$ for $y^x$, then \eqref{eqn:xmn} becomes
\begin{equation*}
f_{mn}=f_n\circ f_m.
\end{equation*}
Since $mn=nm$, we get \eqref{eqn:fmn}.
\end{proof}
In a monoid, we define
\begin{equation*}
a^0=\gid.
\end{equation*}
\begin{theorem}
Suppose $(M,\gid,\cdot)$ is a monoid.
\begin{compactenum}
\item
If $a\in M$, then $x\mapsto a^x\colon(\upomega,0,+)\to(M,\gid,\cdot)$.
\item
$x\mapsto(y\mapsto y^x)\colon(\upomega,1,\cdot)\to(M^M,\id_M,\circ)$.
\end{compactenum}
\end{theorem}
In a group, we define
\begin{equation*}
a^{-n}=(a^n)\inv.
\end{equation*}
\begin{theorem}\label{thm:exp-in-groups}
Suppose $(G,\gid,{}\inv,\cdot)$ is a group.
\begin{compactenum}
\item
If $a\in G$, then $x\mapsto a^x\colon(\Z,0,-,+)\to(G,\gid,\inv,\cdot)$.
\item
$x\mapsto(y\mapsto y^x)\colon(\Z,1,\cdot)\to(G^G,\id_G,\circ)$.
\end{compactenum}
\end{theorem}
We shall use the following in Theorem~\ref{thm:8} on p.\ \pageref{thm:8}.
\begin{theorem}\label{thm:x2e}
If $x^2=\gid$ for all $x$ in some group, then that group is abelian.
\end{theorem}
\subsection{Pi and Sigma notation}\label{subsect:PS}
We can generalize the taking of powers in a semigroup as follows.
Given elements $a_i$ of a semigroup, where $i$ ranges over $\upomega$,
we define certain \textbf{iterated products} recursively by
\begin{align*}
\prod_{i<0}a_i&=1,&
\prod_{i<n+1}a_i&=\prod_{i<n}a_i\cdot a_n.
\end{align*}
% NOTE(review): a large span of the original text appears to be missing here
% (extraction loss); the following resumes in a discussion of $\Sym n$
% and $\Alt n$.
If $n>1$, there is a permutation $\sigma\mapsto\sigma\circ(0\cdiv 1)$ of $\Sym n$ itself
that takes even elements to odd.
In this case, $\Alt n$ is half the size of $\Sym n$.
However, $\Alt1=\Sym 1$.
For this reason, one may wish to say that $\Alt n$ is defined only when $n\geq2$.
This makes Theorem~\ref{thm:SA2} (p.\ \pageref{thm:SA2} below) simpler to state.
\section{Simplifications}\label{sect:simp}
If a semigroup $(G,\cdot)$ expands to a group $(G,\gid,{}\inv,\cdot)$,
then the semigroup $(G,\cdot)$ itself is often called a group.
But this usage must be justified.
\begin{theorem}\label{thm:u}
A semigroup can expand to a group in only one way.
\end{theorem}
\begin{proof}
Let $(G,\gid,\inv,\cdot)$ be a group.
If $\gid'$ were a second identity, then
\begin{align*}
\gid'x&=\gid x,& \gid'xx\inv&=\gid xx\inv,& \gid'&=\gid.
\end{align*}
If $a'$ were a second inverse of $a$, then
\begin{align*}
a'a&=a\inv a,& a'aa\inv&=a\inv aa\inv,&a'&=a\inv.\qedhere
\end{align*}
\end{proof}
Establishing that a particular structure is a group is made easier by the following.
\begin{theorem}\label{thm:left}
Any structure satisfying the identities
\begin{gather*}
{\gid}x=x,\\
x\inv x=\gid,\\
x(yz)=(xy)z
\end{gather*}
is a group.
In other words,
any semigroup with a left-identity and with left-inverses is a group.
\end{theorem}
\begin{proof}
We need to show $x\gid=x$ and $xx\inv=\gid$. To establish the latter,
using the given identities we have
\begin{equation*}
(xx\inv)(xx\inv)=x(x\inv x)x\inv=x{\gid}x\inv=xx\inv,
\end{equation*}
and so
\begin{equation*}
xx\inv={\gid}xx\inv=(xx\inv)\inv(xx\inv)(xx\inv)=(xx\inv)\inv(xx\inv)={\gid}.
\end{equation*}
Hence also
\begin{equation*}
x{\gid}=x(x\inv x)=(xx\inv)x={\gid}x=x.\qedhere
\end{equation*}
\end{proof}
The theorem has an obvious ``dual'' involving right-identities and right-inverses. By the theorem, the semigroups that expand to groups are precisely the semigroups that satisfy the axiom
\begin{gather*}
\Exists z(\Forall xzx=x\land\Forall x\Exists y yx=z),
\end{gather*}
which is logically equivalent to
\begin{equation}\label{eqn:sg-ax}
\Exists z\Forall x\Forall y\Exists u(zx=x\land uy=z).
\end{equation}
We shall show that this sentence is more complex than need be.
Thanks to Theorem~\ref{thm:u}, if a semigroup $(G,\cdot)$ does expand to a group, then we may unambiguously refer to $(G,\cdot)$ itself as a group. Furthermore, we may refer to $G$ as a group: this is commonly done, although, theoretically, it may lead to ambiguity.
\begin{theorem}\label{thm:solutions}
Let $G$ be a nonempty semigroup. The following are equivalent.
\begin{compactenum}
\item\label{item:exp}
$G$ expands to a group.
%\item\label{item:exp-u}
%$G$ expands uniquely to a group.
\item\label{item:sol}
Each equation $ax=b$ and $ya=b$ with parameters from $G$ has a
solution in $G$.
\item\label{item:sol-u}
Each equation $ax=b$ and $ya=b$ with parameters from $G$ has a
unique solution in $G$.
\end{compactenum}
\end{theorem}
\begin{proof}
Immediately \eqref{item:sol-u}$\lto$\eqref{item:sol}. Almost as easily, \eqref{item:exp}$\lto$\eqref{item:sol-u}. For, if $a$ and $b$ belong to some semigroup that expands to a group, we have $ax=b\liff x=a\inv b$; and we know by Theorem~\ref{thm:u} that $a\inv$ is uniquely determined. Likewise for $ya=b$.
Finally we show \eqref{item:sol}$\lto$\eqref{item:exp}.
Suppose $G$ is a nonempty semigroup in which all equations $ax=b$ and $ya=b$ have solutions. If $c\in G$, let $\gid$ be
a solution to $yc=c$. If $b\in G$, let $d$ be a
solution to
$cx=b$. Then
\begin{equation*}
{\gid}b={\gid}(cd)=({\gid}c)d=cd=b.
\end{equation*}
Since $b$ was chosen arbitrarily, $\gid$ is a left identity. Since the equation $yc={\gid}$ has a solution, $c$ has a left inverse. But $c$ is an arbitrary element of $G$. By Theorem~\ref{thm:left}, we are done.
\end{proof}
Now we have that the semigroups that expand to groups
are just the semigroups that satisfy the axiom
\begin{equation*}
\Forall x\Forall y(\Exists zxz=y\land\Exists wwx=y).
\end{equation*}
This may not look simpler than \eqref{eqn:sg-ax}, but it is.
It should be understood as
\begin{equation*}
\Forall x\Forall y\Exists z\Exists w(xz=y\land wx=y),
\end{equation*}
which is a sentence of the general form $\forall\exists$;
whereas \eqref{eqn:sg-ax} is of the form $\exists\forall\exists$.
\begin{theorem}\label{thm:gp-hom}
A map $f$ from one group to another is a homomorphism, provided it is a
homomorphism of semigroups, that is, $f(xy)=f(x)f(y)$.
\end{theorem}
\begin{proof}
In a group, if $a$ is an element, then the identity is the unique
solution of $xa=a$, and $a\inv$ is the unique solution of $yaa=a$. A
semigroup homomorphism $f$ takes solutions of
these equations to solutions of $xb=b$ and $ybb=b$, where $b=f(a)$.
\end{proof}
\emph{Inclusion} of a substructure in a larger structure is a homomorphism.
In particular, if $(G,\gid,{}\inv,\cdot)$ and $(H,\gid,{}\inv,\cdot)$
are groups, we have
\begin{equation*}
(G,\cdot)\included(H,\cdot)
\implies(G,\gid,{}\inv,\cdot)\included(H,\gid,{}\inv,\cdot).
\end{equation*}
If an arbitrary class of structures is axiomatized
by $\forall\exists$ sentences,
then the class is ``closed under unions of chains\label{chains}''
in the sense that,
if $\str A_0\included\str A_1\included\str A_2\included\dotsb$,
where each $\str A_k$ belongs to the class,
then the union of all of these structures also belongs to the class.
In fact the converse is also true,
by the so-called Chang--\L o\'s--Suszko Theorem \cite{MR0103812,MR0089813}.
With this theorem,
and with Theorem~\ref{thm:gp-hom} in place of~\ref{thm:solutions},
we can still conclude
that the theory of groups in the signature $\{\cdot\}$
has $\forall\exists$ axioms,
although we may not know what they are.
Theorem~\ref{thm:gp-hom} fails with monoids in place of groups.
For example, $(\Z,1,\cdot)$ and $(\Z\times\Z,(1,1),\cdot)$ are monoids
(the latter being the product of the former with itself
as defined in \S\ref{sect:new}),
and $x\mapsto(x,0)$ is an embedding
of the semigroup $(\Z,\cdot)$ in $(\Z\times\Z,\cdot)$,
but it is not an embedding of the monoids.
\section{Associative rings}\label{sect:rings}
A homomorphism from a structure to itself is an
\textbf{endomorphism.}\index{endomorphism}
Recall from p.\ \pageref{abelian}
that a group in which the multiplication is commutative
is said to be an \textbf{abelian group,}
and (p.\ \pageref{additive}) its operation is usually written additively.
The set of endomorphisms of an abelian group can be made into an
abelian group in which:
\begin{compactenum}[1)]
\item
the identity is the constant function $x\mapsto\gid$;
\item
additive inversion converts $f$ to $x\mapsto-f(x)$;
\item
addition converts $(f,g)$ to $x\mapsto f(x)+g(x)$.
\end{compactenum}
If $E$ is an abelian group, let the abelian group of its endomorphisms
be denoted by
\begin{equation*}
\End E.
\end{equation*}
The set of endomorphisms of $E$ can also be made into
a monoid in which
the identity is the identity function $\id_E$, and multiplication
is functional composition.
This multiplication
distributes in both senses over addition:
\begin{align*}
f\circ(g+h)&=f\circ g+f\circ h,& (f+g)\circ h&=f\circ h+g\circ h.
\end{align*}
We may denote the two combined structures---abelian group and
monoid together---by
\begin{equation*}
(\End E,\id_E,\circ);
\end{equation*}
this is the \textbf{complete ring of
endomorphisms of}\index{complete ring of endomorphisms} $E$.
A substructure of $(\End E,\id_E,\circ)$ can be called
simply a \textbf{ring of endomorphisms}\index{ring of endomorphisms of} $E$.
An \textbf{associative ring} is a structure $(R,0,-,+,1,\cdot)$ such that
\begin{compactenum}[1)]
\item
$(R,0,-,+)$ is an abelian group,
\item
$(R,1,\cdot)$ is a monoid,
\item
the multiplication distributes in both senses over addition.
\end{compactenum}
Then rings of endomorphisms are associative rings.%%%%%
\footnote{See note~\ref{note:ring} on p.\ \pageref{note:ring}
for the origin of the term \emph{ring.}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
It may be convenient to write an associative ring as $(R,1,\cdot)$,
where $R$ is implicitly an abelian group.
We might even say simply that $R$ is an associative ring.
An associative ring is usually just called a ring;
however, we shall consider some rings that are not associative rings in \S\ref{sect:non-assoc} (p.\ \pageref{sect:non-assoc}).
Some authors might not require
an associative ring to have a multiplicative identity.%%%%%
\footnote{For Lang \cite[ch.~II, \S1, p.~83]{Lang-alg},
a ring is what we have defined as an associative ring.
For Hungerford \cite[ch.~III, \S1, p.~115]{MR600654},
what we call an associative ring is a \emph{ring with identity.}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We require it, so that the next theorem holds.
As with a group, so with an associative ring,
an element $a$ determines a singulary operation $\uplambda_a$ on the structure,
the operation being given by
\begin{equation*}
\uplambda_a(x)=ax.
\end{equation*}
Then we have an analogue of Cayley's Theorem (p.\ \pageref{thm:Cay}):
\begin{theorem}\label{thm:x-lambda_x}
For every associative ring $(R,1,\cdot)$,
the function
\begin{equation*}
x\mapsto\uplambda_x
\end{equation*}
embeds $(R,1,\cdot)$ in $(\End R,\id_R,\circ)$.
\end{theorem}
In an associative ring, if the multiplication commutes,
then the ring is a \textbf{commutative ring.}\index{commutative ring}
For example, $(\Z,0,-,+,1,\cdot)$\label{Z-as-ring}
and $(\Q,0,-,+,1,\cdot)$
are commutative rings.
The following is easy to check,
but can be seen
as a consequence of Theorem \ref{thm:ring-q} on p.\ \pageref{thm:ring-q} below,
which is itself easy to prove, especially given Theorem~\ref{thm:cong}.
\begin{theorem}\label{thm:Zmod-ring}
$(\Zmod n,0,-,+,1,\cdot)$ is a commutative ring.
\end{theorem}
In an associative ring,
an element with both a left and a right multiplicative inverse can be
called simply \textbf{invertible;}\index{invertible} it is also called
a \textbf{unit.}\index{unit}
\begin{theorem}\label{thm:units}
In an associative ring, the units compose a group with respect to
multiplication. In particular, a unit has a unique
left inverse, which is also a right inverse.
\end{theorem}
The group of units of an associative ring $R$ is denoted by
\begin{equation*}
\units R.
\end{equation*}
For example, $\units{\Z}=\{1,-1\}$.
Evidently all two-element groups
are isomorphic to this one.
By the theorem, if an element of an associative ring
has both a left inverse and a right inverse,
then they are equal.
However, possibly an element can have a right inverse,
but not a left inverse.
We can construct an example by means of the following.
\begin{theorem}\label{thm:power}
If $I$ is a set and $G$ is a group,
then the set $G^I$ of functions from $I$ to $G$
is a group with multiplication given by
\begin{equation*}
(x_i\colon i\in I)\cdot(y_i\colon i\in I)=(x_i\cdot y_i\colon i\in I).
\end{equation*}
\end{theorem}
Let $G$ be any nontrivial group.\label{exa:no-unit}
An arbitrary element $(x_n\colon n\in\upomega)$ of $G^{\upomega}$
can be written also as
\begin{equation*}
(x_0,x_1,\dots).
\end{equation*}
Then $\End{G^{\upomega}}$ contains elements $f$ and $g$ given by
\begin{gather*}
f(x_0,x_1,\dots)=(x_1,x_2,x_3,x_4,\dots),\\
g(x_0,x_1,\dots)=(x_0,x_0,x_1,x_2,\dots),
\end{gather*}
so that
\begin{gather*}
fg(x_0,x_1,\dots)=(x_0,x_1,x_2,\dots),\\
gf(x_0,x_1,\dots)=(x_1,x_1,x_2,\dots).
\end{gather*}
In particular, $g$ is a right inverse of $f$, but not a left inverse.
The construction in Theorem~\ref{thm:power} will be generalized on p.\ \pageref{dprod}.
If $R$ is a commutative ring, and
$\units R=R\setminus\{0\}$, then $R$ is called a \textbf{field.}\index{field}
For example, $\Q$ and $\R$ are fields.
The field $\C$ can be defined as $\R\times\R$ with the appropriate operations:
see p.\ \pageref{C}.
The trivial group $\{0\}$ becomes the trivial associative ring
when we define $1=0$ and $0\cdot0=0$.
This ring is not a field, because its only element $0$ is a unit.
\chapter{Groups}
\section{*General linear groups}\label{sect:gl}
The purpose of this section is to define some families of examples of groups,
besides the finite symmetry groups $\Sym n$.
By Cayley's Theorem, p.\ \pageref{thm:Cay},
we know that every finite group embeds, for some $n$ in $\upomega$, in $\Sym n$.
We know in turn (from p.\ \pageref{sym-omega})
that each $\Sym n$ embeds in $\Sym{\upomega}$,
which however is uncountable by Theorem~\ref{thm:sym-o-un}.
For every commutative ring $R$,
for every $n$ in $\upomega$,
we shall define the group $\GLR$
of \emph{invertible $n\times n$ matrices over} $R$.
Both $\Sym n$ and $\units R$ embed in $\GLR$.
If $R$ is countable, then so is $\GLR$.
If $R$ is finite, then so is $\GLR$.
In any case, $\GLR$ can be understood as the automorphism group of $R^n$,
when this is considered as an \emph{$R$-module.}
We shall use invertible matrices over $\Z$ in classifying
the \emph{finitely generated} abelian groups,
in \S\ref{sect:fgag} (p.\ \pageref{sect:fgag}).
\subsection{Additive groups of matrices}
For any commutative ring $R$,
for any two elements $m$ and $n$ of $\upomega$,
a function $(i,j)\mapsto a^i_j$ from $m\times n$ to $R$
can be called an $m\times n$ \textbf{matrix over} $R$
and denoted by the expression
\begin{equation*}
\begin{pmatrix}
a^0_0&\cdots&a^0_{n-1}\\
\vdots&\ddots&\vdots\\
a^{m-1}_0&\cdots&a^{m-1}_{n-1}
\end{pmatrix},
\end{equation*}
which has $m$ rows and $n$ columns.
We may abbreviate this matrix to
\begin{equation*}
(a^i_j)^{i< m}_{j< n},
\end{equation*}
or simply
\begin{equation*}
(a^i_j)^{i}_{j}
\end{equation*}
if the sets over which $i$ and $j$ range are clear.
The \textbf{entries} $a^i_j$ are from $R$.
The set of all $m\times n$ matrices over $R$ can be denoted by
\begin{equation*}
\MatR[m\times n].
\end{equation*}
This is an abelian group in the obvious way, with addition defined by
\begin{equation*}
(a^i_j)^{i<m}_{j<n}+(b^i_j)^{i<m}_{j<n}=(a^i_j+b^i_j)^{i<m}_{j<n}.
\end{equation*}
% NOTE(review): a large span of the original text appears to be missing here
% (extraction loss); the following resumes in the Euclidean-algorithm
% discussion, where successive remainders satisfy
\begin{equation*}
a_{k+1}>a_{k+2},
\end{equation*}
so the sequence must have a last term; this is $\gcd(a_0,a_1)$.
When this is $1$, then $a_0$ and $a_1$ are said to be
\textbf{prime to} one another, or \textbf{relatively prime.}
In this case,
by Theorem~\ref{thm:ax+by=d},
the equation
\begin{equation*}
a_0x+a_1y=1
\end{equation*}
is soluble in $\Z$.
If $a\equiv b\pmod n$, then $\gcd(a,n)=\gcd(b,n)$.
Hence the following makes sense:
\begin{theorem}\label{thm:Znx}
For all positive integers $n$,
\begin{equation*}
\Zmodu n=\{x\in\Zmod n\colon\gcd(x,n)=1\}.
\end{equation*}
\end{theorem}
\begin{proof}
By the proof of Theorem~\ref{thm:mZnx},
$\Zmodu n$ consists of those $m$ in $\Zmod n$ such that the congruence
\begin{equation*}
mx\equiv1\pmod n
\end{equation*}
is soluble, that is, the equation $mx+ny=1$ is soluble,
so that $\gcd(m,n)$ must be $1$.
Conversely, if $\gcd(m,n)=1$, then the equation $mx+ny=1$ is soluble by Theorem~\ref{thm:ax+by=d}.
\end{proof}
For an arbitrary subset $A$ of an arbitrary group,
it is not so easy\label{gpgen-hard}
to give a description of the elements of $\gpgen A$.
We shall do it
by means of Theorem~\ref{thm:free-gp} on p.\ \pageref{thm:free-gp}.
Meanwhile, we may note some more specific examples:
The subgroup $\gpgen{(0\cdiv 1),(2\cdiv 3)}$ of $\Sym4$
is the subgroup given above in Theorem~\ref{thm:subgp-ex}
as being isomorphic to $\Kfg$.
The subgroup $\gpgen{\mi,\mj}$ of $\units{\Ham}$
is the \textbf{quaternion group,}\index{quaternion group}
denoted by
\begin{equation*}\label{quat}
\quat;
\end{equation*}
it has eight elements:
$\pm1$, $\pm\mi$, $\pm\mj$, and $\pm\mk$, where $\mk=\mi\mj$.
We consider this group further in the next section (\S\ref{sect:cyclic}) and later.
\begin{theorem}\label{thm:Dih-2}
If $n\geq3$, let
\begin{gather*}
\sigma_n=
\begin{cycle}
0\cdiv 1\cdiv \dots\cdiv n-1
\end{cycle},\\
\beta=
\begin{cycle}
1\cdiv n-1
\end{cycle}
\begin{cycle}
2\cdiv n-2
\end{cycle}
\cdots
\begin{cycle}
m\cdiv n-m
\end{cycle}
\end{gather*}
in $\Sym n$,
where $m$ is the greatest integer that is less than $n/2$.
Then
\begin{equation*}
\Dih n=\gpgen{\sigma_n,\beta}=\gpgen{\beta,\beta\sigma_n}.
\end{equation*}
\end{theorem}
\begin{proof}
The subset $\{\sigma_n{}^i\beta^j\colon(i,j)\in n\times 2\}$ of $\Sym n$
is a subset of $\Dih n$ and has $2n$ distinct elements,
so by Theorem~\ref{thm:Dih} (p.~\pageref{thm:Dih}) it must be all of $\Dih n$.
Moreover
$\gpgen{\beta,\beta\sigma_n}\subgp\gpgen{\sigma_n,\beta}$, but also $\gpgen{\sigma_n,\beta}\subgp\gpgen{\beta,\beta\sigma_n}$
since $\sigma_n=\beta\cdot\beta\sigma_n$.
\end{proof}
Our analysis of $\Dih n$ is continued in Theorem~\ref{thm:Dn} below.
In case $n=0$, the group $\gpgen{a_0,\dotsc,a_{n-1}}$
should logically be denoted by $\gpgen{\ }$.
Probably most people write $\gpgen{\gid}$ instead.
This is not wrong, but is redundant,
since every group contains an identity,
and the angle brackets indicate that a group is being given.
The practice of these notes will be to write $\trivgp$.
\section{Order}\label{sect:cyclic}
The \textbf{order}\index{order!--- of a group} of a group is its cardinality.
The order of a group $G$ is therefore denoted by
\begin{equation*}
\order G.
\end{equation*}
We have examples in Theorems~\ref{thm:Sym-ord} and \ref{thm:Dih} (pp.~\pageref{thm:Sym-ord}--\pageref{thm:Dih}).
If $a\in G$, then the order of the cyclic subgroup $\gpgen a$ of $G$
is said to be the \textbf{order}\index{order!--- of an element} of $a$ simply
and is denoted by
\begin{equation*}
\order a.
\end{equation*}
For example, in the quaternion group $\quat$ (p.~\pageref{quat} above), we have
\begin{align*}
\gpgen{\mi}&=\{1,\mi,-1,-\mi\},&\order{\mi}&=4.
\end{align*}
In the notation of Theorem~\ref{thm:Dih-2} above,
\begin{align*}
\order{\sigma_n}&=n,&
\order{\beta}&=2=\order{\beta\sigma_n}.
\end{align*}
For another example, we have the following.
\begin{theorem}
The order of a finite permutation
is the least common multiple of the orders of its disjoint cyclic factors.
\end{theorem}
\begin{theorem}\label{thm:el-ord}
In a group, if $a$ is an element of finite order $n$, then
\begin{equation*}
\gpgen a=\{a^i\colon i\in n\},
\end{equation*}
and $x\mapsto a^x$ is a well-defined isomorphism from $\Zmod n$ to $\gpgen a$,
so in particular
\begin{equation*}
a^n=\gid.
\end{equation*}
\end{theorem}
\begin{proof}
Since $\gpgen a$ does not have $n+1$ distinct elements,
for some $i$ and $j$ we have $0\leq i<j\leq n$ and $a^i=a^j$.
% NOTE(review): text appears to be missing here in the source
% (a ``<''-delimited span was lost in extraction); reconstructed minimally.
% The label thm:Dn is restored from later references to it.
\end{proof}
\begin{theorem}\label{thm:Dn}
If $n>2$, and $G=\gpgen{a,b}$, where
\begin{align*}
\order a&=n,&
\order b&=2,&
\order{ab}&=2,
\end{align*}
then
\begin{equation*}
G\cong\Dih n.
\end{equation*}
\end{theorem}
\begin{proof}
Assume $n\geq 2$.
Since $abab=\gid$ and $b\inv=b$, we have
\begin{align*}
ba&=a\inv b,&
ba\inv&=ab.
\end{align*}
Therefore $ba^k=a^{-k}b$ for all integers
$k$. This shows
\begin{equation*}
G=\{a^ib^j\colon(i,j)\in n\times 2\}.
\end{equation*}
It remains to show $\order G=2n$.
Suppose
\begin{equation*}
a^ib^j=a^kb^{\ell},
\end{equation*}
where $(i,j)$ and $(k,\ell)$ are in $n\times 2$. Then
\begin{equation*}
a^{i-k}=b^{\ell-j}.
\end{equation*}
If $b^{\ell-j}=\gid$, then $\ell=j$ and $i=k$. The alternative is that
$b^{\ell-j}=b$. In this case,
\begin{equation*}
n\divides2(i-k).
\end{equation*}
If $n\divides i-k$, then $i=k$ and hence $j=\ell$. The only other
possibility is that $n=2m$ for some $m$, and $i-k=\pm m$, so that $a^m=b$.
But then $aa^maa^m=a^2$, while $abab=\gid$, so $n=2$.
\end{proof}
According to this theorem,
if a group with certain abstract properties of $\Dih n$ exists,
then that group is isomorphic to $\Dih n$.
In \S\ref{sect:pres}, we shall develop a way to create a group $G$ with those properties,
regardless of whether we know about $\Dih n$.
Then, using Theorem~\ref{thm:Dn}, we shall be able to conclude
that $G$ is isomorphic to $\Dih n$.
This result is Theorem~\ref{thm:Dn-pres} (p.~\pageref{thm:Dn-pres}).
\begin{theorem}\label{thm:quat}
If $G=\gpgen{a,b}$, where
\begin{align*}
\order a&=4,&
b^2&=a^2,&
ba&=a^3b,
\end{align*}
then, under an isomorphism taking $a$ to $\mi$ and $b$ to $\mj$,
\begin{equation*}
G\cong\quat.
\end{equation*}
\end{theorem}
\begin{proof}
Since $ba=a^3b$ and $\order a=4$, we have also
\begin{equation*}
ba\inv=ba^3=a^9b=ab,
\end{equation*}
so we can write every element of $G$
as a product $a^ib^j$ for some $i$ and $j$ in $\Z$.
By Theorem~\ref{thm:el-ord}, since $\order a=4$, we can require $i\in4$.
Similarly, since $b^2=a^2$, we can require $j\in 2$.
In $\quat$, the elements $\mi$ and $\mj$ have the given properties of $a$ and $b$.
Moreover $\size{\quat}=8$,
so that if $(i,j)$ and $(k,\ell)$ are distinct elements of $4\times2$,
then
\begin{equation*}
\mi^i\mj^j\neq\mi^k\mj^{\ell}.
\end{equation*}
Therefore there is a well-defined surjective function $\mi^i\mj^j\mapsto a^ib^j$
from $\quat$ to $G$,
and this function is a homomorphism.
It remains to show $\size G=8$.
Suppose $(i,j)$ and $(k,\ell)$ are in $4\times 2$, and
\begin{equation*}
a^ib^j=a^kb^{\ell}.
\end{equation*}
Then $a^{i-k}=b^{\ell-j}$ and hence
\begin{equation*}
a^m=b^n
\end{equation*}
for some $n$ in $2$ and $m$ in $4$.
If $n=0$, then $m=0$ (since $\order a=4$), and so $(i,j)=(k,\ell)$.
But $a\neq b$ (since $ba=a^3b$ and $\order a=4$).
Similarly $a^3\neq b$.
Finally, $a^2\neq b$ (since $b^2=a^2$ and $\order a=4$).
Thus $n\neq1$, so $n=0$.
\end{proof}
As with $\Dih n$, so with $\quat$, we shall be able to create the group
using only the abstract properties just given,
in Theorem~\ref{thm:quat-pres} (p.~\pageref{thm:quat-pres}).
\section{Cosets}\label{sect:cosets}
Suppose $H\subgp G$. If $a\in G$, let
\begin{gather*}
aH=\{ax\colon x\in H\},\\
Ha=\{xa\colon x\in H\}.
\end{gather*}
Each of the sets $aH$ is a \textbf{left coset}\index{left!---
coset}\index{coset}
of $H$, and the set $\{xH\colon x\in G\}$ of left cosets
is denoted by
\begin{equation*}
G/H.
\end{equation*}
Each of the sets $Ha$ is a \textbf{right coset}\index{right!--- coset} of $H$, and the set $\{Hx\colon x\in G\}$ of right cosets
is denoted by
\begin{equation*}
H\backslash G.
\end{equation*}
Note that $H$ itself is both a left and a right coset of itself.
Sometimes, for each $a$ in $G$, we have $aH=Ha$. For example, this is the case when $G=G_0\times G_1$, and $H=G_0\times\trivgp$, so that, if $a=(g_0,g_1)$, then
\begin{equation*}
aH=G_0\times\{g_1\}=Ha.
\end{equation*}
Sometimes left and right cosets are different, as in the example\label{ex:32again} on p.\ \pageref{ex:32}, where $G=\Sym 3$, and $H$ is the image of $\Sym 2$ in $G$. In this case
\begin{align*}
(0\cdiv 2)H&=\{(0\cdiv 2),(0\cdiv 1\cdiv 2)\},& H(0\cdiv 2)&=\{(0\cdiv 2),(0\cdiv 2\cdiv 1)\},\\
(1\cdiv 2)H&=\{(1\cdiv 2),(0\cdiv 2\cdiv 1)\},& H(1\cdiv 2)&=\{(1\cdiv 2),(0\cdiv 1\cdiv 2)\}.
\end{align*}
Moreover, there are no other cosets of $H$, besides $H$ itself, by the next theorem; so in the example, no left coset, besides $H$, is a right coset.
\begin{theorem}\label{thm:cosets}
Suppose $H\subgp G$.
The left cosets of $H$ in $G$ compose a partition of $G$. Likewise for the right cosets. All
cosets of $H$ have the same size; also, $G/H$ and
$H\backslash G$ have the same size.
\end{theorem}
\begin{proof}
We have $a\in aH$. Suppose $aH\cap bH\neq\emptyset$. Then $ah=bh_1$ for some $h$ and $h_1$ in $H$, so that $a=bh_1h\inv$, which is in $bH$. Thus $a\in bH$, and hence $aH\included bH$. By symmetry of the argument, we have also $bH\included aH$, and therefore $aH=bH$. Hence the left cosets compose a partition of $G$. By symmetry again, the same is true for the right cosets.
All cosets of $H$ have the same size as $H$, since
the map $x\mapsto ax$ from $H$ to $aH$ is a bijection with inverse $x\mapsto a\inv x$, and likewise $x\mapsto xa$ from $H$ to $Ha$ is a bijection. (One might see this as an application of Cayley's Theorem, Theorem~\ref{thm:Cay}, p.\ \pageref{thm:Cay}.)
Inversion is a
permutation of $G$ taking $aH$ to $Ha\inv$, so $G/H$ and
$H\backslash G$ must have the same size.
\end{proof}
\begin{corollary}\label{cor:cosets-1}
If $H\subgp G$, then the relation $\sim$ on $G$ defined by
\begin{equation*}
a\sim x\liff aH=xH
\end{equation*}
is an equivalence-relation, and
\begin{equation*}
G/H=G/\mathord{\sim}.
\end{equation*}
\end{corollary}
\begin{corollary}\label{cor:cosets-2}
If $H\subgp G$ and $aH=Hb$, then $aH=Ha$.
\end{corollary}
\begin{proof}
Under the assumption, $a\in Hb$, so $Ha\included Hb$, and therefore $Ha=Hb$.
\end{proof}
The cardinality of $G/H$ (or of $H\backslash G$)
is called the \textbf{index}\index{index} of $H$ in $G$
and can be denoted by
\begin{equation*}
[G:H].
\end{equation*}
If $G$ is finite, then by the last theorem,
\begin{equation*}
[G:H]=\frac{\size G}{\size H}.
\end{equation*}
However, $[G:H]$ may be finite, even though $G$ is not.
In this case, $H$ must also be infinite,
and indeed the last equation may be understood to say this,
since an infinite cardinal divided by a finite cardinal should still be infinite.
Of the next theorem, we shall be particularly interested in a special case,
Lagrange's Theorem, in the next section.
\begin{theorem}\label{thm:KHG}
If $K\subgp H\subgp G$, then $[G:K]=[G:H][H:K]$.
\end{theorem}
\begin{proof}
Every left coset of $K$ is included in a left coset of $H$.
Indeed, if $bK\cap aH\neq\emptyset$, then as in the proof of
Theorem~\ref{thm:cosets}, $bK\included aH$.
Moreover, every left coset of $H$ includes the same number of left cosets of $K$. For, the bijection $x\mapsto ax$ that takes $H$ to $aH$ also takes each coset $bK$ of $K$ to a coset $abK$ of $K$.
\end{proof}
\section{Lagrange's Theorem}\label{sect:Lagrange}
According to \cite[p.~141--2]{MR1517828},
the following ``is implied but not explicitly proved''
in a memoir by Lagrange published in 1770--1.
\begin{theorem}[Lagrange]\label{thm:Lagrange}\index{Lagrange's
Theorem}\index{theorem!Lagrange's Th---}
If $H\subgp G$ and $G$ is finite, then $\order H$ divides $\order G$.
\end{theorem}
\begin{proof}
Use Theorem~\ref{thm:KHG} when $K=\trivgp$.
\end{proof}
\begin{corollary}
If $G$ is finite and $a\in G$, then $a^{\order G}=\gid$.
\end{corollary}
\begin{proof}
$a^{\order a}=\gid$ by Theorem~\ref{thm:el-ord} (p.~\pageref{thm:el-ord}),
and $\order a$ divides $\order G$.
\end{proof}
Cauchy's Theorem (p.\ \pageref{thm:Cauchy})
and its generalization,
the first Sylow Theorem (p.\ \pageref{thm:Sylow-1}),
are partial converses of Lagrange's Theorem.
Meanwhile, some basic results of number theory
can be seen as applications of Lagrange's Theorem.
First we obtain a classification of certain finite groups.
An integer greater than $1$ is called \textbf{prime}\label{prime-number}
if its only divisors are itself and $1$.
\begin{theorem}
All groups of prime order are cyclic.
\end{theorem}
\begin{proof}
Say $\order G=p$.
There is $a$ in $G\setminus\trivgp$,
so $\order a>1$;
but $\order a$ divides $p$, so $\order a=p$, and therefore $G=\gpgen a$.
\end{proof}
The following can be obtained
as a corollary of Theorem~\ref{thm:Znx} (p.\ \pageref{thm:Znx});
but we can obtain it also from Lagrange's Theorem.%%%%%
\footnote{This is observed by Timothy Gowers, editor of \cite{MR2467561},
in a Google+ article of December 21, 2013.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{theorem}\label{thm:p-prime}
An integer $p$ that is greater than $1$ is prime if and only if
\begin{equation*}
\Zmodu p=\{1,\dots,p-1\}.
\end{equation*}
\end{theorem}
\begin{proof}
Say $1<m<p$.
% NOTE(review): text appears to be missing here in the source
% (a ``<''-delimited span was lost in extraction); reconstructed minimally.
\end{proof}
If $n>1$, since $[\Sym n:\Alt n]=2$, we now have
\begin{equation*}
\Alt n\nsubgp\Sym n.
\end{equation*}
Of course we have this trivially if $n\leq1$.
In general, if $N\nsubgp G$, then the group $G/N$ is called the
\textbf{quotient-group}%
\index{quotient!--- group}\index{group!quotient ---} of
$G$ by $N$. In this case, we can write the group also as
\begin{equation*}
\frac GN.
\end{equation*}
\begin{theorem}\label{thm:NGHG}
If $N\nsubgp G$ and $H\subgp G$, then $N\cap H\nsubgp H$.
(That is, normality is preserved in subgroups.)
\end{theorem}
\begin{proof}
The defining property of normal subgroups is universal.
In particular,
$N\nsubgp G$ means that the sentence
\begin{equation*}
\Forall x\Forall y(x\in N\to yxy\inv\in N)
\end{equation*}
is true in the structure $(G,N)$. Therefore the same sentence is true in every substructure of $(G,N)$. If $H\subgp G$, then $(G,N\cap H)$ is a substructure of $(G,N)$.
\end{proof}
For example, if $m<n$, then, $\Sym m$ being a subgroup of $\Sym n$,
we have $\Alt n\cap\Sym m\nsubgp\Sym m$.
% NOTE(review): text appears to be missing here in the source
% (a ``<''-delimited span was lost in extraction); reconstructed minimally.
\begin{theorem}
$\Alt n$ is simple if $n>4$.
\end{theorem}
\begin{proof}
Suppose $\Alt n$ has normal subgroup $N$ with a nontrivial element $\sigma$.
Then $\sigma$ is the product of disjoint cycles, among which are:
\begin{compactenum}[1)]
\item
a cycle of order at least $4$; or
\item
two cycles of order $3$; or
\item
transpositions, only one $3$-cycle, and no other cycles; or
\item
only transpositions.
\end{compactenum}
We show that, in each case,
$N$ contains a $3$-cycle.
\begin{asparaenum}
\item
Suppose first that $\sigma$ is
$\begin{cycle}
0 \cdiv 1 \cdiv \dots \cdiv k-1
\end{cycle}\tau$
for some $\tau$ that is disjoint from
$\begin{cycle}
0 \cdiv 1 \cdiv \cdots \cdiv k-1
\end{cycle}$.
Then $N$ contains both
\begin{equation*}
\begin{cycle}
0 \cdiv 1 \cdiv 2
\end{cycle}
\begin{cycle}
0 \cdiv 1 \cdiv \cdots \cdiv k-1
\end{cycle}\tau
\begin{cycle}
2 \cdiv 1 \cdiv 0
\end{cycle}
\end{equation*}
and
$\tau\inv
\begin{cycle}
k-1\cdiv \cdots \cdiv 1 \cdiv 0
\end{cycle}$,
and their product is a $3$-cycle:
\begin{multline*}
\begin{cycle}
0 \cdiv 1 \cdiv 2
\end{cycle}
\begin{cycle}
0 \cdiv 1 \cdiv \cdots \cdiv k-1
\end{cycle}\tau
\begin{cycle}
2 \cdiv 1 \cdiv 0
\end{cycle}
\tau\inv
\begin{cycle}
k-1\cdiv \cdots \cdiv 1 \cdiv 0
\end{cycle}\\
=
\begin{cycle}
0 \cdiv 1 \cdiv 3
\end{cycle}.
\end{multline*}
\item
If $\tau$ is disjoint from $\begin{cycle}
0 \cdiv 1 \cdiv 2
\end{cycle}
\begin{cycle}
3 \cdiv 4 \cdiv 5
\end{cycle}$, then we reduce to the previous case:
\begin{multline*}
\begin{cycle}
0 \cdiv 1 \cdiv 3
\end{cycle}
\underbrace{
\begin{cycle}
0 \cdiv 1 \cdiv 2
\end{cycle}
\begin{cycle}
3 \cdiv 4 \cdiv 5
\end{cycle}}\tau
\begin{cycle}
3 \cdiv 1 \cdiv 0
\end{cycle}
\tau\inv
\underbrace{
\begin{cycle}
5 \cdiv 4 \cdiv 3
\end{cycle}
\begin{cycle}
2 \cdiv 1 \cdiv 0
\end{cycle}}\\
=
\begin{cycle}
0 \cdiv 1 \cdiv 4 \cdiv 2 \cdiv 3
\end{cycle}.
\end{multline*}
\item
If $\tau$ is disjoint from
$\begin{cycle}
0 \cdiv 1 \cdiv 2
\end{cycle}$ and is the product of transpositions, then
\begin{equation*}
\left[\begin{cycle}
0 \cdiv 1 \cdiv 2
\end{cycle}\tau\right]^2=
\begin{cycle}
2 \cdiv 1 \cdiv 0
\end{cycle}.
\end{equation*}
\item
Finally, suppose $\tau$ is a product of transpositions disjoint from
$\begin{cycle}
0\cdiv 1
\end{cycle}$ and
$\begin{cycle}
2\cdiv 3
\end{cycle}$.
Then
\begin{equation*}
\begin{cycle}
0 \cdiv 1 \cdiv 2
\end{cycle}
\underbrace{
\begin{cycle}
0 \cdiv 1
\end{cycle}
\begin{cycle}
2 \cdiv 3
\end{cycle}
\tau}
\begin{cycle}
2 \cdiv 1 \cdiv 0
\end{cycle}
\underbrace{
\tau
\begin{cycle}
3 \cdiv 2
\end{cycle}
\begin{cycle}
1 \cdiv 0
\end{cycle}}
=
\begin{cycle}
0 \cdiv 2
\end{cycle}
\begin{cycle}
1 \cdiv 3
\end{cycle}.
\end{equation*}
Furthermore, since $n>4$, in $\Alt n$ we compute
\begin{equation*}
\begin{cycle}
0 \cdiv 2 \cdiv 4
\end{cycle}
\underbrace{
\begin{cycle}
0 \cdiv 2
\end{cycle}
\begin{cycle}
1 \cdiv 3
\end{cycle}}
\begin{cycle}
4 \cdiv 2 \cdiv 0
\end{cycle}
\underbrace{
\begin{cycle}
3 \cdiv 1
\end{cycle}
\begin{cycle}
2 \cdiv 0
\end{cycle}}
=
\begin{cycle}
0 \cdiv 4 \cdiv 2
\end{cycle}.\qedhere
\end{equation*}
\end{asparaenum}
\end{proof}
For the sake of classifying small finite groups in general
(in \S\ref{sect:class-small}, p.\ \pageref{sect:class-small}),
we shall want the following,
which assumes $\Alt n$ is defined just when $n\geq2$ (see p.\ \pageref{Alt} above).
\begin{theorem}\label{thm:SA2}
$\Alt n$ is the unique subgroup of $\Sym n$ of index $2$.
\end{theorem}
% END OF DAY 6 (October 13, 2008)
\chapter{Category theory}
\section{Products}\label{sect:prod}
There is a simple property of direct products of groups (as defined on p.\ \pageref{dp})
that will turn out to characterize these products.
If $G_0$ and $G_1$ are groups,
then we know from Theorem~\ref{thm:coord-proj} on p.\ \pageref{thm:coord-proj}
that for each $i$ in $2$, the function
\begin{equation*}
(x_0,x_1)\mapsto x_i
\end{equation*}
from $G_0\times G_1$ to $G_i$ is a homomorphism.
It can be called a \textbf{coordinate projection}\label{coord-proj}
and denoted by
\begin{equation*}
\uppi_i.
\end{equation*}
\begin{theorem}\label{thm:prod}
Let $G_0$, $G_1$ and $H$ be groups such that,
for each $i$ in $2$, there is a homomorphism $f_i$ from $H$ to $G_i$.
Then the function
\begin{equation*}
x\mapsto(f_0(x),f_1(x))
\end{equation*}
from $H$ to $G_0\times G_1$ is a homomorphism,
and it is the unique homomorphism $f$ from $H$ to $G_0\times G_1$
such that, for each $i$ in~$2$,
\begin{equation*}
\uppi_if=f_i,
\end{equation*}
that is, the following diagram commutes:
\begin{equation*}
\xymatrix{
G_0 & \ar[l]_-{\uppi_0} G_0\times G_1 \ar[r]^-{\uppi_1} & G_1\\
& \ar[ul]^{f_0} \ar[u]_f H \ar[ur]_{f_1} &
}
\end{equation*}
If the groups $G_i$ are abelian, then so is $G_0\times G_1$.
\end{theorem}
\begin{proof}
If $u\in G_0\times G_1$, then
\begin{equation*}
u=(\uppi_0(u),\uppi_1(u)).
\end{equation*}
Hence, if $f\colon H\to G_0\times G_1$,
then $f(x)=(\uppi_0f(x),\uppi_1f(x))$.
In particular then, $f$ is as desired if and only if
$f(x)=(f_0(x),f_1(x))$.
\end{proof}
\newcounter{prod}
\setcounter{prod}{\value{theorem}}
Considering this theorem and its proof,
we may see that a more general result can be obtained.
This is the porism below.
We obtain it by considering
an \textbf{indexed family} $(G_i\colon i\in I)$ of groups.
This is an indexed set in the sense of p.\ \pageref{indexed};
we use the word \emph{family}
to emphasize that the structure of each $G_i$ will be important.
The \textbf{direct product}\index{direct product}\label{dprod}
of the indexed family can be denoted by one of
\begin{align*}
&\prod_{i\in I}G_i,&&\prod(G_i\colon i\in I).
\end{align*}
This is, first of all,
the set whose elements are indexed sets $(x_i\colon i\in I)$
such that $x_i\in G_i$ for each $i$ in $I$.
Note a special case: If all of the groups $G_i$ are the same group $G$, then
\begin{equation*}
\prod_{i\in I}G=G^I.
\end{equation*}
In case $I=n$, we may write $\prod_{i\in I}G_i$ also as
\begin{equation*}
G_0\times\cdots\times G_{n-1},
\end{equation*}
and a typical element of this as $(x_0,\dots,x_{n-1})$.
\begin{theorem}
The direct product of an indexed family $(G_i\colon i\in I)$ of groups
is a group under the multiplication given by
\begin{equation*}
(x_i\colon i\in I)\cdot(y_i\colon i\in I)=(x_i\cdot y_i\colon i\in I).
\end{equation*}
Each of the functions
\begin{equation*}
(x_j\colon j\in I)\mapsto x_i
\end{equation*}
is a homomorphism from $\prod_{j\in I}G_j$ to $G_i$.
\end{theorem}
\begin{proof}
As for Theorem~\ref{thm:power} on p.\ \pageref{thm:power}
and Theorem~\ref{thm:coord-proj} on p.\ \pageref{thm:coord-proj}.
\end{proof}
As before, the homomorphisms in the porism are the \textbf{coordinate projections,}
denoted by
\begin{equation*}
\uppi_i.
\end{equation*}
\newcounter{savethm}
\setcounter{savethm}{\value{theorem}}
\setcounter{theorem}{\value{prod}}
\begin{porism}\label{por:prod}
Suppose $(G_i\colon i\in I)$ is an indexed family of groups, and $H$
is a group, and for each $i$ in $I$ there is a homomorphism from $H$
to $G_i$. Then there is a homomorphism
\begin{equation}\label{eqn:unique-hom}
x\mapsto(f_i(x)\colon i\in I)
\end{equation}
from $H$ to
$\prod_{i\in I}G_i$, and this
is the unique homomorphism $f$ from $H$ to
$\prod_{i\in I}G_i$ such that, for each $i$ in~$I$,
\begin{equation*}
\uppi_if=f_i,
\end{equation*}
that is, the following diagram commutes:
\begin{equation*}
\xymatrix{
\displaystyle\prod_{j\in I}G_j \ar[r]^-{\uppi_i} & G_i\\
H\ar[u]^{f}\ar[ur]_{f_i}
}
\end{equation*}
% NOTE(review): text appears to be missing here in the source
% (a ``<''-delimited span was lost in extraction); reconstructed minimally.
If $x_i>0$,
we can replace $x_i\bv^i$ with $x_i$-many copies of $\bv^i$,
and if $x_j<0$,
we can replace $x_j\bv^j$ with $-x_j$-many copies of $-\bv^j$.
For example,
\begin{equation*}
3\bv^0-2\bv^1=\bv^0+\bv^0+\bv^0-\bv^1-\bv^1.
\end{equation*}
In general, every nontrivial element of $\sum_{i\in I}\Z$
is \emph{uniquely} a sum of some copies of the $\bv^i$ and the $-\bv^j$,
if we disregard order,
and if we never allow $\bv^i$ and $-\bv^i$ for the same $i$ to appear in the same sum.
If we use multiplicative notation instead,
and if we do not disregard order,
what we get is not an abelian group, much less a free abelian group;
but it is a \emph{free group.}
To be precise, a \textbf{word}\index{word} on $I$ is a
finite nonempty string $t_0t_1\cdots t_n$,
where
each entry $t_k$ is either $\gid$, or else $a$ or $a\inv$ for some $a$
in $I$. A word is \textbf{reduced}\index{reduced} if
$a$ and $a\inv$ are never adjacent in it, and
$\gid$ is never adjacent to any other entry.
Thus the only reduced word in which $\gid$ can appear
is just the word of length $1$ whose only entry is $\gid$.
The \textbf{free group on} $I$, denoted by
\begin{equation*}
\free I,
\end{equation*}
consists of the reduced words on $I$.
Multiplication in this group
is juxtaposition followed by \textbf{reduction,}\index{reduction}
namely, replacement of each occurrence of $aa\inv$ or $a\inv a$ with $\gid$,
and replacement of each occurrence of $x\gid$ or $\gid x$ with $x$.
Thus, if we write an element $a$ of $I$ as $a^1$,
we can express the product of two arbitrary reduced words by the equation
\begin{equation*}
(a_{m}^{\epsilon(m)}\cdots a_{0}^{\epsilon(0)})
(b_{0}^{\zeta(0)}\cdots b_{n}^{\zeta(n)})=
a_{m}^{\epsilon(m)}\cdots a_{j}^{\epsilon(j)}b_j^{\zeta(j)}\cdots
b_{n}^{\zeta(n)},
\end{equation*}
where each exponent $\epsilon(i)$ or $\zeta(i)$ is $\pm1$, and the equation
\begin{equation*}
a_i^{\epsilon(i)}= b_i^{-\zeta(i)}
\end{equation*}
is true when $i<j$.
% NOTE(review): text appears to be missing here in the source
% (a ``<''-delimited span was lost in extraction); reconstructed minimally.
% The label thm:Dn-pres is restored from later references to it.
\begin{theorem}\label{thm:Dn-pres}
If $n>2$, then $\Dih n$ has the presentation
\begin{equation*}
\gpres{a,b}{a^n,b^2,(ab)^2}.
\end{equation*}
\end{theorem}
\begin{proof}
Note first that, in the group $\gpres{a,b}{a^n,b^2,(ab)^2}$,
the order of $a$ must divide $n$,
and each of the orders of $b$ and $ab$ must divide $2$.
Now, by Theorem~\ref{thm:Dih-2} on p.\ \pageref{thm:Dih-2},
$\Dih n$ has elements $\alpha$ and $\beta$ that generate the group
and are such that $\alpha^n$, $\beta^2$, and $(\alpha\beta)^2$ are all equal to $\gid$.
By von Dyck's Theorem then,
there is an epimorphism
from $\gpres{a,b}{a^n,b^2,(ab)^2}$ to $\Dih n$
taking $a$ to $\alpha$ and $b$ to $\beta$ and hence $ab$ to $\alpha\beta$.
Therefore the order of $a$ must be exactly $n$,
and the orders of $b$ and of $ab$ must be $2$.
By Theorem~\ref{thm:Dn} on p.\ \pageref{thm:Dn},
the epimorphism onto $\Dih n$ must be an isomorphism.
\end{proof}
\begin{theorem}\label{thm:quat-pres}
The quaternion group $\quat$ has the presentation
\begin{equation*}
\gpres{\mi,\mj}{\mi^4,\mi^2\mj^2,\mi\mj\mi^3\mj},
\end{equation*}
or equivalently $\gpres{\mi,\mj}{\mi^4=\gid,\ \mi^2=\mj^2,\ \mj\mi=\mi^3\mj}$.
\end{theorem}
\begin{proof}
Use von Dyck's Theorem and Theorem~\ref{thm:quat} in the manner of the previous proof.
\end{proof}
Yet another example of a presentation
will be given in Theorem~\ref{thm:S3Z4} on p.\ \pageref{thm:S3Z4}.
\section%[Fin.~gen.~ab.~groups]
{Finitely generated abelian groups}\label{sect:fgag}
We now classify,
in the sense of \S\ref{sect:fin} (p.\ \pageref{sect:fin}),
the abelian groups with finite sets of generators,
and in particular the finite abelian groups.
A useful application of this
will be that the group of units of every finite field is cyclic
(Theorem~\ref{thm:Zp-cross}).
\begin{theorem}\label{thm:prod-quot}
If $(G_i\colon i\in I)$ is an indexed family of groups,
and for each $i$ in $I$, $N_i\nsubgp G_i$,
then
\begin{align*}
\prod_{i\in I}N_i&\nsubgp\prod_{i\in I}G_i,&
\left.\prod_{i\in I}G_i\right/\prod_{i\in I}N_i
&\cong\prod_{i\in I}\frac{G_i}{N_i}.
\end{align*}
\end{theorem}
\begin{theorem}\label{thm:fin-gen-ab}
For every abelian group $G$ on $n$ generators, there is a unique
element $k$ of $n+1$, along with positive integers $d_0$, \dots,
$d_{k-1}$, where
\begin{equation}\label{d}
d_0\divides d_1\land\dots\land d_{k-2}\divides d_{k-1},
\end{equation}
such that
\begin{equation}\label{FH}
G\cong
\Zmod{d_0}\oplus\dotsb\oplus\Zmod{d_{k-1}}
\oplus\underbrace{\Z\oplus\dotsb\oplus\Z}_{n-k}.
\end{equation}
\end{theorem}
\begin{proof}
Suppose $G=\gpgen{g^i\colon i<n}$.
% NOTE(review): text appears to be missing here in the source
% (a ``<''-delimited span was lost in extraction); reconstructed minimally.
\end{proof}
If $n>1$, then $\Dih{2^n}$ is a nonabelian $2$-group.
By Cauchy's Theorem, the hypothesis of the following is always satisfied.
\begin{theorem}\label{thm:abe}
Suppose $p$ and $q$ are distinct primes, and $G$ is a group of order $pq$.
If $a$ and $b$ are elements of $G$ of orders $p$ and $q$ respectively,
then
\begin{align*}
\gpgen a\cap\gpgen b&=\trivgp,& G&=\gpgen a\gpgen b.
\end{align*}
\end{theorem}
In the theorem, if $\gpgen a$ is a \emph{normal} subgroup of $G$,
then $G$ is a semidirect product,
by Theorem~\ref{thm:isdp} on p.\ \pageref{thm:isdp}.
If also $\gpgen b\nsubgp G$, then $G$ is actually a direct product,
isomorphic to $\Zmod p\times\Zmod q$.
Otherwise, $G$ is not abelian, and by Theorem~\ref{thm:pq} there is only one possibility.
With Theorem~\ref{thm:pq-class} on p.\ \pageref{thm:pq-class},
we shall show that
one of $\gpgen a$ and $\gpgen b$ must be a normal subgroup of $G$,
and so $G$ is indeed either a direct or a semidirect product.
\section{Actions of groups}\label{sect:actions}
A homomorphism from a group $G$ to the symmetry group of a set $\setactedon$
is called an \textbf{action} of $G$ on $\setactedon$.
An alternative characterization of actions is given by the following.
\begin{theorem}
Let $G$ be a group, and $\setactedon$ a set. There is a one-to-one
correspondence between
\begin{compactenum}
\item
homomorphisms $g\mapsto(a\mapsto ga)$ from $G$ into
$\Sym{\setactedon}$, and
\item
functions $(g,a)\mapsto ga$ from $G\times\setactedon$ into $\setactedon$ such that
\begin{align}\label{act:gha}
\gid a&=a,&
(gh)a&=g(ha)
\end{align}
for all $g$ and $h$ in $G$ and $a$ in $\setactedon$.
\end{compactenum}
\end{theorem}
\begin{proof}
If $g\mapsto(a\mapsto ga)$ maps $G$ homomorphically into
$\Sym{\setactedon}$, then the identities in~\eqref{act:gha} follow.
Suppose conversely that these hold. Then, in particular,
\begin{equation*}
g(g\inv a)=(gg\inv)a=\gid a=a
\end{equation*}
and likewise $g\inv(ga)=a$, so $a\mapsto g\inv a$ is the inverse of
$a\mapsto ga$, and the function $g\mapsto(a\mapsto ga)$ does map $G$
into $\Sym{\setactedon}$, homomorphically by~\eqref{act:gha}.
\end{proof}
Usually it is a function $(g,a)\mapsto ga$ from $G\times\setactedon$ to $\setactedon$
as in the theorem that is called an action of $G$ on $\setactedon$.
So in the notation of the proof of Cauchy's Theorem,
the function $(k,\bm x)\mapsto f_k(\bm x)$ is an action of $\Zmod p$ on $\setactedon$.
Immediately, for any set $\setactedon$,
the function $(\sigma,x)\mapsto\sigma(x)$ from $\Sym{\setactedon}\times\setactedon$ to $\setactedon$
is an action of $\Sym{\setactedon}$ on $\setactedon$.
Other examples that will be of interest to us are given by the following.
\begin{theorem}
Let $G$ be a group and $H\subgp G$.
% NOTE(review): text appears to be missing here in the source
% (a ``<''-delimited span was lost in extraction); the commutative
% diagram below resumes mid-display, so its top row is restored generically.
\begin{equation*}
\xymatrix{
& & & & \ar@{.>}[l]\ar[d]\\
G \ar[d] & \cseries nG' \ar[l]\ar[d] & \cseries{n-1}G'\ar[l]\ar[d] &
\ar[l]\ar[d]\cseries{n-2}G'\ar[l] & \centr G' \ar@{.>}[l]
\ar[d]\\
\cseries nG & \ar[l] \cseries {n-1}G & \ar[l] \cseries{n-2}G & \ar[l]
\cseries{n-3}G & \trivgp \ar@{.>}[l]
}
\end{equation*}
Since $\Sym3/\Alt3$ is abelian, we have
\begin{align*}
\Sym3'&\subgp\Alt3,&
\Sym3''&\subgp\Alt3'=\trivgp,
\end{align*}
so $\Sym3$ is soluble. However,
\begin{equation*}
\Sym3=\Alt3\rtimes\gpgen{(0\cdiv 1)},
\end{equation*}
the semidirect product of its Sylow subgroups;
but the product is not \emph{direct,}
so $\Sym3$ is not nilpotent.
\begin{theorem}
Let $H\subgp G$ and $N\nsubgp G$.
\begin{compactenum}
\item
If $G$ is soluble, then so are $H$ and $G/N$.
\item
If $N$ and $G/N$ are soluble, then so is $G$.
\end{compactenum}
\end{theorem}
\begin{proof}
\begin{asparaenum}
\item
$\dsubgp kH\subgp\dsubgp kG$ and $\dsubgp k{(G/N)}=\dsubgp kGN/N$.
\item
If $G/N$ is soluble,
then $\dsubgp nG\subgp N$ for some $n$.
If also $N$ is soluble,
then $\dsubgp mN=\trivgp$ for some $m$,
so $\dsubgp{n+m}G\subgp\dsubgp mN=\trivgp$.\qedhere
\end{asparaenum}
\end{proof}
\begin{theorem}
Groups with non-abelian simple subgroups are not soluble.
\end{theorem}
\begin{proof}
Suppose $H$ is simple. Since $H'\nsubgp H$, we have either
$H'=\trivgp$ or $H'=H$. In the former case, $H$ is abelian; in
the latter, $H$ is insoluble.
\end{proof}
In particular, $\Sym n$ is not soluble if $n\geq 5$.%%%%%
\footnote{This is why the general $5$th-degree polynomial equation
% $a+bx+cx^2+dx^3+ex^4+x^5=0$
is insoluble by radicals.}
\section{Normal series}\label{sect:NS}
A \textbf{normal series}\index{normal!--- series} for a group $G$
is a list $(G_0,\dots,G_n)$ of subgroups, where
\begin{equation*}
G=G_0\nsupgp G_1\nsupgp\dots\nsupgp G_n=\trivgp.
\end{equation*}
We do not require $G_k\nsupgp G_{k+2}$.%%%%%
\footnote{One may call a normal series a \emph{subnormal series,}
reserving the term \emph{normal series} for the case where $G\nsupgp G_k$ for each $k$.
However, we shall not be interested in the distinction recognized by this terminology.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%
The quotients $G_k/G_{k+1}$
are called the \textbf{factors}\index{factor} of the normal series.
The series is called
\begin{compactenum}[1)]
\item
a \textbf{composition series,}\index{composition series}\index{series!composition ---} if the factors are simple;
\item
a \textbf{soluble series,}\index{soluble series}\index{soluble!---
series}\index{series!soluble ---} if the factors are abelian.
\end{compactenum}
\begin{theorem}
A group is soluble if and only if it has a soluble series.
\end{theorem}
\begin{proof}
If $\dsubgp nG=\trivgp$,
then $(\dsubgp0G,\dots,\dsubgp nG)$ is a soluble series for $G$,
by Theorem~\ref{thm:G'}.
Suppose conversely $(G_0,\dots,G_n)$ is a soluble series for $G$.
Again by Theorem~\ref{thm:G'}, we have $G_k{}'\subgp G_{k+1}$ for each $k$ in $n$.
Since also $H\subgp K$ implies $H'\subgp K'$, we have
\begin{gather*}
G'\subgp G_1,\\
G''\subgp G_1{}'\subgp G_2,\\
\makebox[4.5cm]{\dotfill},\\
\dsubgp nG\subgp \dsubgp{n-1}{G_1}\subgp \dots\subgp G_{n-1}{}'\subgp G_n=\trivgp.\qedhere
\end{gather*}
\end{proof}
Since not every finite group is soluble,
not every finite group has a soluble series.
However:
\begin{theorem}\label{thm:comp}
Every finite group has a composition series.
\end{theorem}
\begin{proof}
Trivially $(\trivgp)$ is a composition series.
Every nontrivial finite group $G$ has at least one proper normal subgroup,
namely $\trivgp$.
Being finite, $G$ has only finitely many normal subgroups.
Therefore $G$ has a maximal proper normal subgroup, $G^*$
(which need not be unique).
Then $G/G^*$ is simple, by Theorem~\ref{thm:GNKN} (p.\ \pageref{thm:GNKN}):
every normal subgroup of $G/G^*$ is $K/G^*$
for some normal subgroup $K$ of $G$ such that $G^*\subgp K$,
and therefore $K$ is either $G^*$ or $G$,
so $K/G^*$ is either $\trivgp$ or $G/G^*$.
Now let $G_0=G$, and let $G_{k+1}=G_k{}^*$ unless $G_k=\trivgp$.
Since $G$ is finite and $G_k\psupgp G_{k+1}$,
we must have $G_n=\trivgp$ for some $n$.
Then $(G_0,\dots,G_n)$ is the desired composition series.
\end{proof}
Two normal series are \textbf{equivalent}
if they have the same \emph{multiset}\label{multiset} of
(isomorphism classes of) nontrivial factors.
A \textbf{multiset} is a set in which repetitions of members are allowed.
For a formal definition,
we can say a multiset is a pair $(A,f)$,
where $A$ is a set and $f\colon A\to\N$.
For example, the two series
\begin{align*}
&(\Zmod{60},\gpgen2,\gpgen6,\gpgen{12},\trivgp),&
(\Zmod{60},\gpgen3,\gpgen{15},\gpgen{15},\gpgen{30},\trivgp)
\end{align*}
are equivalent, because the factors of the first
are isomorphic to $\Zmod2$, $\Zmod3$, $\Zmod2$, and $\Zmod5$ respectively,
and the factors of the second
are isomorphic to $\Zmod3$, $\Zmod5$, $\trivgp$, $\Zmod2$, and $\Zmod2$ respectively,
so each series has the same multiset of factors, namely
\begin{equation*}
\{\Zmod2,\Zmod2,\Zmod3,\Zmod5\}.
\end{equation*}
These series are \emph{not} equivalent to $(\Zmod{30},\gpgen2,\gpgen6,\trivgp)$,
whose factors are $\Zmod2$, $\Zmod3$, and $\Zmod5$.
If, from a normal series for a group,
another normal series for the group can be obtained by deleting some terms,
then the former series is a \textbf{refinement}\index{refinement} of the latter.
For example,
the series $(\Zmod{60},\gpgen2,\gpgen4,\gpgen{12},\trivgp)$
is a refinement of $(\Zmod{60},\gpgen4,\gpgen{12},\trivgp)$.
Every normal series is a refinement of a normal series with no trivial factors,
and these two series are equivalent.
Among normal series with no trivial factors,
composition series are \emph{maximal} in that they have no proper refinements.
If
\begin{gather*}
G=G_0(0)\nsupgp G_0(1)\nsupgp G_0(2)\nsupgp\dotsb\nsupgp G_0(n_0)=\trivgp,\\
G=G_1(0)\nsupgp G_1(1)\nsupgp G_1(2)\nsupgp\dotsb\nsupgp G_1(n_1)=\trivgp,
\end{gather*}
and the two normal series are equivalent
and have no trivial factors,
this means $n_0=n_1$, and there is $\sigma$ in $\Sym{n_0}$ such that
\begin{equation*}
G_0(i)/G_0(i+1)\cong G_1(\sigma(i))/G_1(\sigma(i)+1)
\end{equation*}
for each $i$ in $n_0$.
\begin{theorem}
A soluble series for a finite group has a refinement
in which the nontrivial factors are cyclic of prime order.
\end{theorem}
We now aim to prove Theorem~\ref{thm:JH} below.
The proof will use the following,
which is known as the Butterfly Lemma,
because the groups that it involves form the commutative diagram
in Figure~\ref{fig:butt}
(in which arrows are inclusions).
\begin{figure}[ht]
\begin{equation*}
\xymatrix{
& H_0 & & H_1 &\\
& N_0H\ar[u] & & N_1H\ar[u] &\\
& &H\ar[ul]\ar[ur]& &\\
&N_0(H_0\cap N_1)\ar[uu]& &N_1(H_1\cap N_0)\ar[uu]&\\
N_0\ar[ur]&&K\ar[ul]\ar[ur]\ar[uu]&&N_1\ar[ul]\\
&H_1\cap N_0\ar[ul]\ar[ur]&&H_0\cap N_1\ar[ul]\ar[ur]&
}
\end{equation*}
\caption{The Butterfly Lemma}\label{fig:butt}
\end{figure}
\begin{lemma}[Zassenhaus]\index{Zassenhaus
Lemma}\index{Butterfly Lemma}\index{theorem!Zassenhaus Lemma}
\index{theorem!Butterfly Lemma}\index{lemma|see{theorem}}
For a group $G$, suppose
\begin{align*}
N_0&\nsubgp H_0\subgp G,&
N_1&\nsubgp H_1\subgp G,
\end{align*}
and let
\begin{align*}
K&=(H_0\cap N_1)(H_1\cap N_0),&
H&=H_0\cap H_1.
\end{align*}
Then
\begin{equation*}
K\nsubgp H,
\end{equation*}
and for each $i$ in $2$,
there is a well-defined epimorphism
\begin{equation*}
nh\mapsto Kh
\end{equation*}
from $N_iH$ to $H/K$ with kernel $N_i(H_i\cap N_{1-i})$.
Hence:
\begin{compactenum}[1)]
\item
$N_i(H_i\cap N_{1-i})\nsubgp N_iH$ for each $i$ in $2$, and
\item
the two groups $N_iH/N_i(H_i\cap N_{1-i})$ are isomorphic to one another.
\end{compactenum}
\end{lemma}
\begin{proof}
For each $i$ in $2$, we have $H_i\cap N_{1-i}\nsubgp H$
by Theorem~\ref{thm:NGHG} (p.\ \pageref{thm:NGHG}).
Hence $K\nsubgp H$.
If $n,n'\in N_0$ and
$h,h'\in H$ and $nh'=n'h$, then
\begin{equation*}
h'h\inv=n\inv n',
\end{equation*}
which is in $N_0\cap H$ and hence in $K$,
so that $Kh=Kh'$.
Thus $nh\mapsto Kh$ (where $n\in N_0$ and $h\in H$)
is indeed a well-defined homomorphism $f$ from
$N_0H$ into $H/K$.
It is clear that $f$ is surjective.
Now let $n\in N_0$ and $h\in H$,
and suppose $nh\in\ker(f)$,
that is,
\begin{equation*}
h\in K.
\end{equation*}
Then $h=n_0n_1$ for some $n_0$ in $H_1\cap N_0$ and $n_1$ in $H_0\cap N_1$.
Hence $nh=nn_0n_1$,
which is in $N_0(H_0\cap N_1)$.
Thus
\begin{equation*}
nh\in N_0(H_0\cap N_1).
\end{equation*}
Conversely, suppose this last condition holds.
Since $h=n\inv nh$, we now have also
\begin{equation*}
h\in N_0(H_0\cap N_1),
\end{equation*}
so $h=n'h'$ for some $n'$ in $N_0$
and some $h'$ in $H_0\cap N_1$.
Then $n'=h(h')\inv$, which is in $H(H_0\cap N_1)$;
but this is a subgroup of $H_1$.
So $n'\in N_0\cap H_1$,
and therefore $n'h'$, which is $h$, is in $K$,
and so $nh\in\ker(f)$.
Thus $\ker(f)=N_0(H_0\cap N_1)$.
\end{proof}
\begin{theorem}[Schreier]\index{Schreier Theorem}
\index{theorem!Schreier Th---}
Any two normal series have equivalent refinements.
\end{theorem}
\begin{proof}
Suppose
\begin{equation*}
G=G_i(0)\nsupgp G_i(1)\nsupgp\dotsb\nsupgp G_i(n_i)=\trivgp,
\end{equation*}
where $i<2$.
In particular then,
\begin{align*}
G_0(j+1)&\nsubgp G_0(j)\subgp G,&
G_1(k+1)&\nsubgp G_1(k)\subgp G.
\end{align*}
Define
\begin{gather*}
G_0(j,k)=G_0(j+1)\cdot\bigl(G_0(j)\cap G_1(k)\bigr),\\
G_1(j,k)=G_1(k+1)\cdot\bigl(G_0(j)\cap G_1(k)\bigr),
\end{gather*}
where $(j,k)\in n_0\times n_1$.
Then by the Butterfly Lemma
\begin{gather*}
G_0(j)=G_0(j,0)\nsupgp\dotsb\nsupgp G_0(j,n_1)=G_0(j+1),\\
G_1(k)=G_1(0,k)\nsupgp\dotsb\nsupgp G_1(n_0,k)=G_1(k+1),
\end{gather*}
giving us normal series that are refinements of the original ones,
and also
\begin{equation*}
G_0(j,k)/G_0(j,k+1)\cong G_1(j,k)/G_1(j+1,k),
\end{equation*}
so that the two refinements are equivalent.
\end{proof}
\begin{theorem}[Jordan--H\"older]\label{thm:JH}\index{Jordan--H\"older
Theorem}\index{theorem!Jordan--H\"older Th---}
Any two composition series of a group are equivalent.
\end{theorem}
\begin{proof}
By Schreier's Theorem, any two composition series of a group have equivalent refinements;
but every refinement of a composition series is already equivalent to that series.
\end{proof}
Combining this with Theorem~\ref{thm:comp},
we have that every finite group determines a multiset
of finite simple groups,
and these are just the nontrivial factors
of any composition series of the group.
Hence arises the interest in the classification of the finite simple groups:
it is like studying the prime numbers.
\part{Rings}
\chapter{Rings}
\section{Rings}\label{sect:nna-rings}
We defined associative rings
in \S\ref{sect:rings} (p.\ \pageref{sect:rings}).
Now we define rings in general.
If $E$ is an abelian group (written additively),
then a \textbf{multiplication}\index{multiplication} on $E$
is a binary operation $\cdot$ that distributes in both senses over addition,
so that
\begin{align*}
x\cdot(y+z)&=x\cdot y+x\cdot z,&
(x+y)\cdot z&=x\cdot z+y\cdot z.
\end{align*}
A \textbf{ring}\index{ring} is an abelian group with
a multiplication.
In particular,
if $(R,1,\cdot)$ is an associative ring,
then $(R,\cdot)$ is a ring.
However, rings that are not (reducts of) associative rings
are also of interest:
see the next section.
\begin{theorem}\label{thm:0x0}
Every ring satisfies the identities
\begin{align*}
(x-y)\cdot z&=x\cdot z-y\cdot z,&
x\cdot(y-z)&=x\cdot y-x\cdot z.
\end{align*}
Hence, in particular,
\begin{gather*}
0\cdot x=0=x\cdot 0,\\
(-x)\cdot y=-(x\cdot y)=x\cdot(-y).
\end{gather*}
\end{theorem}
By Theorem~\ref{thm:exp-in-groups} (p.\ \pageref{thm:exp-in-groups}),
given an abelian group $E$, we have a homomorphism $n\mapsto(x\mapsto nx)$
from the monoid $(\Z,1,\cdot)$ to the monoid $(E^E,\id_E,\circ)$.
This is actually a homomorphism of associative rings:
\begin{theorem}\label{thm:Z-action}
For every abelian group $E$,
\begin{equation*}
n\mapsto (x\mapsto nx)\colon(\Z,0,-,+,1,\cdot)\to(\End E,\id_E,\circ).
\end{equation*}
%the map $n\mapsto (x\mapsto nx)$ is a homomorphism from $(\Z,0,-,+,1,\cdot)$ to $(\End E,\id_E,\circ)$.
In particular,
\begin{align*}
0x&=0,&1x&=x,&(-1)x&=-x.
\end{align*}
\end{theorem}
In the theorem, if the abelian group has a multiplication, then
\begin{equation*}
0\cdot x=0x,
\end{equation*}
where the zeros come from the ring and from $\Z$ respectively.
If, further, the multiplication has the identity $1$, then
\begin{equation*}
1\cdot x=1x.
\end{equation*}
More generally, we have
\begin{theorem}
For every integer $n$, every ring satisfies the identities
\begin{equation*}
(nx)\cdot y=n(x\cdot y)=x\cdot ny.
\end{equation*}
\end{theorem}
The kernel of the homomorphism in Theorem~\ref{thm:Z-action}
is $\gpgen k$ for some $k$ in $\upomega$,
by Theorem~\ref{thm:Z-subg} (p.\ \pageref{thm:Z-subg}).
Then $k$ can be called the \textbf{characteristic} of $E$.
For example, if $n\in\N$, then $\Zmod n$ has characteristic $n$,
while $\Z$ has characteristic $0$.
\begin{theorem}
If $(E,1,\cdot)$ is a ring with a multiplicative identity $1$,
then
\begin{equation*}
n\mapsto n1\colon(\Z,0,-,+,1,\cdot)\to(E,1,\cdot).
\end{equation*}
The kernel of this homomorphism is $\gpgen k$,
where $k$ is the characteristic of~$E$.
\end{theorem}
\begin{theorem}
Every ring embeds in a ring with identity having the same
characteristic, and in a ring with identity having characteristic $0$.
\end{theorem}
\begin{proof}
Suppose $R$ is a ring of characteristic $n$. Let $A$ be $\Z$ or
$\Zmod n$, and give $A\oplus R$ the multiplication defined by
\begin{equation*}
(m,x)(n,y)=(mn,my+nx+xy);
\end{equation*}
then $(1,0)$ is an identity, and $x\mapsto(0,x)$ is an embedding.
\end{proof}
\section{Examples}\label{sect:non-assoc}
The continuous functions on $\R$ with compact support
compose a ring with respect to the operations induced from $\R$.
Multiplication in this ring is associative,
but there is no identity.
If $n>1$, then $\gpgen n$ is a sub-ring of $\Z$ with no identity.
On p.\ \pageref{Ham} we obtained $\Ham$
as the sub-ring of $\Mat[2\times2]{\C}$
that is the image of $\C\oplus\C$ under the group-homomorphism
\begin{equation*}
(x,y)\mapsto
\begin{pmatrix}
x&y\\-\bar y&\bar x
\end{pmatrix}.
\end{equation*}
We also defined
\begin{equation*}
\mj=
\begin{pmatrix}
0&1\\-1&0
\end{pmatrix},
\end{equation*}
so that every element of $\Ham$ is $z+w\mj$
for some unique $(z,w)$ in $\C^2$.
Then $\Ham$ has the automorphism $z+w\mj\mapsto\overline{z+w\mj}$, where
\begin{equation*}
\overline{z+w\mj}=\bar z-w\mj.
\end{equation*}
Then the same construction that creates $\Ham$ out of $\C$
can be applied to $\Ham$ itself,
yielding the ring $\Oct$ of \textbf{octonions;}\index{octonion}
but this ring is not associative.
In any ring $(E,\cdot)$, we define
\begin{equation*}
[x,y]=x\cdot y-y\cdot x;
\end{equation*}
then the binary operation $(x,y)\mapsto[x,y]$
is also a multiplication on $E$.
This operation can be called the \textbf{Lie bracket.}
We have
\begin{equation}\label{eqn:xx0}
[x,x]=0.
\end{equation}
\begin{theorem}
In an associative ring,
\begin{equation}\label{eqn:xyz}
\bigl[[x,y],z\bigr]=\bigl[x,[y,z]\bigr]-\bigl[y,[x,z]\bigr].
\end{equation}
\end{theorem}
The identity \eqref{eqn:xyz} is called the \textbf{Jacobi identity.}
A \textbf{Lie ring} is a ring whose multiplication
has the properties of the Lie bracket
given by the identities~\eqref{eqn:xx0} and~\eqref{eqn:xyz}.
If $(E,1,\cdot)$ is an associative ring,
and $\bracket$ is the Lie bracket in this ring,
then $(E,\bracket)$ is a Lie ring.
However, we shall see presently
that there are Lie rings that do not arise in this way.
If $(E,\cdot)$ is a ring,
and $D$ is an element of $\End E$ satisfying the \textbf{Leibniz rule}
\begin{equation*}
D(x\cdot y)=Dx\cdot y+x\cdot Dy,
\end{equation*}
then $D$ is called a \textbf{derivation} of $(E,\cdot)$.\label{derivation}
For example,
let $\diff$ be the set of all infinitely differentiable functions
from $\R$ to itself.
This is an associative ring in the obvious way.
Then differentiation is a derivation of $\diff$.
\begin{theorem}
The set of derivations of a ring $(E,\cdot)$
is the universe of an abelian subgroup of $\End E$
and is closed under the bracket
\begin{equation*}
(X,Y)\mapsto X\circ Y-Y\circ X.
\end{equation*}
\end{theorem}
The abelian group of derivations of a ring $(E,\cdot)$
can be denoted by
\begin{equation*}
\Der{E,\cdot}.
\end{equation*}
Then $(\Der{E,\cdot},\bracket)$ is a sub-ring of $(\End E,\bracket)$,
but is not generally closed under $\circ$.
\section{Associative rings}
We know from Theorem~\ref{thm:units} (p.\ \pageref{thm:units})
that an associative ring $(R,1,\cdot)$ has a group of units, $\units R$.
In particular, in an associative ring,
when an element has both a left and a right inverse,
they are equal.
However, the example on p.\ \pageref{exa:no-unit}
shows that some ring elements can have right inverses that are not units.
A \textbf{zero-divisor}\index{zero-divisor}\index{divisor!zero ---}
of the ring $R$ is a nonzero element $b$
such that the equations
\begin{align*}
bx&=0,&yb&=0
\end{align*}
have nonzero solutions in $R$.
So zero-divisors are not units.
For example, if $m>1$ and $n>1$,
then $m+\gpgen {mn}$ and $n+\gpgen{mn}$ are zero-divisors in $\Zmod {mn}$.
The unique element of the trivial ring $\Zmod 1$ is a unit,
but not a zero-divisor.
A commutative ring is an
\textbf{integral domain}\index{integral domain}\index{domain!integral ---}%
\index{ring|seealso{domain}} if it has no zero-divisors and $1\neq0$.
If $n\in\N$, the ring $\Zmod n$ is an integral domain
if and only if $n$ is prime.%%%%%
\footnote{Lang refers to integral domains
as \emph{entire} rings \cite[p.~91]{Lang-alg}.
It would appear that integral domains
were originally subgroups of $\C$
that are closed under multiplication
\emph{and} that include the integers \cite[p.~47]{Cohn-ANT}.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Hence the characteristic of an integral domain must be prime or $0$.
Fields are integral domains,
but $\Z$ is an integral domain that is not a field.
If $p$ is prime, then,
by Theorem~\ref{thm:p-prime} (p.\ \pageref{thm:p-prime}),
$\Zmod p$ is a field,
and as such it is denoted by
\begin{equation*}
\F_p.
\end{equation*}
An arbitrary associative ring $R$
such that $R\setminus\units R=\{0\}$
is a \textbf{division ring.}\index{division ring}
So fields are division rings;
but $\Ham$ is a non-commutative division ring.
If $R$ is an associative ring, and $G$ is a group,
we can form the direct sum $\sum_{g\in G}R$,
which is, first of all, an abelian group.
It becomes a module over $R$
(in the sense of sub-\S\ref{subsect:mod}, p.\ \pageref{subsect:mod})
when we define
\begin{equation*}
r\cdot(x_g\colon g\in G)=(r\cdot x_g\colon g\in G)
\end{equation*}
for all $r$ in $R$ and $(x_g\colon g\in G)$ in $\sum_{g\in G}R$.
If $g\in G$, we have the canonical injection $\upiota_g$
of $R$ in $\sum_{g\in G}R$ as defined on p.\ \pageref{caninj}.
Let us denote $\upiota_g(1)$ also by
\begin{equation*}
g.
\end{equation*}
Then
\begin{equation*}
(r_g\colon g\in G)=\sum_{g\in G}r_g\cdot g.
\end{equation*}
Thus an element of $\sum_{g\in G}R$
becomes a \textbf{formal $R$-linear combination}\index{linear combination}
of elements of $G$.
Then multiplication on $\sum_{g\in G}R$ is defined in an obvious way:
if $r_i\in R$ and $g_i\in G$ for each $i$ in $2$, then
\begin{equation*}
(r_0\cdot g_0)(r_1\cdot g_1)=r_0r_1\cdot g_0g_1.
\end{equation*}
The definition extends to all of $\sum_{g\in G}R$ by distributivity.
The resulting ring can be denoted by
\begin{equation*}
R(G);
\end{equation*}
it is the \textbf{group ring}\index{group ring} of $G$ over $R$.
We can do the same construction with monoids, rather than groups.
For example, if we start with the free monoid generated by a symbol $X$,
we get a \textbf{polynomial ring}\index{polynomial ring}\label{poly-ring}
in one variable, denoted by
\begin{equation*}
R[X];
\end{equation*}
this is the ring of formal $R$-linear combinations of powers of $X$.
Such combinations can be written as
\begin{equation*}
\sum_{k<n}a_kX^k.
\end{equation*}
% [A passage appears to be missing from the source at this point.]
If $n>1$ and is not prime,
so that $n=ab$ for some $a$ and $b$ in $\{2,\dots,n-1\}$,
then $a$ and $b$ are zero-divisors in $\Zmod n$,
so $(n)$ is not prime.
The converse of the corollary fails easily,
since $(0)$ is a prime but non-maximal ideal of $\Z$.
However, every prime ideal of $\Z$ other than $(0)$ is maximal.
This is not the case for $\Q[X,Y]$,
which has the prime but non-maximal ideal $(X)$.
In some rings, \emph{every} prime ideal is maximal.
Such is the case for fields, since their only proper ideals are $(0)$.
It is also the case for \emph{Boolean rings.}
A ring is called \textbf{Boolean}\index{Boolean} if it satisfies the identity
\begin{equation*}
x^2=x.
\end{equation*}
In defining ultraproducts in \S\ref{sect:ultra} (p.\ \pageref{sect:ultra}),
we shall use the example established by the following:
\begin{theorem}\label{thm:Br}
If $\Omega$ is a set,
then $\pow{\Omega}$\label{pow} is a Boolean ring, where
\begin{align*}
X\cdot Y&=X\cap Y,&X+Y&=(X\setminus Y)\cup(Y\setminus X).
\end{align*}
\end{theorem}
\begin{theorem}\label{thm:Br-2}
Every Boolean ring in which $0\neq1$ has characteristic $2$.
\end{theorem}
\begin{proof}
In a Boolean ring, $2x=(2x)^2=4x^2=4x$, so
\begin{equation*}
2x=0. \qedhere
\end{equation*}
\end{proof}
The following will be generalized
by Theorem~\ref{thm:reg-pr-max} (p.\ \pageref{thm:reg-pr-max}).
\begin{theorem}\label{thm:Boole}
In Boolean rings, all prime ideals are maximal.
\end{theorem}
\begin{proof}
In a Boolean ring,
\begin{equation*}
x\cdot(x-1)=x^2-x=x-x=0,
\end{equation*}
so every $x$ is a zero-divisor unless $x$ is $0$ or $1$.
Therefore there are no Boolean integral domains besides $\{0,1\}$,
which is the field $\F_2$.
\end{proof}
In $\Z$, by Theorem~\ref{thm:Z-subg} (p.\ \pageref{thm:Z-subg}),
the ideal $(a,b)$ is the principal ideal generated by $\gcd(a,b)$.
So $a$ and $b$ are relatively prime if and only if $(a,b)=\Z$.
We can write this condition as
\begin{equation*}
(a)+(b)=\Z.
\end{equation*}
Then the following generalizes Theorem~\ref{thm:CRT}
(p.\ \pageref{thm:CRT}).
\begin{theorem}[Chinese Remainder Theorem]\label{thm:CRT-R}\index{Chinese
Remainder Theorem}
\index{theorem!Chinese Remainder Th---}
Suppose $R$ has an indexed family $(I_i\colon i<n)$ of ideals.
% [A passage appears to be missing from the source at this point;
% the surviving fragment ended with the condition $a>b\geq0$.]
(This is not a sufficient condition for being an ideal, however.)
We have a well-defined function $N$
from the set of subgroups of $\Z[\sqrt D]$ to $\N$ given by
\begin{equation*}
N\bigl(G(X)\bigr)=\abs{\det(X)}.
\end{equation*}
In case $D<0$, this new function $N$
agrees with the earlier function called $N$ in the sense that
\begin{multline*}
N\bigl((a+b\sqrt D)\bigr)
=N\bigl(\gpgen{a+b\sqrt D,bD+a\sqrt D}\bigr)\\
=\abs{a^2-b^2D}
=a^2-b^2D
=N(a+b\sqrt D).
\end{multline*}
If $I$ and $J$ are ideals of $\Z[\sqrt D]$ such that
$I\pincluded J\pincluded\Z[\sqrt D]$,
then we must have
\begin{align*}
N(J)&\divides N(I),&N(I)&>N(J)>1.
\end{align*}
In case $d=-5$, we compute
\begin{gather*}
(2,1+\sqrt{-5})=\gpgen{2,2\sqrt{-5},1+\sqrt{-5},\sqrt{-5}-5}
=\gpgen{2,1+\sqrt{-5}},\\
(3,1\pm\sqrt{-5})=\gpgen{3,3\sqrt{-5},1\pm\sqrt{-5},\sqrt{-5}\mp 5}
=\gpgen{3,1\pm\sqrt{-5}},
\end{gather*}
hence
\begin{equation*}
\begin{array}{c||c|c}
I&(2,1+\sqrt{-5})&(3,1\pm\sqrt{-5})\\\hline
N(I)&2&3
\end{array}.
\end{equation*}
So these ideals are maximal, hence prime.
Ideals of the rings $\Z[\sqrt D]$ were originally called \emph{ideal numbers.}
\section{Integral domains}\label{sect:int-dom}
We now consider some rings that are increasingly close
to having all of the properties of $\Z$.
We start with arbitrary integral domains.
We have noted in effect that the following fails in $\Zmod6$.
\begin{lemma}
In an integral domain, if $a$ and $b$ are non-zero associates, and
\begin{equation*}
a=bx,
\end{equation*}
then $x$ is a unit.
\end{lemma}
\begin{proof}
We have also, for some $y$,
\begin{align*}
b&=ay=bxy,&b\cdot(1-xy)&=0,&1&=xy,
\end{align*}
since $b\neq0$ and we are in an integral domain.
\end{proof}
\begin{theorem}\label{thm:pr-irr}
In an integral domain, prime elements are irreducible.
\end{theorem}
\begin{proof}
If $p$ is prime, and $p=ab$, then $p$ is an associate of $a$ or $b$,
so the other is a unit.
\end{proof}
By this and Euclid's Lemma (p.\ \pageref{thm:Euc-Lem}),
the irreducibles of $\Z$ are precisely the primes.
Recall from p.\ \pageref{multiset} that
a \emph{multiset} is a pair $(A,f)$, where $f\colon A\to\N$.
If $A$ here is a finite subset of a ring, then the product
\begin{equation*}
\prod_{a\in A}a^{f(a)}
\end{equation*}
is well-defined (see p.\ \pageref{unord-prod})
and can be called the \textbf{product of the multiset.}
The components of the proof of the following are found in Euclid,
although Gauss's version \cite[\P16]{Gauss}
seems to be the first formal statement of the theorem \cite[p.~10]{MR568909}.
\begin{theorem}[Fundamental Theorem of Arithmetic]\label{thm:FTA}
Every element of $\N$ has a unique prime factorization.
That is, every natural number is the product of a unique multiset of prime numbers.
\end{theorem}
\begin{proof}
We first show that every integer greater than $1$ has a prime factor:
this is Propositions VII.31--2 of the \emph{Elements.}
Suppose $m>1$, and
let $p$ be the least integer $a$ such that $a\divides m$ and $1<a$.
% [A passage appears to be missing from the source at this point;
% it concerned every $m$ such that $1<m<n_0$, and it ended with the
% strictly decreasing sequence]
\begin{equation}\label{eqn:n_0}
n_0>n_1>n_2>\cdots,
\end{equation}
which is absurd in $\N$.
It follows that $n_0$ must have had a prime factorization.
An arbitrary ring will not have an ordering as $\N$ does,
but the relation of divisibility will be an adequate substitute,
at least in a \pid.
Indeed, with the $n_i$ as above, we have
\begin{equation}\label{eqn:(n_0)}
(n_0)\pincluded(n_1)\pincluded(n_2)\pincluded\cdots
\end{equation}
This is a strictly ascending chain of ideals.
A ring is called \textbf{Noetherian}\index{Noetherian ring}
if its every strictly ascending chain of ideals is finite.
\begin{theorem}
Every \pid\ is Noetherian.
\end{theorem}
\begin{proof}
If $I_0\included I_1\included\dotsb$,
then $\bigcup_{i\in\upomega}I_i$ is an ideal $(a)$;
then $a\in I_n$ for some $n$, so the chain cannot grow beyond $I_n$.
\end{proof}
Now we can adapt to an arbitrary \pid\
the foregoing argument that elements of $\N$ have prime factorizations.
In fact that argument can be streamlined.
If $n_0$ has no prime factorization,
then $n_0=m_0\cdot n_1$ for some non-units $m_0$ and $n_1$,
where at least $n_1$ has no prime factorization.
Again we obtain a descending sequence as in~\eqref{eqn:n_0},
hence an ascending sequence as in~\eqref{eqn:(n_0)}.
\begin{theorem}\label{thm:PID->UFD}
Every \pid\ is a \ufd.
\end{theorem}
\begin{proof}
By Theorem~\ref{thm:pid-irr-pr}, irreducibles in a \pid\ are prime,
and therefore irreducible factorizations are unique when they exist.
Indeed, if
\begin{equation*}
\prod_{i<m}p_i=\prod_{j<n}q_j,
\end{equation*}
% [A passage appears to be missing from the source at this point;
% it ended by citing Theorem~\ref{thm:Z->Q} (p.\ \pageref{thm:Z->Q}).]
A nonempty subset of a ring
is called \textbf{multiplicative}\index{multiplicative}
if it is closed under multiplication.
For example, $\Z\setminus\{0\}$ is a multiplicative subset of $\Z$,
and more generally,
the complement of any prime ideal of any ring is multiplicative.
\begin{lemma}
If $S$ is a multiplicative subset of a ring $R$, then on $R\times S$
there is an equivalence-relation $\sim$ given by
\begin{equation}\label{eqn:q}
(a,b)\sim (c,d)\iff (ad-bc)\cdot e=0\text{ for some $e$ in }S.
\end{equation}
If $R$ is an integral domain and $0\notin S$, then
\begin{equation*}
(a,b)\sim(c,d)\iff ad=bc.
\end{equation*}
\end{lemma}
\begin{proof}
Reflexivity and symmetry are obvious.
For transitivity, note that,
if $(a,b)\sim(c,d)$ and $(c,d)\sim(e,f)$,
so that, for some $g$ and $h$ in $S$,
\begin{align*}
0&=(ad-bc)g=adg-bcg,&0&=(cf-de)h=cfh-deh,
\end{align*}
then, since $dgh\in S$,
\begin{align*}
(af-be)dgh
&=afdgh-bedgh\\
&=adg\cdot fh-deh\cdot bg
=bcg\cdot fh-cfh\cdot bg=0,
\end{align*}
so $(a,b)\sim(e,f)$.
\end{proof}
In the notation of the lemma,
the equivalence-class of $(a,b)$ is denoted by $a/b$ or
\begin{equation*}
\frac ab,
\end{equation*}
and the quotient $R\times S\modsim$ is denoted by
\begin{equation*}
S\inv R.
\end{equation*}
If $0\in S$, then $S\inv R$ has exactly one element.
An instance where $R$ is not an integral domain
will be considered in the next section (\S\ref{sect:ultra}).
\begin{theorem}\label{thm:loc}
Suppose $R$ is a ring with multiplicative subset $S$.
\begin{compactenum}
\item
In $S\inv R$, if $c\in S$,
\begin{equation*}
\frac ab=\frac{ac}{bc}.
\end{equation*}
\item
$S\inv R$ is a ring
in which the operations are given by
\begin{align*}
\frac ab\cdot\frac cd&=\frac{ac}{bd},&
\frac ab\pm\frac cd&=\frac{ad\pm bc}{bd}.
\end{align*}
\item
There is a ring-homomorphism $\phi$ from $R$ to $S\inv R$ where, for every $a$ in $S$,
\begin{equation*}
\phi(x)=\frac{xa}a.
\end{equation*}
If $1\in S$, then $\phi(x)=x/1$.
\newcounter{local}
\setcounter{local}{\value{enumi}}
\end{compactenum}
Suppose in particular $R$ is an integral
domain and $0\notin S$.
\begin{compactenum}\setcounter{enumi}{\value{local}}
\item
$S\inv R$ is an integral domain, and the homomorphism $\phi$ is an embedding.
\item
If $S=R\setminus\{0\}$, then $S\inv R$ is a field;
and if $\psi$ is an embedding of $R$ in a field $K$, then there is an embedding $\tilde{\psi}$ of $S\inv R$ in $K$ such that $\tilde{\psi}\circ\phi=\psi$.
\end{compactenum}
\end{theorem}
When $S$ is the complement of a prime ideal $\primei$,\label{mathfrak}
then $S\inv R$ is called
the \textbf{localization}\index{local!---ization} of $R$ at $\primei$
and can be denoted by
\begin{equation*}
R_{\primei}.
\end{equation*}
(See Appendix \ref{app:German}, p.\ \pageref{app:German},
for Fraktur letters like $\mathfrak p$.)
If $R$ is an integral domain, so that $(0)$ is prime,
then the localization $R_{(0)}$ (which is a field by the theorem)
is the \textbf{quotient-field}\label{qf}%
\index{quotient!--- field}\index{field!quotient ---}
of $R$.
In this case, the last part of the theorem
describes the quotient field
in terms of a \emph{universal property}
in the sense of p.\ \pageref{up}.
However, it is important to note that,
if $R$ is not an integral domain,
then the homomorphism $x\mapsto x/1$ from $R$ to $R_{\primei}$
might not be an embedding.
The following will be generalized as Theorem~\ref{thm:reg-quot-loc}
(p.\ \pageref{thm:reg-quot-loc} below).
\begin{theorem}\label{thm:Br-quot-loc}
For every Boolean ring $R$,
for every prime ideal $\primei$ of $R$,
the homomorphism $x\mapsto x/1$ from $R$ to $R_{\primei}$
is surjective and has kernel $\primei$.
Thus
\begin{equation*}
\F_2\cong R/\primei\cong R_{\primei}.
\end{equation*}
\end{theorem}
A \textbf{local ring}\index{ring!local ---}\index{local!--- ring}
is a ring with a unique maximal ideal.
The connection between localizations and local rings
is made by the theorem below.
\begin{lemma}
An ideal $\maxi$ of a ring $R$ is a unique maximal ideal of $R$
if and only if
\begin{equation*}
\units R=R\setminus\maxi.
\end{equation*}
\end{lemma}
\begin{theorem}\label{thm:local-ring}
The localization $R_{\primei}$ of a ring $R$ at a prime ideal $\primei$
is a local ring
whose unique maximal ideal is
\begin{equation*}
\primei R_{\primei},
\end{equation*}
namely the ideal generated by the image of $\primei$.
\end{theorem}
\begin{proof}
The ideal $\primei R_{\primei}$ consists of those $a/b$ such that $a\in\primei$.
In this case, if $c/d=a/b$, then $cb=da$, which is in $\primei$,
so $c\in\primei$ since $\primei$ is prime and $b\notin\primei$.
Hence for all $x/y$ in $R_{\primei}$,
\begin{align*}
x/y\notin\primei R_{\primei}
&\iff x\notin\primei\\
&\iff x/y\text{ has an inverse, namely }y/x.
\end{align*}
By the lemma, we are done.
\end{proof}
\section{*Ultraproducts of fields}\label{sect:ultra}
An \emph{ultraproduct} of fields
is the quotient of the direct product of a family of fields
by a maximal ideal.
An algebraic investigation of this construction will involve
maximal ideals, prime ideals, localizations,
and our theorems about them.
First we shall establish the usual tool
by which the very existence of maximal ideals is established:
\subsection{Zorn's Lemma}
On p.\ \pageref{thm:rec} we established a Recursion Theorem
for $\N$ as an algebra, and hence for $\upomega$.
Now we establish another such theorem,
for arbitrary ordinals, not just $\upomega$;
but the ordinals are now to be considered
as well-ordered sets,
not algebras.
\begin{theorem}[Transfinite Recursion]
For all sets $A$, for all ordinals $\alpha$,
for all functions $f$
from $\bigcup\{A^{\beta}\colon\beta<\alpha\}$ to $A$,
there is a unique element
\begin{equation*}
(a_{\beta}\colon\beta<\alpha)
\end{equation*}
of $A^{\alpha}$ such that, for all $\beta$ in $\alpha$,
\begin{equation*}
f(a_{\gamma}\colon\gamma<\beta)=a_{\beta}.
\end{equation*}
\end{theorem}
\begin{proof}
We first prove uniqueness.
Suppose, if possible,
$(a'_{\beta}\colon\beta<\alpha)$ is another element of $A^{\alpha}$ as desired,
and let $\beta$ be minimal such that $a_{\beta}\neq a'_{\beta}$.
Then
\begin{equation*}
(a_{\gamma}\colon\gamma<\beta)
=(a'_{\gamma}\colon\gamma<\beta),
\end{equation*}
so by definition $a_{\beta}=a'_{\beta}$.
We now prove existence.
If the theorem fails for some $\alpha$,
let $\alpha$ be minimal such that it fails.
Say $f\colon\bigcup\{A^{\beta}\colon\beta<\alpha\}\to A$.
By hypothesis, for each $\beta$ in $\alpha$,
there is a unique element $(a_{\gamma}\colon\gamma<\beta)$ of $A^{\beta}$
such that, for all $\gamma$ in $\beta$,
\begin{equation*}
f(a_{\delta}\colon\delta<\gamma)=a_{\gamma}.
\end{equation*}
As before, $a_{\gamma}$ is independent
of the choice of $\beta$ such that $\gamma<\beta<\alpha$.
Then for all $\beta$ in $\alpha$ we are free to define
\begin{equation*}
a_{\beta}=f(a_{\gamma}\colon\gamma<\beta).
\end{equation*}
Then the element $(a_{\beta}\colon\beta<\alpha)$ of $A^{\alpha}$
shows that the theorem does not fail for $\alpha$.
\end{proof}
Our proof used the method of the \textbf{minimal counterexample:}
we showed that there could not be such a counterexample.
We now proceed to Zorn's Lemma.
Suppose $\Omega$ is a set and $A\included\pow{\Omega}$.
Then proper inclusion ($\pincluded$)
is a transitive irreflexive relation on $A$ and on each of its subsets
(see Theorems~\ref{thm:1}).
% NOTE(review): the source is garbled at this point: a span between the
% Zorn's Lemma discussion and the proof of the lemma on multiple zeros has
% been lost (including that lemma's statement, apparently that $c$ is a
% multiple zero of $f$ if and only if $f(c)=f'(c)=0$), and the \ref label
% above is truncated. The lines below minimally reconstruct the sentence
% that the surviving proof fragment continues.
% TODO: restore the missing text from the original.
Write $f=(X-c)^m\cdot g$, where $g(c)\neq0$.
Then $f'=m\cdot(X-c)^{m-1}\cdot g+(X-c)^m\cdot g'$.
If $m>1$, then $f'(c)=0$.
If $f'(c)=0$, then $m\cdot 0^{m-1}\cdot g(c)=0$,
so $0^{m-1}=0$ and hence $m>1$.
\end{proof}
If $L$ is a field with subfield $K$,
then a polynomial over $K$ may be irreducible over $K$, but not over $L$.
For example, $X^2+1$ is irreducible over $\Q$, but not over $\Q(\mi)$.
Likewise, the polynomial may have zeros from $L$, but not $K$.
Hence it makes sense to speak of zeros of an irreducible polynomial.
\begin{theorem}
If $f$ is an irreducible polynomial with multiple zeros over a field $K$,
then $K$ has characteristic $p$ for some prime number $p$,
and
\begin{equation*}
f=g(X^p)
\end{equation*}
for some polynomial $g$ over $K$.
\end{theorem}
\begin{proof}
If $f$ has the multiple zero $c$,
then by the lemma $X-c$ is a common factor of $f$ and $f'$.
Since $f$ is irreducible,
$f$ itself must be a common factor of $f$ and $f'$,
so $f'$ can only be $0$, since $\deg(f')<\deg(f)$.
Say $f=\sum_{k=0}^na_kX^k$, so
$f'=\sum_{k=0}^{n-1}(k+1)\cdot a_{k+1}X^k$.
If $f'=0$, but $a_{k+1}\neq0$,
then $k+1$ must be $0$ in $K$,
that is, its image under the homomorphism from $\Z$ to $K$ must be $0$.
Then this homomorphism has a kernel $\gpgen p$ for some prime number $p$.
Hence $a_k=0$ whenever $p\ndivides k$,
so $f$ can be written as $\sum_{j=0}^ma_{pj}X^{pj}$,
which is $g(X^p)$, where $g=\sum_{j=0}^ma_{pj}X^j$.
\end{proof}
\subsection{Factorization}
Throughout this subsection, $R$ is a \ufd\ with quotient field $K$.
We know from Theorem~\ref{thm:deg-euc} that $K[X]$ is a Euclidean domain
and therefore a \ufd.
Now we shall show that $R[X]$ too is a \ufd.
It will then follow that each of the polynomial rings $R[X_0,\dots,X_{n-1}]$ is a \ufd.
A polynomial over $R$ is called \textbf{primitive}
if $1$ is a greatest common divisor of its coefficients.
Gauss proved a version of the following
for the case where $R$ is $\Z$ \cite[\P42]{Gauss}.
\begin{lemma}[Gauss]
The product of primitive polynomials over $R$ is primitive.
\end{lemma}
\begin{proof}
Let $f=\sum_{k=0}^ma_kX^k$ and $g=\sum_{k=0}^nb_kX^k$. Then
\begin{equation*}
fg=\sum_{k=0}^{m+n}c_kX^k,
\end{equation*}
where
\begin{equation*}
c_k=\sum_{i+j=k}a_ib_j=a_0b_k+a_1b_{k-1}+\dotsb+a_kb_0.
\end{equation*}
Suppose $f$ is primitive, but $fg$ is not,
so the coefficients $c_k$ have a common prime factor $\pi$.
There is some $\ell$ such that $\pi\divides a_i$ when $i<\ell$,
but $\pi\ndivides a_{\ell}$.
Then $\pi$ divides
\begin{equation*}
c_{\ell}-(a_0b_{\ell}+\dots+a_{\ell-1}b_1),
\end{equation*}
which is $a_{\ell}b_0$, so $\pi\divides b_0$.
Hence $\pi$ divides
\begin{equation*}
c_{\ell+1}-(a_0b_{\ell+1}+\dots+a_{\ell-1}b_2)-a_{\ell+1}b_0,
\end{equation*}
which is $a_{\ell}b_1$, so $\pi\divides b_1$, and so on.
Thus $g$ is not primitive.
\end{proof}
\begin{lemma}\label{lem:poly-ass}
Primitive polynomials over $R$ that are associates over $K$
are associates over~$R$.
\end{lemma}
\begin{proof}
Suppose $f$ and $g$ are polynomials that are defined over $R$
and are associates over $K$.
Then $uf=g$ for some $u$ in $\units K$,
and consequently $bu=a$ for some $a$ and $b$ in $R$, so $af=bg$.
If $f$ and $g$ are primitive, then $a$ and $b$ must be associates in $R$,
and therefore $u\in\units R$,
so $f$ and $g$ are associates over $R$.
\end{proof}
\begin{lemma}
Primitive polynomials over $R$ are irreducible over $R$
if and only if they are irreducible over~$K$.
\end{lemma}
\begin{proof}
Suppose $f$ and $g$ are polynomials over $K$
such that the product $fg$ is a primitive polynomial over $R$.
For some $a$ and $b$ in $K$,
the polynomials $af$ and $bg$ have coefficients in $R$
and are primitive over $R$.
By Gauss's Lemma, $abfg$ is primitive.
Since $fg$ is already primitive,
$ab$ must be a unit in $R$.
In particular, $abu=1$ for some $u$ in $\units R$.
Then $af$ and $bug$ are primitive polynomials over $R$
whose product is $fg$.
Now, the units of $K[X]$ are just the polynomials of degree $0$,
that is, the elements of $\units K$.
In particular,
\begin{equation*}
f\in\units{K[X]}\iff af\in\units{K[X]}.
\end{equation*}
The unit \emph{primitive} elements of $R[X]$
are the elements of $\units R$.
Thus
\begin{equation*}
af\in\units{K[X]}\iff af\in\units{R[X]}.
\end{equation*}
Therefore $fg$ is irreducible over $K$ if and only if over $R$.
\end{proof}
Note however that if $f$ is primitive and irreducible over $R$,
and $a$ in $R$ is not a unit or $0$,
then $af$ is still irreducible over $K$
(since $a$ is a unit in $K$) but not over $R$.
\begin{theorem}\label{thm:R[X]-ufd}
$R[X]$ is a \ufd.
\end{theorem}
\begin{proof}
Every nonzero element of $R[X]$ can be written as $af$,
where $a\in R\setminus\{0\}$ and $f$ is primitive.
Then $f$ has a prime factorization over $K$
(since $K[X]$ is a Euclidean domain):
say $f=f_0\dotsm f_{n-1}$.
There are $b_k$ in $K$ such that $b_kf_k$ is a primitive polynomial over $R$.
The product of these is still primitive by Gauss's Lemma,
so the product of the $b_k$ must be a unit in $R$.
We may assume this unit is $1$.
Thus $f$ has an irreducible factorization
\begin{equation*}
(b_0f_0)\dotsm(b_{n-1}f_{n-1})
\end{equation*}
over $R$.
Its uniqueness follows from its uniqueness over $K$
and Lemma~\ref{lem:poly-ass}.
Since $a$ has a unique irreducible factorization,
%$a_0\cdots a_{m-1}$,
we obtain a unique irreducible factorization of $af$.
\end{proof}
We end with a test for irreducibility.
\begin{theorem}[Eisenstein's Criterion]
If $\pi$ is an irreducible element of $R$
and $f$ is a polynomial
\begin{equation*}
a_0+a_1X+\dots+a_nX^n
\end{equation*}
over $R$ such that
\begin{align*}
\pi^2&\ndivides a_0,&
\pi&\divides a_0,&
\pi&\divides a_1,&
&\dots,&
\pi&\divides a_{n-1},&
\pi&\ndivides a_n,
\end{align*}
then $f$ is irreducible over $K$ and, if primitive, over $R$.
\end{theorem}
\begin{proof}
Suppose $f=gh$, where
\begin{align*}
g&=\sum_{k=0}^nb_kX^k,&h&=\sum_{k=0}^nc_kX^k,
\end{align*}
all coefficients being from $R$.
We may assume $f$ is primitive, so $g$ and $h$ must be primitive.
We may assume $\pi$ divides $b_0$, but not $c_0$.
Let $\ell$ be such that $\pi\divides b_k$ when $k<\ell$.
If $\ell=n$, then (since $g$ is primitive)
we must have $b_n\neq0$, so $\deg(g)=n$.
In this case $\deg(h)=0$, so $h$ is a unit.
If $\ell<n$, then $\pi$ divides both $a_{\ell}$ and
\begin{equation*}
a_{\ell}-(b_0c_{\ell}+\dotsb+b_{\ell-1}c_1),
\end{equation*}
which is $b_{\ell}c_0$;
but $\pi$ is prime and $\pi\ndivides b_{\ell}$ and $\pi\ndivides c_0$,
which is absurd.
Hence $\ell=n$, and $f$ is irreducible.
\end{proof}
% NOTE(review): the source is garbled at this point: a span between the end
% of the Eisenstein proof and a later lemma (stated with a commutative
% diagram involving $f[K]$ and $H$) has been lost. The diagram below is a
% minimal syntactic reconstruction around the surviving fragment
% (``...}[d]^f \\ f[K] \ar[r] & H'').
% TODO: restore the missing text from the original.
\begin{lemma}
\begin{equation*}
\xymatrix{
K \ar[r] \ar@{->>}[d]^f & H \\
f[K] \ar[r] & H
}
\end{equation*}
\end{lemma}
The next theorem is a refinement of Theorem~\ref{thm:3-cycles} (p.~\pageref{thm:3-cycles}).
\begin{theorem}
$\Alt n$ is generated by the $3$-cycles
$\begin{pmatrix}
0 & 1 & k
\end{pmatrix}$,
where $1<k<n$.
\end{theorem}
% NOTE(review): the source is garbled at this point: a span between this
% theorem on $3$-cycles and the discussion of irreducible factorizations
% has been lost. The sentence below is a minimal reconstruction; note that
% the surviving fragment read ``every \ufd\ is a \pid'', which reverses the
% implication of Theorem~\ref{thm:PID->UFD} and is false (e.g.\ for
% $\Z[X]$, by Theorem~\ref{thm:R[X]-ufd}), so it has been corrected.
% TODO: restore the missing text from the original.
We now complete the proof of Theorem~\ref{thm:PID->UFD}
(p.\ \pageref{thm:PID->UFD}),
that every \pid\ is a \ufd,
by establishing that irreducible factorizations exist.
\begin{proof}
They do exist, because by the last theorem,
a tree of factorizations can have no infinite branches.
That is, let $N$ be the set of non-zero non-irreducible non-units in some \pid,
and suppose $a\in N$.
Then $a$ is a product $a_{(0)}\cdot a_{(1)}$ of non-units.
If one of these $a_{(i)}$ is in $N$,
it is a product $a_{(i,0)}\cdot a_{(i,1)}$ of non-units.
Otherwise, we let $a_{(i,0)}=a_{(i)}$, but $a_{(i,1)}$ is undefined.
In general, if $a_{(n_0,\dots,n_{k-1})}$ has been defined and is in $N$, we let
\begin{equation*}
a_{(n_0,\dots,n_{k-1})}=a_{(n_0,\dots,n_{k-1},0)}\cdot a_{(n_0,\dots,n_{k-1},1)},
\end{equation*}
where each of the $a_{(n_0,\dots,n_{k-1},i)}$ is a non-unit;
but if $a_{(n_0,\dots,n_{k-1})}$ is irreducible, we let
\begin{equation*}
a_{(n_0,\dots,n_{k-1},0)}=a_{(n_0,\dots,n_{k-1})},
\end{equation*}
while $a_{(n_0,\dots,n_{k-1},1)}$ is undefined.
Strictly we need to use the Axiom of Choice here:
assuming we have well-ordered the ring, we let $a_{(n_0,\dots,n_{k-1},0)}$ be the \emph{least} possibility that meets our conditions.
Thus we obtain a non-zero non-unit $a_{\sigma}$
for each $\sigma$ in a subset $B$
of the set $\bigcup_{k\in\upomega}2^k$ of finite binary sequences.
Moreover, for each $k$ in $\upomega$,
\begin{equation*}
a=\prod_{\sigma\in 2^k\cap B}a_{\sigma}.
\end{equation*}
We claim that, for some $k$ in $\upomega$, for each $\sigma$ in $2^k\cap B$,
the factor $a_{\sigma}$ is irreducible.
For, let
\begin{equation*}
A=\{\emptyset\}
\cup\bigcup_{k\in\upomega}
\{\sigma\in 2^{k+1}\cap B\colon a_{\sigma\restriction k}\in N\}.
\end{equation*}
Then for all $k$ in $\upomega$, for all $\sigma$ in $2^{k+1}$,
\begin{equation*}
\sigma\in A\implies\sigma\restriction k\in A.
\end{equation*}
If $A$ is finite, then for some $n$ in $\upomega$,
we have $A\included\bigcup_{k<n}2^k$.
% NOTE(review): the source is truncated or garbled here; the remainder of
% this argument is missing. TODO: restore from the original.