\documentclass[%
version=last,%
a5paper,
10pt,%
headings=small,%
bibliography=totoc,%
index=totoc,%
twoside,%
reqno,%
cleardoublepage=empty,%
open=any,%
parskip=half,%
draft=true,%
%DIV=classic,%
DIV=12,%
headinclude=false,%
pagesize]
{scrbook}

%\usepackage[notref,notcite]{showkeys}

\usepackage{pstricks}
\renewcommand{\captionformat}{\ }

\usepackage{relsize,cclicenses}
\usepackage{url}
\usepackage{amsmath,amssymb,amsthm,amscd}
\usepackage[mathscr]{euscript}
\usepackage{upgreek}
\usepackage{multicol}
\usepackage{stmaryrd}  % \triangle{left,right}eqslant
\usepackage[matrix,arrow]{xy}
\usepackage{hfoldsty}
\usepackage{verbatim}
\usepackage[neverdecrease]{paralist}
\usepackage{graphicx,rotating} % for the German script picture

%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  Theorems
%
%%%%%%%%%%%%%%%%%%%%%%%%

%\swapnumbers

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem*{porism}{Porism}
\newtheorem*{corollary}{Corollary}

\theoremstyle{definition}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{example}[theorem]{Example}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\usepackage{makeidx}
\makeindex

\newcommand{\zfc}{\mathrm{ZFC}}
\newcommand{\zf}{\mathrm{ZF}}
\newcommand{\lto}{\Rightarrow}
\newcommand{\liff}{\Leftrightarrow}
%\renewcommand{\land}{\mathrel{\&}}


\usepackage{bm}
\newcommand{\tuple}[1]{\bm{#1}}

\newcommand{\included}{\subseteq}      % [the name suggests the meaning here]
\newcommand{\pincluded}{\subset}      % [the name suggests the meaning here]
\renewcommand{\leq}{\leqslant}
\renewcommand{\geq}{\geqslant}
\renewcommand{\emptyset}{\varnothing}
\renewcommand{\setminus}{\smallsetminus}
\renewcommand{\phi}{\varphi}

\newcommand{\stnd}[1]{\mathbb{#1}}
\newcommand{\N}{\stnd{N}}
\newcommand{\Z}{\stnd{Z}}         % integers
\newcommand{\Q}{\stnd{Q}}         % rationals
\newcommand{\Qp}{\stnd{Q}^+}         % positive rationals
\newcommand{\C}{\stnd{C}}         % complex numbers
\newcommand{\R}{\stnd{R}}         % real numbers
\newcommand{\F}{\stnd{F}}         % 
\newcommand{\Ham}{\stnd{H}}         % quaternions 
\newcommand{\Oct}{\stnd{O}}         % octonions
\newcommand{\id}{\operatorname{id}}          % identity-map
\newcommand{\gid}{\operatorname{e}}  % identity of group
\newcommand{\inv}{^{-1}}                % mult. inverse

\newcommand{\Mat}[2][n]{\operatorname M_{#1}(#2)}
\newcommand{\MatR}[1][n]{\Mat[#1]{R}}
\newcommand{\MatZ}[1][n]{\Mat[#1]{\Z}}
\newcommand{\GL}[2][n]{\operatorname{GL}_{#1}(#2)}
\newcommand{\GLZ}[1][n]{\GL[#1]{\Z}}
\newcommand{\GLR}[1][n]{\GL[#1]{R}}
\newcommand{\Kfg}{\mathrm V}     % Klein four group
\newcommand{\quat}{\mathrm Q_8}  % Quaternion group

\newcommand{\str}[1]{\mathfrak{#1}}     % structure
\newcommand{\qsep}{\;}                 % follows a quantified variable
\newcommand{\Forall}[1]{\forall{#1}\qsep }
\newcommand{\Exists}[1]{\exists{#1}\qsep }
\newcommand{\modsim}{/\mathord{\sim}}  % modulo the eq-ren \sim
\newcommand{\eqc}[1]{[#1]}             % equivalence-class

\newcommand{\divides}{\mathrel{|}}
\newcommand{\ndivides}{\mathrel{\nmid}}
\newcommand{\order}[1]{\lvert#1\rvert}
\newcommand{\gpgen}[1]{\langle#1\rangle}% subgroup generated by #1
\newcommand{\unordered}[2]{[#2]^{#1}}  % unordered #1-tuples from #2
\newcommand{\free}[1]{\operatorname{F}(#1)}  % free group on #1
\newcommand{\fggen}{I}  % generating set of a free group
\newcommand{\gprels}{B} % relations
\newcommand{\setactedon}{A}  % set acted on by a group

\newcommand{\setcolon}{\colon}

\newcommand{\subgp}{<}              % subgroup
\newcommand{\nsubgp}{\vartriangleleft}  % normal subgroup
\newcommand{\nsupgp}{\vartriangleright}  % normal supergroup
\newcommand{\psubgp}{\lneqq}

\newcommand{\Ker}[1]{\ker(#1)}
%\DeclareMathOperator{\im}{im}          % image
\newcommand{\im}[1]{\operatorname{im}(#1)}

\newcommand{\congruence}{\equiv}
\newcommand{\siml}{\congruence_{\ell}^H}
\newcommand{\simr}{\congruence_{\mathrm r}^H}

\newcommand{\weakprod}{\sideset{}{^{\mathrm{w}}}\prod}
\newcommand{\textweakprod}{\prod^{\mathrm w}}
\newcommand{\freeprod}{\sideset{}{^*}\prod}
\newcommand{\textfreeprod}{\prod^*}
\newcommand{\gpres}[2]{\gpgen{#1\mid#2}}% group on #1 with rel'ns #2
\newcommand{\centr}[1]{\operatorname{C}(#1)}  % center
\newcommand{\cseries}[2]{\operatorname{C}_{#1}(#2)} % central series
\newcommand{\cseriesplain}[1]{\operatorname{C}_{#1}} % central series
\newcommand{\centralizer}[2]{\operatorname{C}_{#2}(#1)} % centralizer
\newcommand{\normalizer}[2]{\operatorname{N}_{#2}(#1)}
\newcommand{\dsubgp}[2]{#2^{(#1)}}  % n-th derived subgroup of #2, where n=#1.
\newcommand{\tsubgp}[1]{#1_{\mathrm{t}}} % torsion sub-group


\newcommand{\family}[1]{\mathcal{#1}}  % family (of sets)
\newcommand{\class}[1]{\mathbf{#1}}    % class

\newcommand{\unit}[1]{{#1}^{\times}}    % group of units of a ring
\newcommand{\Zmod}[1]{\Z_{#1}}
\newcommand{\Zmodu}[1]{\unit{\Zmod{#1}}}
\DeclareMathOperator{\lcm}{lcm}
\newcommand{\rest}[1]{\restriction{#1}}% restriction of function to #1
\newcommand{\modulo}{\emph{modulo}}

\newcommand{\bracket}{\operatorname b}  % (Lie) bracket

% Concerning permutations:

\newcommand{\sgn}[1]{\operatorname{sgn}(#1)}
\newcommand{\sq}[1]{q_{\sigma}(#1)}  % used to define sgn

\newcommand{\Sym}[1]{\operatorname{Sym}(#1)}
\newcommand{\Alt}[1]{\operatorname{Alt}(#1)}       % alternating group
\newcommand{\Dih}[1]{D_{#1}}       % dihedral group

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newcommand{\mi}{\mathrm i}
\newcommand{\mj}{\mathrm j}
\newcommand{\mk}{\mathrm k}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%\newcommand{\setimb}[1]{[#1]}   % image of a set, using brackets
\newcommand{\abs}[1]{\left\lvert#1\right\rvert}  % absolute value
\newcommand{\size}[1]{\lvert#1\rvert}  % cardinality

\newcommand{\so}[1]{\operatorname{E}(#1)}
\newcommand{\End}[1]{\operatorname{End}(#1)}
\newcommand{\Aut}[1]{\operatorname{Aut}(#1)}
\newcommand{\Hom}[1]{\operatorname{Hom}(#1)}
\newcommand{\Inn}[1]{\operatorname{Inn}(#1)}
\newcommand{\Der}[1]{\operatorname{Der}(#1)}

%\newcommand{\pid}{\textsc{pid}}
\newcommand{\pid}{PID}
\newcommand{\ufd}{UFD}
\newcommand{\ed}{ED}

\newcommand{\primei}{\mathfrak{p}}      % a prime ideal
\newcommand{\maxi}{\mathfrak{m}}        % a maximal ideal
\newcommand{\supp}[1]{\operatorname{supp}(#1)}
\newcommand{\Supp}[1]{\operatorname{supp}[#1]}
\newcommand{\symdiff}{\mathbin{\triangle}}

%\newcommand{\lang}{\mathcal{L}}        % a language or signature

\newcommand{\pow}[1]{\mathscr{P}(#1)}  % power set
\let\oldsqrt\sqrt
\renewcommand{\sqrt}[2][1]{\oldsqrt{\vphantom{#1}}{#2}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%\renewcommand{\theequation}{\roman{equation}}
%\renewcommand{\theequation}{\fnsymbol{equation}}

\renewcommand{\thepart}{\Roman{part}}

\begin{document}
\title{Groups and Rings}
\author{David Pierce}
\date{\today}
\publishers{Matematik B\"ol\"um\"u\\
Mimar Sinan G\"uzel Sanatlar \"Universitesi\\
\url{dpierce@msgsu.edu.tr}\\
\url{http://mat.msgsu.edu.tr/~dpierce/}}
\uppertitleback{\centering
\emph{Groups and Rings}\\
\mbox{}\\
This work is licensed under the\\
 Creative Commons Attribution--Noncommercial--Share-Alike
License.\\
 To view a copy of this license, visit\\
  \url{http://creativecommons.org/licenses/by-nc-sa/3.0/}\\
\mbox{}\\
\cc \ccby David Pierce \ccnc \ccsa\\
\mbox{}\\
Mathematics Department\\
Mimar Sinan Fine Arts University\\
Istanbul, Turkey\\
\url{http://mat.msgsu.edu.tr/~dpierce/}\\
\url{dpierce@msgsu.edu.tr}
}
%\frontmatter
\maketitle

\chapter*{Preface}

I wrote the first draft of these notes during
a graduate course in algebra 
at METU in Ankara in 2008--9.  
I had taught this course also in 2003--4.  
I revised my notes when teaching the course a third time, in 2009--10.

Section~\ref{sect:N} (p.~\pageref{sect:N}) is based 
on part of a course called Non-Standard Analysis, 
which I gave at the Nesin Mathematics Village, 
\c Sirince, in the summer of 2009.  
I built up Chapter~\ref{ch:N} around this section. 

For the remaining chapters, 
the main reference is Hungerford's \emph{Algebra} \cite{MR600654}.  
This was the suggested text for the course at METU, 
as well as for the algebra course 
that I myself took as a graduate student.

Hungerford is inspired by category theory, 
of which his teacher Saunders Mac Lane was one of the creators.  
(See \S\ref{sect:category}, p.~\pageref{sect:category} below.)  
The spirit of category theory is seen for example 
at the beginning of Hungerford's Chapter I, ``Groups'':
\begin{quote}\relscale{0.9}
There is a basic truth 
that applies not only to groups 
but also to many other algebraic objects 
(for example, rings, modules, vector spaces, fields): 
in order to study effectively an object 
with a given algebraic structure, 
it is necessary to study as well the functions 
that preserve the given algebraic structure 
(such functions are called homomorphisms).
\end{quote}  
Hungerford's term \emph{object} here reflects the usage of category theory.  
Inspired myself by model theory, 
I shall use the term \emph{structure} instead.  
(See \S\ref{sect:structures}, p.~\pageref{sect:structures} below.)  
The objects named here by Hungerford are all structures 
in the sense of model theory, 
although not every object in a category is a structure in this sense.

%\newpage

\tableofcontents

\addchap{Note to the reader}

Every theorem must have a proof.  
Some proofs in the present notes are sketchy, if not missing entirely.  
In such cases, details should be supplied by the reader.  
No theorem here is expected to be taken on faith.
However, for the purposes of an algebra course, 
some proofs are more important than others.
The full development of Chapter~\ref{ch:N} would take a course in itself, 
but is not required for algebra as such.

The material here is taken mainly from Hungerford \cite{MR600654}, but there are various rearrangements and additions.  The back cover of Hungerford's book quotes a review:
\begin{quote}\relscale{0.9}
Hungerford's exposition is clear enough that an average graduate student can read the text on his own and understand most of it.
\end{quote}
I myself aim for logical clarity; but I do not intend for these notes to be a replacement for lectures in a classroom.  Such lectures may amplify some parts, while glossing over others.

\setcounter{chapter}{-1}
\chapter{Mathematical foundations}\label{ch:N}%\label{part:N}
%\setcounter{section}{-1}

%\setchapterpreamble{
The full details of this chapter are not strictly part of an algebra course, but are logically presupposed by the course.  The main purpose of the chapter is to establish the notation whereby
\begin{align*}
\N&=\{1,2,3,\dots\},&
\upomega&=\{0,1,2,\dots\}.
\end{align*} 
%$\N=\{1,2,3,\dots\}$ and $\upomega=\{0,1,2,\dots\}$. 
The elements\footnote{The letter $\upomega$ is not the minuscule English letter called \emph{double u,} but the minuscule Greek \emph{omega,} which is probably in origin a double o.  Obtained with the control sequence \url{\upomega} from the \url{upgreek} package, the $\upomega$ used here is upright, unlike the standard slanted $\omega$ (obtained with \url{\omega}).  The latter $\omega$ might be used as a variable, although it is not so used in these notes.  One could similarly distinguish between the constant $\uppi$ (used for the ratio of the circumference to the diameter of a circle) and the variable $\pi$.} of $\upomega$ are the von-Neumann natural numbers, so that if $n\in\upomega$, then $n=\{0,\dots,n-1\}$.  In particular, $n$ is itself a set with $n$ elements.  When $n=0$, this means $n$ is the empty set.  A cartesian power $A^n$ can be understood as the set of functions from $n$ to $A$.  Then a typical element of $A^n$ can be written as $(a_0,\dots,a_{n-1})$.  Most people write $(a_1,\dots,a_n)$ instead; and when they want an $n$-element set, they use $\{1,\dots,n\}$, which might be denoted by something like $[n]$.  This is a needless complication.
%}

\section{Sets and classes}\label{sect:sets}

A \textbf{collection} is many things, considered as one.  Those many
things are the 
\textbf{members}%
\index{member}
or 
\textbf{elements}%
\index{element}
of the collection.  The members \textbf{compose} the collection, and the collection \textbf{comprises} them.\footnote{Thus the relations named by the verbs \emph{compose} and \emph{comprise} are converses of one another; but native English speakers often confuse these two verbs.}  Each member \textbf{belongs} to the collection, and the collection \textbf{contains} it.  

A 
\textbf{set}%
\index{set}
is a special kind of collection.  The properties of sets are given by
\emph{axioms}; we shall use a version of the Zermelo--Fraenkel Axioms
with the Axiom of Choice \cite{Zermelo-invest}.  The collection of these axioms is denoted by $\zfc$.
In the logical formalism that we shall use for these axioms, every element of a
set is itself a set.  By definition, two sets are 
\textbf{equal}%
\index{equal}
if they have the same elements.  There is an empty set---a set with no \mbox{members---,} denoted by $\emptyset$.  If $a$ is a set, then there is a set $\{a\}$, with the unique element $a$.  If $b$ is also a set, then there is a set $a\cup b$, whose members are precisely the members of $a$ and the members of $b$.  Thus there are sets $a\cup\{b\}$ and $\{a\}\cup\{b\}$; the latter is usually written as $\{a,b\}$.  If $c$ is another set, we can form the set $\{a,b\}\cup\{c\}$, which we write as $\{a,b,c\}$; and so forth.  This allows us to build up the following infinite sequence:
\begin{align*}
&\emptyset,&
&\{\emptyset\},&
&\bigl\{\emptyset,\{\emptyset\}\bigr\},&
&\Bigl\{\emptyset,\{\emptyset\},\bigl\{\emptyset,\{\emptyset\}\bigr\}\Bigr\},&
&\dots
\end{align*}
By definition, these sets are the natural numbers $0$, $1$, $2$, $3$, \dots
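In this notation, for example,
\begin{equation*}
2=\bigl\{\emptyset,\{\emptyset\}\bigr\}=\{0,1\},
\end{equation*}
a set with exactly two elements.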

As we shall understand them, the $\zfc$ axioms are written in a certain \emph{logic,} whose symbols are:
\begin{compactenum}[1)]
\item
variables, as $x$, $y$, and $z$;
\item
the symbol $\in$ denoting the membership relation, so that $x\in y$ means $x$ is a member of $y$;
\item
the Boolean connectives of propositional logic: $\lor$ (``or''), $\land$ (``and''), $\lto$ (``implies''), $\liff$ (``if and only if''), and $\lnot$ (``not'');
\item
parentheses or brackets;
\item
quantification symbols $\exists$ (``there exists'') and $\forall$ (``for all'').
\end{compactenum}
We may also introduce constants, as $a$, $b$, and $c$, or $A$, $B$, and $C$, to stand for particular sets.  A variable or a constant is called a \emph{term.}  If $t$ and $u$ are terms, then the expression
\begin{equation*}
t\in u
\end{equation*}
is an \emph{atomic formula.}  From atomic formulas, other formulas are built up \emph{recursively} by use of the symbols above, according to certain rules.  For example, $\lnot\;t\in u$ is the formula saying that $t$ is \emph{not} a member of $u$.  We usually abbreviate this formula by
\begin{equation*}
t\notin u.
\end{equation*}
Now we can write the \textbf{Empty Set Axiom:}
\begin{equation*}
\Exists x\Forall yy\notin x.
\end{equation*}
The expression $\Forall z(z\in x\lto z\in y)$ is the formula saying
that every element of $x$ is an element of $y$.  Another way to say
this is that $x$ is a 
\textbf{subset}%
\index{subset}
of $y$, or that $y$ 
\textbf{includes}%
\index{include}
$x$.  We abbreviate this formula by\footnote{The relation $\included$ of being included is completely different from the relation $\in$ of being contained.  However, many mathematicians confuse these relations in words, using the word \emph{contained} to describe both.}
\begin{equation*}
x\included y.
\end{equation*}
The formula $x\included y\land y\included x$ says that $x$ and $y$ have the same members, so that they are equal by the definition given above; in this case we use the abbreviation
\begin{equation*}
x=y.
\end{equation*}

Some occurrences of a variable in a formula are \emph{bound.}\footnote{The word \emph{bound} here is the past participle of the verb \emph{to bind.}  The unrelated verb \emph{to bound} is also used in mathematics, but its past participle is \emph{bounded.}}  In particular, if $\phi$ is a formula, then so are $\Exists x\phi$ and $\Forall x\phi$, but all occurrences of $x$ in these two formulas are bound.  Occurrences of a variable that are not bound are \emph{free.}  If $\phi$ is a formula in which only $x$ occurs freely, we may write $\phi$ as $\phi(x)$.  If $a$ is a set,  then by replacing every free occurrence of $x$ in $\phi$ with $a$, we obtain the formula $\phi(a)$, which is a \textbf{sentence} because it has no free variables.  This sentence is true or false (depending on which set $a$ is).  If the sentence is true, then $a$ can be said to \emph{satisfy} the formula.  There is a collection of all sets that satisfy $\phi$.  We denote this collection by
\begin{equation*}
\{x\colon\phi(x)\}.
\end{equation*}
Such a collection is called a 
\textbf{class.}%
\index{class}
In particular, it is the class \emph{defined} by the formula $\phi$.

The definition of equality can also be expressed by the following sentences:
\begin{gather}\label{eqn:=}
\Forall x\Forall y\Forall z\bigl(x=y\lto(z\in x\liff z\in y)\bigr),\\\label{eqn:>=}
\Forall x\Forall y\bigl(\Forall z(z\in x\liff z\in y)\lto x=y\bigr).
\end{gather}
%Here we use convention whereby $\phi\lto\psi\lto\chi$ means $\phi\lto(\psi\lto\chi)$.
That equal sets belong to the same sets is the \textbf{Equality Axiom:}
\begin{equation}\label{eqn:=2}
\Forall x\Forall y\Forall z\bigl(x=y\lto(x\in z\liff y\in z)\bigr).
\end{equation}
The meaning of the sentences \eqref{eqn:=} and \eqref{eqn:=2} is that equal sets satisfy the same atomic formulas, be they of the form $x\in a$ or $a\in x$.
It is then a theorem that equal sets satisfy the same formulas in general:
\begin{equation}\label{eqn:=thm}
\Forall x\Forall y\Bigl(x=y\lto\bigl(\phi(x)\liff\phi(y)\bigr)\Bigr).
\end{equation}
The theorem is proved by \emph{induction} on the complexity of formulas.  Such a proof is possible because formulas are defined recursively.  See \S\ref{sect:N} below.

It is more usual to take the sentence \eqref{eqn:=thm} as a logical axiom, of which \eqref{eqn:=} and \eqref{eqn:=2} are special cases; but then \eqref{eqn:>=} is no longer true by definition or by proof, but must be taken as an axiom, which is called the \textbf{Extension Axiom.}  The idea behind the name is that having the same members means having the same \emph{extension.}  

In any case, all of the sentences \eqref{eqn:=}, \eqref{eqn:>=}, \eqref{eqn:=2}, and \eqref{eqn:=thm} end up being true.  They tell us that equal sets are precisely those sets that are logically indistinguishable.
We customarily treat equality as \emph{identity.}  We consider equal sets to be the \emph{same} set.  If $a=b$, we may say simply that $a$ is $b$.  

With this understanding,
we obtain the sequence $0$, $1$, $2$, \dots, described above by starting with the Empty Set Axiom and continuing with the \textbf{Adjunction Axiom:}
\begin{equation*}
\Forall x\Forall y\Exists z\Forall w(w\in z\liff w\in x\lor w=y).
\end{equation*}
In fact this is not one of Zermelo's original axioms of 1908.  It and the Empty Set Axiom have as a consequence
\begin{equation*}
\Forall x\Forall y\Exists z\Forall w(w\in z\liff w=x\lor w=y).
\end{equation*}
This is usually called the \textbf{Pairing Axiom} and is one of Zermelo's original axioms.  More precisely, Zermelo has an \textbf{Elementary Set Axiom,} which consists of the Empty Set Axiom and the Pairing Axiom.\footnote{Zermelo also requires that for every set $a$ there be a set $\{a\}$; but this is a special case of pairing.}
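Indeed, the Pairing Axiom follows from the Empty Set and Adjunction Axioms: starting from the set $\emptyset$ given by the Empty Set Axiom, two applications of Adjunction yield
\begin{equation*}
\emptyset\cup\{a\}=\{a\},\qquad\{a\}\cup\{b\}=\{a,b\},
\end{equation*}
as in the construction of $\{a,b\}$ described above.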

We define two classes to be equal if they have the same members.  Thus if
\begin{equation*}
\Forall x\bigl(\phi(x)\liff\psi(x)\bigr),
\end{equation*}
then the formulas $\phi$ and $\psi$ define equal classes.  Here too we consider equality as identity.

Similarly, since $1/2=2/4$, we consider $1/2$ and $2/4$ to be the same.  In ordinary life they are distinct: $1/2$ is one thing, namely one half, while $2/4$ is two things, namely two quarters.  In mathematics, we ignore this distinction.

We now have that
\emph{every set is a class.}  In particular, every set $a$ is the class $\{x\colon x\in a\}$.

However, \emph{not every class is a set.}  For, the class $\{x\colon x\notin x\}$
is not a set.  If it were a set $a$, then $a\in a\liff a\notin a$,
which is a contradiction.  This is the \emph{Russell Paradox} \cite{Russell-letter}.

Every set $a$ has a \textbf{union,} which is the class $\{x\colon\Exists y(x\in y\land y\in a)\}$.  This union is denoted by $\bigcup a$.
The \textbf{Union Axiom} is that this class is a set:
\begin{equation*}
\Forall x\Exists yy=\bigcup x.
\end{equation*}
Note that $a\cup b=\bigcup\{a,b\}$.
The Adjunction Axiom is a consequence of the Union and Pairing Axioms.  We use the Union Axiom when considering unions of chains of structures (as on page \pageref{chains} below).
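As a simple illustration of the union operation, with the natural numbers as defined above,
\begin{equation*}
\bigcup3=\bigcup\{0,1,2\}=0\cup1\cup2=\{0,1\}=2.
\end{equation*}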

Suppose $A$ is a set and $\bm C$ is the class $\{x\colon\phi(x)\}$.  Then we can form the class $A\cap\bm C$, which is defined by the formula $x\in A\land\phi(x)$.  The \textbf{Separation Axiom} is that this class is a set.  We may denote this set by $\{x\in A\colon\phi(x)\}$.  Actually Separation is a \emph{scheme} of axioms, one for each singulary formula $\phi$:
\begin{equation*}
\Forall x\Exists y\Forall z\bigl(z\in y\liff z\in x\land\phi(z)\bigr).
\end{equation*}
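For example, if $A$ and $B$ are sets, then taking $\phi(x)$ to be the formula $x\in B$ shows that the intersection
\begin{equation*}
A\cap B=\{x\in A\colon x\in B\}
\end{equation*}
is a set.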

In most of mathematics, and in particular in these notes, one need not worry about the distinction between sets and classes.  But it is logically important.  It turns out that the objects of interest in mathematics can be understood as sets.  Indeed, we have already defined the natural numbers as sets.  We can talk about sets by means of formulas.  Formulas define classes of sets, as above.  Some of these classes turn out to be sets themselves; but there is no reason to expect all of them to be sets.  Indeed, as we have noted, some of them are not sets.  \emph{Sub-classes} of sets are sets; but some classes are too big to be sets.  The class $\{x\colon x=x\}$ of all sets is not a set, since if it were, then the sub-class $\{x\colon x\notin x\}$ would be a set, and it is not.

Every set $a$ has a \emph{power class,} namely the class $\{x\colon x\included a\}$ of all subsets of $a$.  This class is denoted by $\pow a$.  The \textbf{Power Set Axiom} is that this class is a set:
\begin{equation*}
\Forall x\Exists yy=\pow x.
\end{equation*}
Then $\pow a$ can be called the \textbf{power set} of $a$.
The Power Set Axiom will be of minor importance to us; we shall not actually use it until page~\pageref{pow}.

We shall not use the Axiom of Choice to prove anything.  However, it can be used to show that some objects that we shall study are interesting (p.~\pageref{ac}) or even exist at all (p.~\pageref{ac-up}).

The \textbf{Axiom of Infinity} is that the collection $\{0,1,2,\dots\}$ of natural numbers is a set.  It is not obvious how to formulate this axiom as a sentence of our logic.  One approach is to let $\phi(x)$ be the formula
\begin{equation*}
\Forall y\bigl(0\in x\land(y\in x\lto y\cup\{y\}\in x)\bigr)
\end{equation*}
and to declare that the Axiom of Infinity is the sentence $\Exists x\phi(x)$.
Then by definition
\begin{equation}\label{eqn:upomega-defn}
\upomega=\bigcap\{x\colon\phi(x)\}.
\end{equation}
In general, $\bigcap a$ is the class
\begin{equation*}
\{x\colon\Forall y(y\in a\lto x\in y)\}.
\end{equation*}
This class is the \textbf{intersection} of $a$.  If $b\in a$, then $\bigcap a\included b$, and so $\bigcap a$ is a set by the Separation Axiom.  In particular, by the Axiom of Infinity, $\upomega$ is a set.  However, $\bigcap\emptyset$ is the class of all sets.
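For a nonempty example,
\begin{equation*}
\bigcap\{1,2,3\}=\{0\}\cap\{0,1\}\cap\{0,1,2\}=\{0\}=1.
\end{equation*}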

Our definition of $\upomega$ does not by itself establish that it has the properties we expect of the natural numbers.  We shall do this in \S\ref{sect:omega} (p.~\pageref{sect:omega}).

For the record, we have now named all of the axioms given by Zermelo in 1908:
\begin{inparaenum}[(I)]
\item
Extension,
\item
Elementary Set,
\item
Separation,
\item
Power Set,
\item
Union, and
\item
Choice.
\end{inparaenum}
Zermelo assumes that equality is identity: we have expressed this as the sentence \eqref{eqn:=thm} above.  In fact Zermelo does not use logical formalism as we have.  We prefer to define equality with \eqref{eqn:=} and \eqref{eqn:>=} and then use the Axioms of 
\begin{inparaenum}[(1)]
\item
Empty Set,
\item
Equality,
\item
Adjunction,
\item
Separation,
\item
Union,
\item
Power Set, and
\item
Choice.
\end{inparaenum}
But these two collections of axioms are logically equivalent.  

Apparently Zermelo overlooked an axiom, the \emph{Replacement Axiom,} which was supplied in 1922 by Skolem \cite{Skolem-some-remarks} and by Fraenkel.\footnote{I have not been able to consult Fraenkel's original papers.  According to van Heijenoort \cite[p.~291]{MR1890980}, Lennes also suggested something like the Replacement Axiom at around the same time (1922) as Skolem and Fraenkel; but Cantor had suggested such an axiom in 1899.}  We shall give this axiom in the next section.

An axiom never needed in ordinary mathematics is the \emph{Foundation Axiom.}  Stated originally by von Neumann \cite{von-Neumann-ax}, it ensures that certain pathological situations, like a set containing itself, are impossible.  It does this by declaring that every nonempty set has an element that is disjoint from it: $\Forall x\Exists y(x\neq\emptyset\lto y\in x\land x\cap y=\emptyset)$.  We shall never use this.

The collection called $\zfc$ is Zermelo's axioms, along with Replacement and Foundation.  If we leave out Choice, we have what is called $\zf$.  But we shall not use these expressions again in these notes.


\section{Functions and relations}\label{sect:f}
%I start with a brief set-theoretic review of \emph{functions.}
If $A$ and $B$ are sets, then we define
\begin{equation*}
A\times B=\{z\colon\Exists x\Exists y(z=(x,y)\land x\in A\land y\in B)\}.
\end{equation*}
This is the \textbf{cartesian
  product}\index{cartesian product}
  of $A$ and $B$.
Here the \textbf{ordered pair}\index{ordered pair} $(x,y)$ is defined so that
\begin{equation*}
(a,b)=(x,y)\liff a=x\land b=y.
\end{equation*}
One definition that accomplishes this is
$(x,y)=\bigl\{\{x\},\{x,y\}\bigr\}$, but we never actually need the precise
definition. 
An \textbf{ordered triple}\index{ordered triple} $(x,y,z)$ can be defined as $\bigl((x,y),z\bigr)$, and
so forth.

A \textbf{function}\index{function} or \textbf{map}\index{map} from $B$ to
$A$ is a subset 
$f$ of $B\times A$ such that, for each $b$ in $B$, there is exactly
one $a$ in $A$ such that $(b,a)\in f$.  Then instead of $(b,a)\in f$,
we write 
\begin{equation}\label{eqn:f}
  f(b)=a.
\end{equation}
I assume the reader is familiar with the \emph{kinds} of functions
from $B$ to $A$:
injective or one-to-one, surjective or onto, and bijective.  If it is not convenient to name a function with a single letter like $f$, we may write the function as $x\mapsto f(x)$, where the expression $f(x)$ would be replaced by some particular expression involving $x$.  As an abbreviation of the statement that $f$ is a function from $B$ to $A$, we may write\footnote{Thus, while the symbol $f$ can be understood as a \emph{noun,} the expression $f\colon B\to A$ is a complete \emph{sentence.}  We may write ``Let $f\colon B\to A$'' to mean ``Let $f$ be a function from $B$ to $A$.''  It would be redundant and even illogical to write ``Let $f\colon B\to A$ be a function from $B$ to $A$''; however, such confusing expressions are common in mathematical writing.}
\begin{equation}\label{eqn:f:B->A}
f\colon B\to A.
\end{equation}
If $C\included B$, the class $\{y\colon\Exists x(x\in C\land y=f(x))\}$ can be written as one of\footnote{The notation $f(C)$ is also used, but the ambiguity is dangerous, at least in set theory as such.}
\begin{align*}
&\{f(x)\colon x\in C\},&
&f[C].
\end{align*}
This class is the \textbf{image} of $C$ under $f$.
Here this class is a sub-class of $A$, and so it is a set by the Separation Axiom.  By the \textbf{Replacement Axiom,} the image of every set under every function is a set.  For example, if we are just given a function $n\mapsto G_n$ on $\upomega$, by Replacement we have that the class $\{G_n\colon n\in\upomega\}$ is a set.

A \textbf{singulary operation}\index{singulary}\footnote{The word
  \textbf{unary}\index{unary} is  
  more common, but less etymologically correct.} on $A$ is a function
from $A$ to itself; a \textbf{binary}\index{binary} operation on $A$
is a function 
from $A\times A$ to $A$.  A \textbf{binary relation} on $A$ is a
subset of $A\times A$; if $R$ is such, and $(a,b)\in R$, we often
write
\begin{equation*}
  a\mathrel Rb.
\end{equation*}
A singulary operation on $A$ is a particular kind of binary
relation on $A$; for such a relation, we already have the
special notation in~\eqref{eqn:f}.  I assume the reader is familiar
with other kinds of binary relations, such as orderings. 

\section{An axiomatic development of the natural numbers}\label{sect:N}

In \S\ref{sect:sets} (p.~\pageref{sect:sets}) we sketched an axiomatic approach to set theory.  Now we start over with an axiomatic approach to the natural numbers alone.  We integrate numbers and sets in the section after this.

For the moment, we forget the definition of $\upomega$.  We forget about starting the natural numbers with $0$.  Children learn to count starting with $1$, not $0$.  Let us understand the natural numbers to compose \emph{some} set called $\N$ that has
\begin{compactenum}[1)]
\item 
a distinguished \textbf{initial element,}\index{initial element} denoted by $1$
and called \textbf{one,}\index{one} and
\item
a distinguished singulary operation of
\textbf{succession,}\index{succession, successor} namely $n\mapsto n+1$, where $n+1$ is called the \textbf{successor} of $n$. 
\end{compactenum}
I propose to refer to the ordered triple $(\N,1,n\mapsto n+1)$ as an
\emph{iterative structure.}

In general, by an \textbf{iterative structure,}\index{iterative} I mean any set that has a
distinguished element and a distinguished singulary operation.  Here
the underlying set is sometimes called the
\textbf{universe}\index{universe} of the 
structure.  If one wants a simple notational distinction between a
structure and its universe, and the universe is $A$, then the
structure might be denoted by $\str A$.  (Here $\str A$ is
  the Fraktur version of $A$.  See Appendix~\ref{app:German}.)

 The
iterative structure $(\N,1,n\mapsto n+1)$ is
distinguished among iterative structures by satisfying the
following axioms.
\begin{compactenum}
\item\label{ax:0}
$1$ is not a successor: $1\neq n+1$.
\item\label{ax:inj}
Succession is injective: if $m+1=n+1$, then $m=n$.
\item\label{ax:ind}
The structure admits \textbf{proof by induction,}\index{induction} in
the following sense.  Suppose $A$ is a subset of the universe with the following two closure
  properties:
  \begin{compactenum}
  \item 
$1\in A$;
\item
for all $n$, if $n\in A$, then $n+1\in A$.
  \end{compactenum}
Then $A$ must be the whole universe: $A=\N$.
\end{compactenum}

These axioms seem to have been discovered originally by
Dedekind~\cite[II, VI (71), p.~67]{MR0159773}, although they were also
written down by 
Peano~\cite{Peano} and are often known as the \textbf{Peano
  axioms.}\index{Peano} 
  
Suppose $(A,b,f)$ is an iterative structure.  If we successively compute $b$, $f(b)$, $f(f(b))$, $f(f(f(b)))$, and so on, either we always get a new element of $A$ or we don't.  In the latter case, we may eventually come back to $b$.  Otherwise, we reach an element $c$, and later a different element $d$, such that $f(c)=f(d)$.  The second of the Peano Axioms would rule out this possibility; the first would ensure that our computations never returned to $b$.  The last axiom, the \emph{Induction Axiom,} would ensure that every element of $A$ was reached by our computations.  None of the three axioms implies the others, although the Induction Axiom implies that at least one of the other two axioms holds \cite{MR0120156}.
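For example, on the set $\{1,2\}$, let $f$ be the operation such that $f(1)=2$ and $f(2)=2$.  In the iterative structure $(\{1,2\},1,f)$, the element $1$ is not a successor, and induction holds, since a subset of $\{1,2\}$ that contains $1$ and is closed under $f$ must contain $2$ as well; but $f$ is not injective.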
  
The following theorem will allow us to define all of the usual operations on $\N$.  The theorem is difficult to prove.  Not the least difficulty is seeing that the theorem \emph{needs} to be proved.  However, as we shall note later, the theorem is not just an immediate consequence of induction.  The proof uses all three of the Peano Axioms.

\begin{theorem}[Recursion]
For every iterative structure $(A,b,f)$, there is a unique
\textbf{homomorphism}\index{homomorphism} to this structure from
$(\N,1,n\mapsto n+1)$: 
that is, there is a unique function $h$ from $\N$ to 
$A$ such that
\begin{compactenum}
\item 
$h(1)=b$,
\item
$h(n+1)=f(h(n))$ for all $n$ in $\N$.
\end{compactenum}
\end{theorem}

\begin{proof}
We seek $h$ as a particular subset of $\N\times A$.
Let $B$ be the set whose elements are the subsets $C$ of $\N\times
A$ such that, if $(x,y)\in C$, then either 
\begin{compactenum}
\item 
$(x,y)=(1,b)$ or else
\item $C$ has an element
$(u,v)$ such that $(x,y)=(u+1,f(v))$.
\end{compactenum}
Let $R=\bigcup B$; so $R$ is a subset of $\N\times A$.  We may say $R$
is a \emph{relation} from $\N$ to $A$.  If
$(x,y)\in R$, we may write also 
\begin{equation*}
x\mathrel Ry.  
\end{equation*}
Since $\{(1,b)\}\in
B$, we have $1\mathrel Rb$.  If $n\mathrel Ry$, then $(n,y)\in C$ for
some $C$ in $B$, but then $C\cup\{(n+1,f(y))\}\in B$ by definition of $B$, so
$(n+1)\mathrel R f(y)$.  Therefore $R$ is the desired function $h$,
provided it is a \emph{function} from $\N$ to $A$.  Proving this has
two stages.
\begin{asparaenum}[1.]
  \item
For all $n$ in $\N$, there is $y$ in $A$ such that $n\mathrel Ry$.
Indeed, let $D$ be the set of such $n$.  Then we have just seen that
$1\in D$, and if $n\in D$, then $n+1\in D$.  By induction, $D=\N$.
\item
For all $n$ in $\N$, if $n\mathrel Ry$ and $n\mathrel Rz$, then $y=z$.
Indeed, let $E$ be the set of such $n$.  Suppose $1\mathrel R y$.  Then
$(1,y)\in C$ for some $C$ in $B$.  Since $1$ is not a successor, we
must have $y=b$, by definition of $B$.  Therefore $1\in E$.  Suppose
$n\in E$, and $(n+1)\mathrel Ry$.  Then $(n+1,y)\in C$ for some $C$ in
$B$.  Again since $1$ is not a successor, we must have
$(n+1,y)=(m+1,f(v))$ for some $(m,v)$ in $C$.  Since succession is
injective, we must have $m=n$.  Since $n\in E$, we know $v$ is
\emph{unique} such that $n\mathrel Rv$.  Since $y=f(v)$, therefore $y$
is unique such that $(n+1)\mathrel Ry$.  Thus $n+1\in E$.  By
induction, $E=\N$.
\end{asparaenum}

So $R$ is the desired function $h$.
Finally, $h$ is unique by induction.
\end{proof}

\begin{corollary}
For every set $A$ with a distinguished element $b$, and for every function
$F$ from $\N\times A$ to $A$, there is a unique function $H$ from $\N$ to
$A$ such that
\begin{compactenum}
\item 
$H(1)=b$,
\item
$H(n+1)=F(n,H(n))$ for all $n$ in $\N$.
\end{compactenum}
\end{corollary}

\begin{proof}
Let $h$ be the unique homomorphism from $(\N,1,n\mapsto n+1)$ to
$(\N\times A,(1,b),f)$, where $f$ is the operation
$(n,x)\mapsto(n+1,F(n,x))$.  In particular, $h(n)$ is always an
ordered pair.  By induction, the 
first entry of $h(n)$ is always $n$; so there is a function $H$ from
$\N$ to $A$ such that $h(n)=(n,H(n))$.  Then $H$ is as desired.  By
induction, $H$ is unique.
\end{proof}

We can now use recursion to \emph{define} on $\N$ 
\begin{compactenum}[1)]
  \item
the binary operation $(x,y)\mapsto x+y$
of \textbf{addition,}\index{addition} and 
\item
the binary operation
$(x,y)\mapsto x\cdot y$
of \textbf{multiplication.}\index{multiplication}  (We often write $xy$ for $x\cdot y$.) 
\end{compactenum}
The definitions are:
\begin{align*}
&  \begin{gathered}
n+1=n+1,\\
n\cdot1=n,    
  \end{gathered}&
&  \begin{gathered}
    n+(m+1)=(n+m)+1,\\
n\cdot(m+1)=n\cdot m+n.
  \end{gathered}
\end{align*}
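For example, if we write $2$ for $1+1$, $3$ for $2+1$, and $4$ for $3+1$, then these definitions give
\begin{align*}
2+2&=2+(1+1)=(2+1)+1=3+1=4,\\
2\cdot2&=2\cdot(1+1)=2\cdot1+2=2+2=4.
\end{align*}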

\begin{lemma}\label{lem:+}
For all $n$ and $m$ in $\N$,
\begin{align*}
  1+n&=n+1,&(m+1)+n&=(m+n)+1.
\end{align*}
\end{lemma}

\begin{proof}
  Induction.
\end{proof}

\begin{theorem}\label{thm:N-comm}
Addition on $\N$ is
  \begin{compactenum}[1)]
  \item 
\textbf{commutative:}\index{commutative} $n+m=m+n$; and
\item
\textbf{associative:}\index{associative} $n+(m+k)=(n+m)+k$.
  \end{compactenum}
\end{theorem}

\begin{proof}
  Induction and the lemma.
\end{proof}

\begin{theorem}\label{thm:cancel}
  Addition on $\N$ allows \textbf{cancellation:}\index{cancellation}
if $n+x=n+y$, then $x=y$.
\end{theorem}

\begin{proof}
  Induction, and injectivity of succession.
\end{proof}

\begin{lemma}\label{lem:.}
For all $n$ and $m$ in $\N$,
\begin{align*}
  1\cdot n&=n,&(m+1)\cdot n&=m\cdot n+n.
\end{align*}
\end{lemma}

\begin{proof}
  Induction.
\end{proof}

\begin{theorem}\label{thm:mult-comm}
Multiplication on $\N$ is
  \begin{compactenum}[1)]
  \item 
commutative: $nm=mn$;
\item
\textbf{distributive}\index{distributive} over addition: $n(m+k)=nm+nk$; and
\item
associative: $n(mk)=(nm)k$.
  \end{compactenum}
\end{theorem}

\begin{proof}
  Induction and the lemma.
\end{proof}

Landau \cite{MR12:397m} proves \emph{using induction alone} that $+$
and $\cdot$ exist 
as given by the recursive definitions above.  However,
Theorem~\ref{thm:cancel} needs more than induction.  Also, the
existence of \textbf{exponentiation,}\index{exponentiation} as an
operation $(x,y)\mapsto 
x^y$ such that
\begin{align*}
  n^1&=n,& n^{m+1}&=n^m\cdot n,
\end{align*}
requires more than induction.

The usual ordering $<$ of $\N$ is defined recursively as follows.
First note that $m\leq n$ means simply $m<n$ or $m=n$.  Then the
definition of $<$ is:
\begin{compactenum}[1)]
\item 
$m\not<1$;% for \emph{no} $m$ in $\N$;
\item
$m<n+1$ if and only if $m\leq n$.
\end{compactenum}
In particular, $n<n+1$.
Really, it is the sets $\{x\in\N\colon x<n\}$ that are defined by
recursion:
\begin{compactenum}[1)]
\item 
$\{x\in\N\colon x<1\}=\emptyset$;
\item
$\{x\in\N\colon x<n+1\}=\{x\in\N\colon x<n\}\cup\{n\}$.
\end{compactenum}
We now have $<$ as a binary relation on $\N$;
we must \emph{prove} that it is an ordering.
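For example, the recursion gives
\begin{align*}
\{x\in\N\colon x<1\}&=\emptyset,&
\{x\in\N\colon x<2\}&=\{1\},&
\{x\in\N\colon x<3\}&=\{1,2\},
\end{align*}
where $2$ and $3$ denote $1+1$ and $2+1$ respectively.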

\begin{theorem}\label{thm:<trans}
  The relation $<$ is \textbf{transitive}\index{transitive} on $\N$,
  that is, if $k<m$ 
  and $m<n$, then $k<n$.
\end{theorem}

\begin{proof}
  Induction on $n$.
\end{proof}

\begin{lemma}
  $m\neq m+1$.
\end{lemma}

\begin{proof}
  The claim is true when $m=1$, since $1$ is not a successor.  Suppose
  the claim is true when $m=k$, that is, $k\neq k+1$.  Then $k+1\neq
  (k+1)+1$, by injectivity of succession, so the claim is true when
  $m=k+1$.  By induction, the claim is true for all $m$.
\end{proof}

\begin{theorem}\label{thm:<irr}
  The relation $<$ is \textbf{irreflexive}\index{irreflexive} on $\N$:
  $m\not<m$. 
\end{theorem}

\begin{proof}
  The claim is true when $m=1$, since $m\not<1$ by definition.
  Suppose the claim \emph{fails} when $m=k+1$.  This means $k+1<k+1$.
  Therefore $k+1\leq k$ by definition.  By the previous lemma,
  $k+1<k$.  But $k\leq k$, so $k<k+1$ by definition.  So $k<k+1$ and
  $k+1<k$; hence $k<k$ by
  Theorem~\ref{thm:<trans}, that is, the claim fails when $m=k$.  By
  induction, the claim holds for all $m$. 
\end{proof}

\begin{lemma}
  $1\leq m$.
\end{lemma}

\begin{proof}
  Induction.
\end{proof}

\begin{lemma}
If $k<m$, then $k+1\leq m$.
\end{lemma}

\begin{proof}
The claim is vacuously true when $m=1$.  Suppose it is true when
$m=n$.  Say $k<n+1$.  Then $k\leq n$.  If $k=n$, then
$k+1=n+1<(n+1)+1$.  If $k<n$, then $k+1<n+1$ by inductive hypothesis,
so $k+1<(n+1)+1$ by transitivity.  Thus the claim holds when $m=n+1$.
By induction, the claim holds for all $m$.\qedhere
\end{proof}

\begin{theorem}\label{thm:<tot}
  The relation $\leq$ is \textbf{total}\index{total} on $\N$: either
  $k\leq m$ or 
  $m\leq k$.
\end{theorem}

\begin{proof}
  Induction and the two lemmas.
\end{proof}

Because of Theorems~\ref{thm:<trans},~\ref{thm:<irr},
and~\ref{thm:<tot}, the set $\N$ is \textbf{(strictly)
  ordered}\index{order}\index{strict} by the relation $<$. 

\begin{theorem}\label{thm:m+x=n}
  For all $m$ and $n$ in $\N$, we have $m<n$ if and only if the
  equation
  \begin{equation}\label{eqn:m+x=n}
    m+x=n
  \end{equation}
is soluble in $\N$.
\end{theorem}

\begin{proof}
  By induction on $k$, if $m+k=n$, then $m<n$.  We prove the converse by induction on $n$.  We never have $m<1$.  Suppose for some $r$ that, for all $m$, if $m<r$, then the equation $m+x=r$ is soluble.  Suppose also $m<r+1$.  Then $m<r$ or $m=r$.  In the former case, by inductive hypothesis, the equation $m+x=r$ has a solution $k$, and therefore $m+(k+1)=r+1$.  If $m=r$, then $m+1=r+1$.  Thus the equation $m+x=r+1$ is soluble whenever $m<r+1$.
By
induction, for all $n$ in $\N$, if $m<n$, then~\eqref{eqn:m+x=n}
is soluble in $\N$. 
\end{proof}

\begin{theorem}\label{thm:N<}
If $k<\ell$, then
\begin{align*}
  k+m&<\ell+m,&
km&<\ell m.
\end{align*}
\end{theorem}

Here the first conclusion is a refinement of Theorem~\ref{thm:cancel}; the second yields the following analogue of Theorem~\ref{thm:cancel} for multiplication.

\begin{corollary}
  If $km=\ell m$, then $k=\ell$.
\end{corollary}

\begin{theorem}\label{thm:wo}
  $\N$ is \textbf{well ordered}\index{well ordered} by $<$: every
  nonempty set of natural 
  numbers has a least element.
\end{theorem}

\begin{proof}
  Suppose $A$ is a set of natural numbers with no least element.  Let
  $B$ be the set of natural numbers $n$ such that, if $m\leq n$, then
  $m\notin A$.  Then
  $1\in B$, since otherwise, as $1\leq m$ for all $m$ by an earlier lemma, $1$ would be the least
  element of $A$.  Suppose $m\in B$.  Then $m+1\in B$, since otherwise
  $m+1$ would be the least element of $A$.  By induction, $B=\N$, so
  $A=\emptyset$. 
\end{proof}

\section{A construction of the natural numbers}\label{sect:omega}

Now we recall the definition \eqref{eqn:upomega-defn} (p.~\pageref{eqn:upomega-defn}) of $\upomega$.  By this definition, $\upomega$ contains $\emptyset$ and is closed under the operation
$x\mapsto x'$, where
\begin{equation*}
  x'=x\cup\{x\}.
\end{equation*}
Moreover, $\upomega$ is the \emph{smallest} of the sets with these properties.  (Such sets exist by the Axiom of Infinity.)
Therefore the iterative structure
 $(\upomega,\emptyset,{}')$ admits induction.  We now prove that this structure satisfies the remaining two Peano Axioms.

\begin{lemma}\label{lem:mem-inc}
  On $\upomega$, membership implies inclusion.
\end{lemma}

\begin{proof}
  By induction on $n$, we prove that, for all $k$ in $\upomega$, if $k\in
  n$, then $k\included n$.  The claim is vacuously true when
  $n=\emptyset$.  Suppose it is true when $n=m$.  If $k\in m'$, then
  either $k\in m$ or else $k=m$.  In the former case, by inductive
  hypothesis, $k\included m\included m'$; in the latter case,
  $k=m\included m'$.  Thus the claim is true when $n=m'$.  By
  induction, the claim is true for all $n$ in $\upomega$.
\end{proof}

\begin{lemma}\label{lem:p}
  In $\upomega$, if $k\pincluded n$, then $k'\included n$.
\end{lemma}

\begin{proof}
  The claim is vacuously true when $n=\emptyset$.  Suppose it is true
  when $n=m$.  Say $k\pincluded m'$.  If $k\included m$, then either
  $k\pincluded m$, in which case the inductive hypothesis gives
  $k'\included m\included m'$; or else $k=m$, so that
  $k'=m'$.
If $k\not\included m$, then $m\in k$, so by Lemma~\ref{lem:mem-inc} we
have $m\included k\pincluded
m'=m\cup\{m\}$, and therefore $m=k$, so again $k'=m'$.  Thus the claim
is true when $n=m'$.  Therefore the claim holds for all $n$ in $\upomega$.
\end{proof}

\begin{lemma}\label{lem:<}
  Inclusion is a total ordering of $\upomega$.
\end{lemma}

\begin{proof}
  We have to show on $\upomega$ that, if $k\not\included n$, then
  $n\included k$.  The claim is trivially true when $n=\emptyset$.
  Suppose it is true when $n=m$.  If $k\not\included m'$, then
  $k\not\included m$, so $m\included k$, but $m\neq k$, so
  $m\pincluded k$, and therefore $m'\included k$ by Lemma~\ref{lem:p}.
\end{proof}

\begin{lemma}\label{lem:distinct}
Elements of $\upomega$ are distinct from their successors.  
\end{lemma}

\begin{proof}
We prove that no element of $\upomega$ has an element that is equal to its
successor.  This is trivially true for the empty set.  Suppose it is
true for $m$.  If $k\in m'$, then either $k\in m$, or else $k=m$.  In
the former case, by inductive hypothesis, $k\neq k'$.  In the latter
case, if $k=k'$, then $m=k\cup\{k\}$, and in particular $k\in m$,
contrary to inductive hypothesis.  Therefore no element of $m'$ is
equal to its successor.  This completes the induction.  Since every
element of $\upomega$ is an element of its successor, which is in $\upomega$,
no element of $\upomega$ is equal to its successor.
\end{proof}

\begin{theorem}
The iterative structure $(\upomega,\emptyset,{}')$ satisfies the Peano
Axioms.
\end{theorem}

\begin{proof}
We have observed that
 $(\upomega,\emptyset,{}')$ admits
induction.  Easily too, $\emptyset$ is not a successor.  By
Lemma~\ref{lem:<}, if $m\neq n$, we may assume $m\pincluded n$.  By
Lemmas~\ref{lem:p} and~\ref{lem:distinct}, we then have $m'\included
n\pincluded n'$.  Thus succession is injective.
\end{proof}

The elements of $\upomega$ are the \textbf{von Neumann natural numbers} \cite{von-Neumann}.
Henceforth we write $0$ for $\emptyset$, then $1$ for $0'$, and $2$
for $1'$, and  so on.  Thus we identify $\N$ with
$\upomega\setminus\{\emptyset\}$, so that
\begin{gather*}
  \upomega=\{0\}\cup\N,\\
\N=\{1,2,3,\dots\},\\
\upomega=\{0,1,2,\dots\}.
\end{gather*}
By the von-Neumann definition, we have  
 \begin{gather*}
   0=\emptyset;\qquad
1=\{0\};\qquad
2=\{0,1\};\qquad
3=\{0,1,2\},\quad\dots
 \end{gather*}
If $n\in\upomega$, then
\begin{equation*}
  n=\{0,\dots,n-1\}.
\end{equation*}
Note that this makes sense even when $n=0$.

\section{Structures}\label{sect:structures}

For us, the point of using the von-Neumann definition of the natural numbers is that, under
this definition, a natural number $n$ is a set with $n$ elements.
Since the set of functions from a set $B$ to a set $A$ can be
denoted by
\begin{equation*}
  A^B,
\end{equation*}
we have, in particular, that $A^n$ is the set of functions from
$\{0,\dots,n-1\}$ into $A$.  We can denote such a function by
$(x_0,\dots,x_{n-1})$; that is, 
\begin{equation*}
A^n=\{(x_0,\dots,x_{n-1})\colon x_i\in A\}.
\end{equation*}
Thus, $A^2$ can be identified with $A\times A$, and $A^1$ with $A$
itself.  There is exactly one function from $0$ to $A$, namely $0$; so
\begin{equation*}
  A^0=\{0\}=1.
\end{equation*}
An $n$-ary \textbf{relation}\index{relation} on $A$ is a subset of $A^n$;
an \textbf{$n$-ary}\index{n-ary@$n$-ary}
\textbf{operation}\index{operation} on $A$ is a function from $A^n$ to
$A$.  Relations and operations that are $2$-ary, $1$-ary, or $0$-ary
can be called 
\textbf{binary,}\index{binary} \textbf{singulary,}\index{singulary}
or \textbf{nullary,}\index{nullary} respectively; after the appropriate
identifications, this agrees with the terminology used in
\S \ref{sect:f}. 
A nullary operation on $A$ can be identified with an element of $A$.  

Generalizing the terminology used at the beginning of \S \ref{sect:N},
we define a \textbf{structure}\index{structure} as a set
together with some distinguished relations and operations on the set;
as before, the set is the \textbf{universe}\index{universe} of the
structure.  Again, if the 
universe is $A$, then
the whole structure might be denoted by $\str A$; if $B$, then $\str
B$.

The \textbf{signature}\index{signature} of a structure comprises a
symbol for each 
distinguished relation and operation of the structure.  For example,
the signature of an ordered field like $\R$ is $\{<,0,1,+,-,\cdot\}$.
If $s$ is a symbol of the signature of $\str A$, then the
corresponding relation or operation on $A$ can, for precision, 
be denoted by $s^{\str A}$.
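For example, if $\str R$ is the ordered field of real numbers, then $<^{\str R}$ is the usual ordering of $\R$, and $+^{\str R}$ is the usual addition.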

A \textbf{homomorphism}\index{homomorphism} 
from a structure $\str A$ to a structure $\str
B$ of the same signature is a function $h$ from $A$ to $B$ that
\emph{preserves} the distinguished relations and operations: this
means
\begin{gather}\notag
  h(f^{\str A}(x_0,\dots,x_{n-1}))=f^{\str
    B}(h(x_0),\dots,h(x_{n-1})),\\\label{eqn:hom}
(x_0,\dots,x_{n-1})\in R^{\str A}\lto(h(x_0),\dots,h(x_{n-1}))\in
  R^{\str B},
\end{gather}
for all $n$-ary operation-symbols $f$ and relation-symbols $R$ of the
signature, for all $n$ in $\upomega$.  
To indicate that $h$ is a homomorphism from $\str A$ to $\str B$, we may write
\begin{equation*}
h\colon\str A\to\str B
\end{equation*}
(rather than simply $h\colon A\to B$).
A homomorphism is an
\textbf{embedding}\index{embedding} if it is injective and if the converse
of~\eqref{eqn:hom} also holds.  A surjective embedding is an
\textbf{isomorphism.}\index{isomorphism} 
A \textbf{substructure}\index{substructure} of $\str B$ is a structure $\str A$ of the
same signature such that $A\included B$ and the inclusion of $A$ in
$B$ is an embedding of $\str A$ in $\str B$.
 
%\newpage

\part{Groups}

\chapter{Basic properties of groups and rings}

We define both groups and rings in this chapter.  We define rings (in \S\ref{sect:rings}, p.~\pageref{sect:rings}), because at the beginning of the next chapter (\S\ref{sect:gl}, p.~\pageref{sect:gl}) we shall define certain groups---namely \emph{general linear groups}---in terms of rings.

\section{Symmetry groups}\label{sect:sym}

Given a set $A$, we may refer to a bijection from $A$ to itself as a
\textbf{symmetry}\index{symmetry} or \textbf{permutation}\index{permutation} of $A$.  Let us denote the set
of these symmetries by
\begin{equation*}
  \Sym A.
\end{equation*}
This set can be equipped with:
\begin{compactenum}[1)]
  \item
the element 
%(or nullary operation\footnote{It is a nullary operation on $\Sym A$, but a singulary operation on $A$.}) 
  $\id_{A}$, which is the
  \textbf{identity}\index{identity} on $A$; 
\item
the singulary operation $f\mapsto f\inv$, which is functional \textbf{inversion;}\index{inversion}
\item
the binary operation $(f,g)\mapsto f\circ g$, which is functional \textbf{composition.}\index{composition}
\end{compactenum}
The structure $(\Sym A,\id_A,{}\inv,\circ)$ is the \textbf{complete
  group of symmetries}\index{complete group of symmetries} of $A$.  A
substructure of this can be called 
simply a 
\textbf{group of symmetries}\index{group of symmetries} of $A$.  
  
We may use $\Sym A$ to denote the whole structure $(\Sym A,\id_A,{}\inv,\circ)$.  Then, when we speak of a \textbf{subgroup} of $\Sym A$, we mean a subset that contains the identity and is closed under inversion and composition.

In case $n\in\upomega$, the notation $\mathrm S_n$ is also used
for $\Sym n$.  However, when most people write $\mathrm S_n$, they
probably mean the complete group of symmetries of the set
$\{1,\dots,n\}$.  It does not really matter whether $\{0,\dots,n-1\}$
or $\{1,\dots,n\}$ is used; we just need a set with $n$ elements.  The
size of $\Sym n$ or $\mathrm S_n$ is $n\cdot(n-1)\dotsm
2\cdot 1$, which is denoted by $n!$ and called $n$ \textbf{factorial.} 

We shall consider the groups $\Sym n$ 
at greater length in \S\ref{sect:fin} (p.~\pageref{sect:fin}).  
Meanwhile, it may be worth our while to have a brief look at them now.  
The group $\Sym 0$ has a unique element, 
$\id_0$ (which is itself $\emptyset$ or $0$).
The group $\Sym 1$ has the unique element $\id_1$ (which is $\{(0,0)\}$).  
Suppose $\sigma\in\Sym n$ for some $n$.  Then
\begin{equation*}
  \sigma
=\bigl\{\bigl(0,\sigma(0)\bigr),\dots,\bigl(n-1,\sigma(n-1)\bigr)\bigr\}.
\end{equation*}
Now, there is no particular reason 
to list the entries of an ordered pair horizontally.
Instead of $(x,y)$, we could write
$
\begin{pmatrix}
  x\\y
\end{pmatrix}
$.
Then we have
\begin{equation*}
  \sigma=\left\{
\begin{pmatrix}
  0\\\sigma(0)
\end{pmatrix},
\dots,
\begin{pmatrix}
  n-1\\\sigma(n-1)
\end{pmatrix}
\right\}.
\end{equation*}
Here the parentheses (the round brackets) serve no particular purpose; 
we might as well write simply
\begin{equation*}
  \sigma=\left\{
\begin{matrix}
  0&\dots&n-1\\\sigma(0)&\dots&\sigma(n-1)
\end{matrix}
\right\}.
\end{equation*}
This is a set with $n$ elements, 
and each of those elements is an ordered pair, here written vertically.  
In particular, those $n$ elements can be written in a different order; 
but the entries in a particular element cannot.  
Thus, with this notation, 
the same permutation of $n$ can be written in $n!$ different ways, 
one for each permutation of the columns.

In fact the books that I know of 
replace the braces (the curly brackets) with parentheses, as in
\begin{equation*}
  \begin{pmatrix}
       0  &        1  & \cdots &        n-1\\
\sigma(0) & \sigma(1) & \cdots & \sigma(n-1)
  \end{pmatrix}.
\end{equation*}
However, this notation is potentially misleading, 
because it does not stand for a \emph{matrix} 
such as we shall define in \S\ref{sect:gl} (p.~\pageref{sect:gl}).  
In a matrix, the order of the columns (as well as the rows) matters.  
We could write $\sigma$ as the ordered $n$-tuple 
$\bigl(\sigma(0),\dots,\sigma(n-1)\bigr)$ 
or the $1\times n$ matrix $ 
\begin{pmatrix}
  \sigma(0)&\cdots&\sigma(n-1)
\end{pmatrix}
$; 
but we shall not do this, 
because of the potential confusion with a similar notation, 
to be introduced presently.

In case
\begin{equation*}
\sigma=
\left\{
  \begin{matrix}
    0 & 1 & \cdots & n-2 & n-1\\
    1 & 2 & \cdots & n-1 &   0
  \end{matrix}
\right\},
\end{equation*}
$\sigma$ can be called a \emph{cycle.}\index{cycle}  More
generally, if $2\leq m\leq n$, then the permutation 
\begin{equation*}
\left\{
  \begin{matrix}
    0 & 1 & \cdots & m-2 & m-1 & m & \cdots & n-1\\
    1 & 2 & \cdots & m-1 &   0 & m & \cdots & n-1
  \end{matrix}
\right\}
\end{equation*}
is a cycle too, or more precisely an \emph{$m$-cycle.}  
For the moment, let us refer to this cycle as $\sigma_m$.  
Then for all $k$ in $n$, we have 
\begin{equation*}
\sigma_m(k)=
\begin{cases}
	k+1,&\text{ if }k<m-1,\\
	0,&\text{ if }k=m-1,\\
	k,&\text{ if }m\leq k<n.
\end{cases}
\end{equation*}
In the most general sense, an element $\sigma$ of $\Sym n$ is called an
\textbf{$m$-cycle,} or a cycle of \textbf{length}\index{length} $m$,
if, for some $\tau$ in $\Sym n$, for all $k$ in $n$,
\begin{equation*}
\sigma(\tau(k))=
\begin{cases}
	\tau(k+1),&\text{ if }k<m-1,\\
	\tau(0),&\text{ if }k=m-1,\\
	\tau(k),&\text{ if }m\leq k<n.
\end{cases}
\end{equation*}
In this case
\begin{equation*}
\sigma=  
\left\{\begin{matrix}
\tau(0) &\tau(1) &\cdots &\tau(m-2) &\tau(m-1) &\tau(m) &\cdots &\tau(n-1)\\
\tau(1) &\tau(2) &\cdots &\tau(m-1) &  \tau(0) &\tau(m) &\cdots &\tau(n-1)
  \end{matrix}
\right\}.
\end{equation*}
Then $\sigma(\tau(k))=\tau(\sigma_m(k))$ for all $k$ in $n$, and so
\begin{equation*}
  \sigma=\tau\circ\sigma_m\circ\tau\inv.
\end{equation*}
We can now write $\sigma$ neatly as
\begin{equation*}
  \begin{pmatrix}
    \tau(0) & \dots & \tau(m-1)
  \end{pmatrix}.
\end{equation*}
All this means is 
that $\sigma$ takes each entry $\tau(k)$ to the next entry $\tau(k+1)$, 
except that it takes $\tau(m-1)$ to $\tau(0)$.  
So the expression above should be understood,
not as a matrix, 
but rather as a ring, a circle, indeed a \emph{cycle,}
as in Figure~\ref{fig:6} where $m=6$.
\begin{figure}[ht]
\centering
\psset{unit=7mm}
\begin{pspicture}(-2,-2)(2,2)
\rput(0,2){$\tau(0)$}
\rput(1.73,1){$\tau(1)$}
\rput(1.73,-1){$\tau(2)$}
\rput(0,-2){$\tau(3)$}
\rput(-1.73,-1){$\tau(4)$}
\rput(-1.73,1){$\tau(5)$}
\end{pspicture}
\caption{A cycle.}\label{fig:6}
\end{figure}
In general, 
the circle can be broken and written in one line in $m$ different ways, as
\begin{equation*}
  \begin{pmatrix}
    \tau(i) & \cdots & \tau(m-1) & \tau(0) & \cdots & \tau(i-1)
  \end{pmatrix}
\end{equation*}
for any $i$ in $m$.  

We have defined $m$-cycles when $m>1$.  
However, we can consider the identity $\id_n$ to be a $1$-cycle.  
This might be denoted by $(0)$, 
or even by $(i)$ for any $i$ in $n$; 
but I shall use the notation $(\ )$.

Two arbitrary elements $\sigma$ and $\tau$ of
$\Sym n$ are \textbf{disjoint}\index{disjoint} if, for all $k$ in~$n$,
\begin{equation*}
  \sigma(k)\neq k\implies\tau(k)=k.
\end{equation*}
In this case, $\sigma\circ\tau=\tau\circ\sigma$, that is, the two permutations \textbf{commute.}%
\index{commute}
An arbitrary composite of
permutations is also called the 
\textbf{product}%
\index{product}
of the symmetries.
We shall show,\label{prom-prod-cyc} as Theorem~\ref{thm:prod-cyc} (p.~\pageref{thm:prod-cyc}), that every element of
$\Sym n$ is the product of a unique set of disjoint cycles of length $2$ or more.
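For example, the element
\begin{equation*}
\left\{
  \begin{matrix}
    0 & 1 & 2 & 3 & 4\\
    1 & 0 & 3 & 4 & 2
  \end{matrix}
\right\}
\end{equation*}
of $\Sym5$ is the product $(0\;1)(2\;3\;4)$ of the two disjoint cycles $(0\;1)$ and $(2\;3\;4)$.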

When $n$ is small, we can just list the elements of $\Sym n$:
\begin{compactdesc}
\item[$\Sym 2$:] $(\ )$, $(0\; 1)$.
\item[$\Sym 3$:] $(\ )$, $(0\;1)$, $(0\;2)$, $(1\;2)$, $(0\;1\;2)$,
$(0\;2\;1)$. 
\item[$\Sym 4$:] $(\ )$, $(0\;1)$, $(0\;2)$, $(0\;3)$, $(1\;2)$, $(1\;3)$,
$(2\;3)$, $(0\;1\;2)$, $(0\;2\;1)$, $(0\;1\;3)$, $(0\;3\;1)$, $(0\;2\;3)$,
$(0\;3\;2)$, $(1\;2\;3)$, $(1\;3\;2)$,
$(0\;1)(2\;3)$, $(0\;2)(1\;3)$, $(0\;3)(1\;2)$, $(0\;1\;2\;3)$,
$(0\;1\;3\;2)$, $(0\;2\;1\;3)$, $(0\;2\;3\;1)$, $(0\;3\;1\;2)$,
$(0\;3\;2\;1)$.
\end{compactdesc}
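
Such listings are easily confirmed by machine.  In the following Python sketch (illustrative only), the elements of $\Sym 4$ are generated as tuples of values and counted:
\begin{verbatim}
from itertools import permutations

# Elements of Sym(4) as tuples (sigma(0), sigma(1), sigma(2), sigma(3)).
sym4 = list(permutations(range(4)))
print(len(sym4))   # 24, in agreement with the list above
\end{verbatim}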

%$(\ )$,   $(0\;1)$,       $(0\;2)$,       $(1\;2)$,
%$(0\;1\;2)$, $(0\;2\;1)$, $(0\;3)$, $(0\;1\;3)$,    $(0\;2\;3)$,
%$(0\;3)(1\;2)$, $(0\;1\;2\;3)$, $(0\;2\;1\;3)$, $(1\;3)$,
%$(0\;3\;1)$,    $(0\;2)(1\;3)$, $(1\;2\;3)$, $(0\;3\;1\;2)$,
%$(0\;2\;3\;1)$, $(2\;3)$, $(0\;1)(2\;3)$, $(0\;3\;2)$,
%$(1\;3\;2)$,    $(0\;1\;3\;2)$, $(0\;3\;2\;1)$. 
For larger $n$, one might like to have some principles of organization.  But then the whole study of groups might be understood as a search for such principles (for organizing the elements of a group, or organizing all groups).

If $m\leq n$, there is an embedding $\sigma\mapsto\tilde{\sigma}$ of
the group $\Sym m$ in $\Sym n$, where $\tilde{\sigma}=\sigma\cup\id_{n\setminus
  m}$, so that
\begin{equation*}
  \tilde{\sigma}(k)=
  \begin{cases}
    \sigma(k),&\text{ if } k<m,\\
k,&\text{ if }m\leq k<n.
  \end{cases}
\end{equation*}
Similarly each $\Sym n$ embeds in $\Sym{\upomega}$; but the latter has
many elements that are not in the image of any $\Sym n$.

The main point to observe for now is the following.
  
\begin{theorem}\label{thm:sym}
For all sets $A$, for all elements $f$, $g$, and $h$ of a group of symmetries of $A$,
\begin{gather*}
f\circ\id_A=f,\\
\id_A\circ f=f,\\
f\circ f\inv=\id_A,\\
f\inv\circ f=\id_A,\\
(f\circ g)\circ h=f\circ(g\circ h).	
\end{gather*}
\end{theorem}  

\section{Groups}

A \textbf{group}\index{group} is a structure with the properties of a group of symmetries given by the last theorem, Theorem~\ref{thm:sym}.  That is, a group is a structure $(G,\gid,{}\inv,\cdot)$ in which the following equations are identities (that is, are true for all values of the variables):
\begin{gather*}
x\cdot\gid=x,\\
\gid\cdot x=x,\\
x\cdot x\inv=\gid,\\
x\inv\cdot x=\gid,\\
(x\cdot y)\cdot z=x\cdot(y\cdot z).
\end{gather*}
We may say also that these equations are the \emph{axioms} of groups: this means that their universal generalizations $\Forall xx\cdot\gid=x$ and so forth are true by definition in every group.

The operation $\cdot$ here is usually called \textbf{multiplication,} and we usually write $g\cdot h$ as $gh$.  The element $g\inv$ is the \textbf{inverse} of $g$.  The element $\gid$ is the \textbf{identity;} it is 
sometimes denoted by $1$ rather than $\gid$. 
Every element $g$ of $G$ determines a
singulary operation $\uplambda_g$ on $G$, given by
\begin{equation*}
  \uplambda_g(x)=gx.
\end{equation*}

\begin{theorem}[Cayley]\label{thm:Cay}%
\index{theorem!Cayley's Th---}
For every group $(G,\gid,{}\inv,\cdot)$ and every $g$ in $G$, the function $\uplambda_g$ belongs to $\Sym G$; moreover, the function $x\mapsto\uplambda_x$ embeds $(G,\gid,{}\inv,\cdot)$ in the group $(\Sym G,\id_G,{}\inv,\circ)$ of symmetries. 
\end{theorem}

\begin{proof}
Let $g\in G$.  We first establish $\uplambda_g\in\Sym G$.  We have
\begin{equation*}
\uplambda_{g\inv}(\uplambda_g(x))=g\inv(gx)=(g\inv g)x=\gid{}x=x,
\end{equation*} 
so $\uplambda_{g\inv}\circ\uplambda_g=\id_G$.  Likewise $\uplambda_g\circ\uplambda_{g\inv}=\id_G$.  Thus $\uplambda_g$ is invertible and therefore belongs to $\Sym G$.  Consequently
\begin{equation*}
x\mapsto\uplambda_x\colon G\to\Sym G
\end{equation*}
(recall the notational convention established above on page \pageref{eqn:f:B->A}).
We now check that $x\mapsto\uplambda_x$ is a homomorphism.  By what we
have already shown,
\begin{equation*}
(\uplambda_{g})\inv=\uplambda_{g\inv}.
\end{equation*}
We have also
$\uplambda_{\gid}(x)={\gid}x=x=\id_G(x)$, so 
\begin{equation*}
\uplambda_{\gid}=\id_G,
\end{equation*} 
and
$\uplambda_{gh}(x)=(gh)x=g(hx)=\uplambda_g(\uplambda_h(x))=(\uplambda_g\circ\uplambda_h)(x)$,
so
\begin{equation*}
\uplambda_{gh}=\uplambda_g\circ\uplambda_h.
\end{equation*}
Thus $x\mapsto\uplambda_x$ is indeed a homomorphism from the group $(G,\gid,{}\inv,\cdot)$ to $(\Sym G,\id_G,{}\inv,\circ)$.  It is an embedding, since if $\uplambda_g=\uplambda_h$, then in particular
\begin{equation*}
g=g\gid=\uplambda_g(\gid)=\uplambda_h(\gid)=h\gid=h.\qedhere
\end{equation*}
\end{proof}
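
The proof can be watched in action on a small example.  The following Python sketch (with ad hoc names) takes for $G$ the additive group of integers modulo $4$ and checks that each $\uplambda_g$ is a permutation of $G$ and that $x\mapsto\uplambda_x$ is an injective homomorphism:
\begin{verbatim}
G = range(4)                  # the additive group of integers modulo 4

def lam(g):
    """The map lambda_g, recorded as the tuple of its values."""
    return tuple((g + x) % 4 for x in G)

# Each lambda_g is a bijection of G:
assert all(sorted(lam(g)) == list(G) for g in G)
# g |-> lambda_g is injective:
assert len({lam(g) for g in G}) == 4
# and lambda_{g+h} = lambda_g o lambda_h:
compose = lambda s, t: tuple(s[t[x]] for x in G)
assert all(lam((g + h) % 4) == compose(lam(g), lam(h))
           for g in G for h in G)
\end{verbatim}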

%By our strict definition, a group is a structure in the signature $\{\gid,\inv,\cdot\}$ that satisfies certain axioms.


\section{The integers and rationals}\label{sect:Z}

In this section we define \emph{semigroups} and \emph{monoids.}  The structure $(\N,+)$ will be a semigroup, and $(\N,1,\cdot)$ and $(\upomega,0,+)$ will be monoids.  From these, we shall obtain the groups $(\Qp,1,{}\inv,\cdot)$ and $(\Z,0,-,+)$ respectively.  We then obtain the semigroup $(\Qp,+)$, from which we obtain the group $(\Q,0,-,+)$.  Then we shall have the monoid $(\Q,1,\cdot)$.  In fact $(\Q,0,-,+,1,\cdot)$ will be a \emph{ring} and even a \emph{field,} though the official definitions of these terms will come later.

The structure $(\N,1,\cdot)$ cannot be given an operation of inversion so that it becomes a group.  The structure is however a \emph{monoid.}
A \textbf{monoid}%
\index{monoid} is a structure $(M,{\gid},\cdot)$
satisfying the axioms
\begin{gather*}
	x\gid=x,\\
	\gid x=x,\\
	(xy)z=x(yz).
\end{gather*}
In particular, if $(G,\gid,\inv,\cdot)$ is a group, then
$(G,\gid,\cdot)$ is a monoid.  

In general terms, the structure
$(G,\gid,\cdot)$ is a \textbf{reduct}%
\index{reduct}
of $(G,\gid,\inv,\cdot)$, and $(G,\gid,\inv,\cdot)$ is an
\textbf{expansion}
\index{expansion}
of $(G,\gid,\cdot)$.  The terms \emph{reduct} and \emph{expansion} imply no change in the universe of a structure, but only a change in the signature.

Not every monoid is the reduct of a group: the example of $(\N,1,\cdot)$ shows this.  So does the example of a set $M$ with an element $\gid$ and at least one other element, if we take $\gid$ as the identity and define $xy$ to be $y$ whenever $x$ and $y$ both differ from $\gid$: then no element other than $\gid$ has an inverse.

For another example, given an arbitrary set $A$, let us denote by
$\so A$
the set of functions from $A$ to itself (that is, the set of singulary
operations on $A$).  Then $(\so A,\id_A,\circ)$ is a monoid.  However, if $A$ has at least two elements, then $\so A$ has elements (for example, constant functions) that are not injective and are therefore not invertible.

If
$(M,\gid,\cdot)$ is a monoid, then, just as in the proof of
Theorem~\ref{thm:Cay}, $x\mapsto\uplambda_x$ is an embedding of
$(M,\gid,\cdot)$ in $(\so M,\id_M,\circ)$: inverses were needed in that
proof only to show that each $\uplambda_g$ is itself invertible.

Even though the monoid $(\N,1,\cdot)$ does not expand to a group, it embeds in another monoid, which expands to a group, by the method of fractions learned in school.  The following theorem gives a special case of ``localization'', which will be worked out in full in \S\ref{sect:loc} (p.~\pageref{sect:loc}):

\begin{theorem}
Let $\approx$ be the binary relation on
$\N\times\N$ given by\footnote{As a binary relation on $\N\times\N$, the relation $\approx$ is a subset of $(\N\times\N)^2$, which we identify with $\N^4$.}
\begin{equation*}
  (a,b)\approx(x,y)\liff ay=bx.
\end{equation*}
Then $\approx$ is an equivalence-relation.  Let the equivalence-class of
$(a,b)$ be denoted by $a/b$, and let the set of such
equivalence-classes be denoted by $\Qp$.  Then $(\Qp,1,\inv,\cdot)$ is a
well-defined group according to the rules
\begin{gather*}
  1=1/1,\\
(x/y)\inv=y/x,\\
(x/y)(z/w)=(xz)/(yw).
\end{gather*}
Moreover, $(\N,1,\cdot)$ embeds in $(\Qp,1,\cdot)$ under the
map $x\mapsto x/1$.  
\end{theorem}

The set $\Qp$ in the theorem comprises the
\textbf{positive rational numbers.}\index{rationals}
The foregoing theorem is false if we replace the monoid $(\N,1,\cdot)$ with the monoid $(\so A,\id_A,\circ)$ for a set $A$ with at least two elements.  But the theorem works for $(\upomega,0,+)$.  In fact, after appropriate modifications, it will work for $(\N,+)$.
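
The rules of the theorem are easy to try out on representative pairs.  In the Python sketch below (illustrative only), a class $a/b$ is handled through an arbitrary representative pair, and the relation $\approx$ serves as the test of equality:
\begin{verbatim}
def approx(p, q):
    """(a,b) ~ (x,y) if and only if a*y == b*x."""
    (a, b), (x, y) = p, q
    return a * y == b * x

def times(p, q):
    (a, b), (x, y) = p, q
    return (a * x, b * y)

def inverse(p):
    a, b = p
    return (b, a)

# (1,2) and (2,4) both represent 1/2, and multiplication respects this:
assert approx((1, 2), (2, 4))
assert approx(times((1, 2), (3, 5)), times((2, 4), (3, 5)))
# (x/y)(x/y)^{-1} = 1:
assert approx(times((3, 5), inverse((3, 5))), (1, 1))
\end{verbatim}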

The structure $(\N,+)$ is a \emph{semigroup.}  In general, a \textbf{semigroup}\index{semigroup} is a structure $(S,\cdot)$
satisfying the identity
\begin{equation*}
(xy)z=x(yz).
\end{equation*}
If $(M,\gid,\cdot)$ is a monoid, then the reduct $(M,\cdot)$ is a semigroup.  But
not every semigroup is the reduct of a monoid: for example, $(\N,+)$ and
$(\upomega\setminus\{1\},\cdot)$ are not reducts of monoids, since neither has an identity. 
Or let
$S$ be the set of all elements $f$ of $\so{\upomega}$ such that,
for all $n$ in $\upomega$, $f(n)>n$: then $S$ is closed under
composition, so $(S,\circ)$ is a semigroup; but it has no identity.

\begin{theorem}
Let $\sim$ be the binary relation on
$\N\times\N$ given by
\begin{equation*}
  (a,b)\sim(x,y)\liff a+y=b+x.
\end{equation*}
Then $\sim$ is an equivalence-relation.  Let the equivalence-class of
$(a,b)$ be denoted by $a-b$, and let the set of such
equivalence-classes be denoted by $\Z$.  Then $(\Z,0,-,+)$ is a
well-defined group according to the rules
\begin{gather*}
  0=1-1,\\
-(x-y)=y-x,\\
(x-y)+(z-w)=(x+z)-(y+w).
\end{gather*}
Moreover, $(\N,+)$ embeds in $(\Z,+)$ under the
map $x\mapsto(x+1)-1$.
\end{theorem}

Now we can obtain the set $\Q$ of all rational numbers from $\Qp$,
just as we have obtained $\Z$ from $\N$.  To do this, we need addition on $\Qp$:

\begin{theorem}
The set $\Qp$ is a semigroup with respect to an operation $+$, which can be well defined by
\begin{equation*}
\frac ab+\frac xy=\frac{ay+bx}{by}.
\end{equation*}
Then on $\Qp$,
\begin{equation*}
x(y+z)=xy+xz.
\end{equation*}
\end{theorem}

Now we obtain $\Q$ with its usual addition and multiplication.  The structure $(\Q,0,-,+,1,\cdot)$ is an example of a \emph{ring} (or more precisely associative ring); in fact it is a \emph{field,} and it embeds in the field $(\R,0,-,+,1,\cdot)$ of \emph{real numbers} (see \S\ref{sect:rings}, p.~\pageref{sect:rings}).

\section{Simplifications}\label{sect:simp}

If a semigroup $(G,\cdot)$ expands to a group $(G,\gid,{}\inv,\cdot)$, then the semigroup $(G,\cdot)$ itself is often called a group.  But this usage must be justified.

\begin{theorem}\label{thm:u}
A semigroup can expand to a group in only one way.
\end{theorem}

\begin{proof}
Let $(G,\gid,\inv,\cdot)$ be a group.
If $\gid'$ were a second identity, then
\begin{align*}
\gid'x&=\gid x,& \gid'xx\inv&=\gid xx\inv,& \gid'&=\gid.
\end{align*}
If $a'$ were a second inverse of $a$, then
\begin{align*}
a'a&=a\inv a,& a'aa\inv&=a\inv aa\inv,&a'&=a\inv.\qedhere
\end{align*}
\end{proof}

Establishing that a particular structure is a group is made easier by the following.

\begin{theorem}\label{thm:left}
Any structure satisfying the identities
\begin{gather*}
	{\gid}x=x,\\
	x\inv x=\gid,\\
	x(yz)=(xy)z
\end{gather*}
is a group.
In other words, 
  any semigroup with a left-identity and with left-inverses is a group.  
\end{theorem}

\begin{proof}
We need to show $x\gid=x$ and $xx\inv=\gid$.  To establish the latter,
using the given identities we have
\begin{equation*}
(xx\inv)(xx\inv)=x(x\inv x)x\inv=x{\gid}x\inv=xx\inv,
\end{equation*}
and so
\begin{equation*}
xx\inv={\gid}xx\inv=(xx\inv)\inv(xx\inv)(xx\inv)=(xx\inv)\inv(xx\inv)={\gid}.
\end{equation*}
Hence also
\begin{equation*}
x{\gid}=x(x\inv x)=(xx\inv)x={\gid}x=x.\qedhere
\end{equation*}
\end{proof}

The theorem has an obvious ``dual'' involving right-identities and right-inverses.  By the theorem, the semigroups that expand to groups are precisely the semigroups that satisfy the axiom
\begin{gather*}
\Exists z(\Forall xzx=x\land\Forall x\Exists y yx=z),
\end{gather*}
which is logically equivalent to
\begin{equation}\label{eqn:sg-ax}
\Exists z\Forall x\Forall y\Exists u(zx=x\land uy=z).
\end{equation}
We shall show that this sentence is more complex than need be.

Thanks to Theorem~\ref{thm:u}, if a semigroup $(G,\cdot)$ does expand to a group, then we may unambiguously refer to $(G,\cdot)$ itself as a group.  Furthermore, we may refer to $G$ as a group: this is commonly done, although, theoretically, it may lead to ambiguity.

\begin{theorem}\label{thm:solutions}
Let $G$ be a nonempty semigroup.  The following are equivalent.
\begin{compactenum}
\item\label{item:exp}
$G$ expands to a group.
%\item\label{item:exp-u}
%$G$ expands uniquely to a group.
\item\label{item:sol}
Each equation $ax=b$ and $ya=b$ with parameters from $G$ has a
solution in $G$.
\item\label{item:sol-u}
Each equation $ax=b$ and $ya=b$ with parameters from $G$ has a
unique solution in $G$.
\end{compactenum}
\end{theorem}

\begin{proof}
Immediately \eqref{item:sol-u}$\lto$\eqref{item:sol}.  Almost as easily, \eqref{item:exp}$\lto$\eqref{item:sol-u}.  For, if $a$ and $b$ belong to some semigroup that expands to a group, we have $ax=b\liff x=a\inv b$; and we know by Theorem~\ref{thm:u} that $a\inv$ is uniquely determined.  Likewise for $ya=b$.

Finally we show \eqref{item:sol}$\lto$\eqref{item:exp}.
Suppose $G$ is a nonempty semigroup in which all equations $ax=b$ and $ya=b$ have solutions.  If $c\in G$, let $\gid$ be
a solution to $yc=c$.  If $b\in G$, let $d$ be a
solution to 
$cx=b$.  Then
\begin{equation*}
  {\gid}b={\gid}(cd)=({\gid}c)d=cd=b.
\end{equation*}
Since $b$ was chosen arbitrarily, $\gid$ is a left identity.  Since the equation $yc={\gid}$ has a solution, $c$ has a left inverse.  But $c$ is an arbitrary element of $G$.  By Theorem~\ref{thm:left}, we are done.
\end{proof}

Now we have that the semigroups that expand to groups 
are just the semigroups that satisfy the axiom
\begin{equation*}
  \Forall x\Forall y\Exists z\Exists w(xz=y\land wx=y).
\end{equation*}
This may not look simpler than \eqref{eqn:sg-ax}, but it is:  
it is a sentence of the general form $\forall\exists$,
whereas \eqref{eqn:sg-ax} is of the form $\exists\forall\exists$.

\begin{theorem}\label{thm:gp-hom}
  A map $f$ from one group to another is a homomorphism, provided it is a
  homomorphism of semigroups, that is, $f(xy)=f(x)f(y)$.
\end{theorem}

\begin{proof}
In a group, if $a$ is an element, then the identity is the unique
solution of $xa=a$, and $a\inv$ is the unique solution of $yaa=a$.  A
semigroup homomorphism $f$ takes solutions of
these equations to solutions of $xb=b$ and $ybb=b$, where $b=f(a)$. 
\end{proof}

\emph{Inclusion} of a substructure in a larger structure is a homomorphism. 
In particular, if $(G,\gid,{}\inv,\cdot)$ and $(H,\gid,{}\inv,\cdot)$ 
are groups, we have
\begin{equation*}
(G,\cdot)\included(H,\cdot)
\implies(G,\gid,{}\inv,\cdot)\included(H,\gid,{}\inv,\cdot).
\end{equation*}

If an arbitrary class of structures is axiomatized 
by $\forall\exists$ sentences, 
then the class is ``closed under unions of chains\label{chains}'' 
in the sense that, 
if $\str A_0\included\str A_1\included\str A_2\included\dotsb$, 
where each $\str A_k$ belongs to the class, 
then the union of all of these structures also belongs to the class.  
In fact the converse is also true, 
by the so-called Chang--\L o\'s--Suszko Theorem \cite{MR0103812,MR0089813}.  
With this theorem, 
and with Theorem~\ref{thm:gp-hom} in place of~\ref{thm:solutions}, 
we can still conclude 
that the theory of groups in the signature $\{\cdot\}$ 
has $\forall\exists$ axioms,
although we may not know what they are.

Theorem~\ref{thm:gp-hom} fails with monoids in place of groups.  
For example, $(\Z,1,\cdot)$ and $(\Z\times\Z,(1,1),\cdot)$ are monoids 
(the latter being the product of the former with itself 
as defined in \S\ref{sect:new}), 
and $x\mapsto(x,0)$ is an embedding 
of the semigroup $(\Z,\cdot)$ in $(\Z\times\Z,\cdot)$, 
but it is not an embedding of the monoids.


\section{Repeated multiplication}%\label{sect:repeat}

In a semigroup, a product $abc$ is unambiguous: whether it is understood as $(ab)c$ or $a(bc)$, the result is the same.  Then $abcd$ is also unambiguous, because $(abc)d$, $(ab)(cd)$, and $a(bcd)$ can be shown to be equal.  We are going to show by induction that every product $a_0\dotsm a_{n-1}$ is unambiguous.  The main point is to establish the homomorphisms in the last three theorems of this section.

Suppose there is a binary operation $\cdot$ on a set $A$.  We do not assume that the operation is associative.  For each $n$ in $\N$, we define a set $P_n$ consisting of certain
$n$-ary operations on $A$.  Our definition is recursive: 
\begin{compactenum}[1)]
	\item 
	$P_1=\{\id_A\}$;
	\item
	$P_{n+1}$ consists of the operations
	\begin{equation*}
(x_0,\dots,x_n)\mapsto f(x_0,\dots,x_{k-1})\cdot g(x_k,\dots,x_n),
\end{equation*}
for every $f$ in $P_k$ and $g$ in $P_{n+1-k}$, for every $k$ in $\N$ such that $k\leq n$.
\end{compactenum}
We now distinguish in each $P_n$ a particular element $f_n$, where
\begin{compactenum}[1)]
	\item 
	$f_1$ is $\id_A$,
	\item
	$f_{n+1}$ is $(x_0,\dots,x_n)\mapsto f_n(x_0,\dots,x_{n-1})\cdot x_n$.
\end{compactenum}
So 
\begin{equation*}
f_n(x_0,\dots,x_{n-1})=(\dotsm(x_0x_1)x_2\dotsm)x_{n-1}.  
\end{equation*}
For example, $f_5$ is $(x,y,z,u,v)\mapsto(((xy)z)u)v$.
But
$P_5$ also contains $(x,y,z,u,v)\mapsto(x(yz))(uv)$.  In
a semigroup, it is easy to show that this operation is the same as
$f_5$.  In general, we have: 

\begin{theorem}
If $A$ is a semigroup, then, in the notation above, $P_n=\{f_n\}$.
\end{theorem}

\begin{proof}
The claim is immediately true when $n=1$.  Suppose it is true when $1\leq n\leq s$.  Each element $g$ of $P_{s+1}$ is therefore
\begin{equation*}
(x_0,\dots,x_s)\mapsto f_n(x_0,\dots,x_{n-1})\cdot f_{s+1-n}(x_n,\dots,x_s)
\end{equation*}
for some $n$, where $1\leq n\leq s$.  If $n=s$, then $g$ is $f_{s+1}$.  If $n<s$, then
\begin{align*}
g(x_0,\dots,x_s)
&=f_n(x_0,\dots,x_{n-1})\cdot(f_{s-n}(x_n,\dots,x_{s-1})\cdot x_s)\\
&=(f_n(x_0,\dots,x_{n-1})\cdot f_{s-n}(x_n,\dots,x_{s-1}))\cdot x_s\\
&=f_s(x_0,\dots,x_{s-1})\cdot x_s\\
&=f_{s+1}(x_0,\dots,x_s),
\end{align*}
so again $g$ is $f_{s+1}$.  By induction, the claim is true for all $n$ in $\N$. 
\end{proof}

It follows that, in a semigroup, the product $a_0\dotsm a_{n-1}$ is
unambiguous: it is just $g(a_0,\dots,a_{n-1})$ for any element $g$ of
$P_n$, because that element must be the same as $f_n$.  We may write also
\begin{equation}\label{eqn:gen-prod}
a_0\dotsm a_{n-1}=\prod_{k=0}^{n-1}a_k=\prod_{k\in n}a_k.
\end{equation}
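
The distinguished operation $f_n$ is what is often called a left fold of the multiplication.  In the Python sketch below (illustrative only), functools.reduce plays the role of $f_n$:
\begin{verbatim}
from functools import reduce

def product(xs, op):
    """f_n applied to xs: (...((x0*x1)*x2)...)*x_{n-1}."""
    return reduce(op, xs)

op = lambda x, y: x + y          # the semigroup (N, +), for instance
xs = [1, 2, 3, 4]
print(product(xs, op))           # 10
# Any other bracketing, e.g. (x0*(x1*x2))*x3, gives the same value:
assert op(op(xs[0], op(xs[1], xs[2])), xs[3]) == product(xs, op)
\end{verbatim}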


A group or monoid or semigroup is \textbf{abelian}\index{abelian} if it satisfies the identity
\begin{equation*}
  xy=yx.
\end{equation*}
Multiplication on an abelian group is often (though not always) called \textbf{addition}\index{addition} and denoted by $+$; in this case, the identity may be denoted by $0$, and the group is said to be written additively.  This is what we do in the case of $(\upomega,0,+)$, though not $(\N,1,\cdot)$.


In an abelian group, the product in \eqref{eqn:gen-prod} may be written as a sum:
\begin{equation*}
a_0+\cdots+a_{n-1}=\sum_{k=0}^{n-1}a_k=\sum_{k\in n}a_k.
\end{equation*}
We also use the notation
\begin{align*}
	\prod_{k\in n}a&=a^n,&
	\sum_{k\in n}a&=na.						
\end{align*}
The set $\so G$ in the following was defined in \S \ref{sect:Z} (p.~\pageref{sect:Z}).

\begin{theorem}
Suppose $(G,\cdot)$ is a semigroup, and $m$ and $n$ range over~$\N$.
\begin{compactenum}
\item 
On $G$,
\begin{equation*}
x^{m+n}=x^mx^n.
\end{equation*}
That is, if $a\in G$, then
\begin{equation*}
x\mapsto a^x\colon(\N,+)\to(G,\cdot).
\end{equation*}
%$x\mapsto a^x$ is a homomorphism from $(\N,+)$ to $(G,\cdot)$.
\item
On $G$,
\begin{equation*}
x^{mn}=(x^m)^n.
\end{equation*}
That is,
\begin{equation*}
x\mapsto(y\mapsto y^x)\colon(\N,1,\cdot)\to(\so G,\id_G,\circ).
\end{equation*}
%$x\mapsto(y\mapsto y^x)$ is a homomorphism from $(\N,1,\cdot)$ to $(\so G,\id_A,\circ)$. 
\end{compactenum}
\end{theorem}

\begin{proof}
Use induction: $a^{n+1}=a^n\cdot a=a^n\cdot a^1$, and if
$a^{n+m}=a^n\cdot a^m$, then 
\begin{equation*}
a^{n+(m+1)}=a^{(n+m)+1}=a^{n+m}\cdot a=a^na^ma=a^na^{m+1}.
\end{equation*}
Also, $a^{n\cdot 1}=a^n=(a^n)^1$, and if $a^{nm}=(a^n)^m$, then
\begin{equation*}
  a^{n(m+1)}=a^{nm+n}=a^{nm}a^n=(a^n)^ma^n=(a^n)^{m+1}.\qedhere
\end{equation*}
\end{proof}

In a monoid, we define
\begin{equation}\label{eqn:a^0}
a^0=\gid.
\end{equation}
Again, the set $\so G$ in the following was defined in \S \ref{sect:Z}.

\begin{theorem}
Suppose $(G,\gid,\cdot)$ is a monoid.
\begin{compactenum}
\item 
If $a\in G$, then
\begin{equation*}
x\mapsto a^x\colon(\upomega,0,+)\to(G,\gid,\cdot).
\end{equation*}
\item
$x\mapsto(y\mapsto y^x)\colon(\upomega,1,\cdot)\to(\so G,\id_G,\circ)$. 
\end{compactenum}
\end{theorem}

In a group, we define
\begin{equation*}
  a^{-n}=(a^n)\inv.
\end{equation*}

\begin{theorem}\label{thm:exp-in-groups}
Suppose $(G,\gid,{}\inv,\cdot)$ is a group.
\begin{compactenum}
\item 
If $a\in G$, then
\begin{equation*}
x\mapsto a^x\colon(\Z,0,+)\to(G,\gid,\inv,\cdot).
\end{equation*}
\item
$x\mapsto(y\mapsto y^x)\colon(\Z,1,\cdot)\to(\so G,\id_G,\circ)$. 
\end{compactenum}
\end{theorem}
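
As a concrete instance of the first part of the theorem, the following Python sketch (illustrative only) works in the group of units of the integers modulo $7$, finding inverses by search, and confirms that $a^{x+y}=a^xa^y$ for integer exponents:
\begin{verbatim}
M = 7
UNITS = [a for a in range(1, M) if any(a * b % M == 1 for b in range(1, M))]

def inv(a):
    return next(b for b in UNITS if a * b % M == 1)

def power(a, n):
    """a^n in the group of units modulo 7, for any integer n."""
    if n < 0:
        return inv(power(a, -n))
    result = 1
    for _ in range(n):
        result = result * a % M
    return result

assert all(power(3, x + y) == power(3, x) * power(3, y) % M
           for x in range(-5, 6) for y in range(-5, 6))
\end{verbatim}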

\section{Rings}\label{sect:rings}

A homomorphism from a structure to itself is an
\textbf{endomorphism.}\index{endomorphism} 
The set of endomorphisms of an abelian group can be made into an
abelian group in which: 
\begin{compactenum}[1)]
\item 
the identity is the constant function $x\mapsto\gid$;
\item
additive inversion converts $f$ to $x\mapsto-f(x)$;
\item
addition converts $(f,g)$ to $x\mapsto f(x)+g(x)$.
\end{compactenum}
If $E$ is an abelian group, let the abelian group of its endomorphisms
be denoted by
\begin{equation*}
  \End E.
\end{equation*}
The set of endomorphisms of $E$ can also be made into
a monoid in which 
the identity is the identity function $\id_E$, and multiplication
is functional composition.
This multiplication
distributes in both senses over addition: 
\begin{align*}
  f(g+h)&=fg+fh,& (f+g)h&=fh+gh.
\end{align*}
We may denote the two combined structures---abelian group and
monoid together---by
\begin{equation*}
  (\End E,\id_E,\circ);
\end{equation*}
this is the \textbf{complete ring of
  endomorphisms}\index{complete ring of endomorphisms} of $E$.  
A substructure of $(\End E,\id_E,\circ)$ can be called
simply a \textbf{ring of endomorphisms}\index{ring of endomorphisms} of $E$.  

An \textbf{associative ring} is a structure $(R,0,-,+,1,\cdot)$ such that
\begin{compactenum}[1)]
\item
$(R,0,-,+)$ is an abelian group,
\item
$(R,1,\cdot)$ is a monoid,
\item
the multiplication distributes in both senses over addition.
\end{compactenum}
For now, we shall refer to associative rings simply as \textbf{rings.}  (In \S\ref{sect:nna-rings} we shall consider rings in a more general sense.)
As with a group, so with a ring: an element $a$ determines a singulary
operation $\uplambda_a$ on the ring, given by
\begin{equation*}
  \uplambda_a(x)=ax.
\end{equation*}

\begin{theorem}\label{thm:x-lambda_x}
  The function $x\mapsto\uplambda_x$ embeds a ring in the endomorphism
  ring of its underlying abelian group.
\end{theorem}

If, in a ring, the multiplication commutes---
\begin{equation*}
  xy=yx
\end{equation*}
---then the ring is a \textbf{commutative ring.}\index{commutative ring}
For example, $\Z$ is a commutative ring.


In a ring, an element with both a left and a right multiplicative inverse can be
called simply \textbf{invertible;}\index{invertible} it is also called
a \textbf{unit.}\index{unit}  

\begin{theorem}\label{thm:units}
In a ring, the units compose a group with respect to
multiplication.  In particular, a unit has a unique
left inverse, which is also a right inverse.
\end{theorem}

The group of units of a ring $R$ is denoted by
\begin{equation*}
  \unit R.
\end{equation*}
For example, $\unit{\Z}=\{1,-1\}$.  Evidently all two-element groups
are isomorphic to this one.

If $R$ is commutative, and
$\unit R=R\setminus\{0\}$, then $R$ is a \textbf{field.}\index{field}
Multiplication on $\Qp$ can be extended to $\Q$ so that this becomes a field.
There are several ways to construct from $\Q$ the field $\R$ of real numbers.  Then the field $\C$ can be defined as $\R\times\R$ with the appropriate operations.  (See p.~\pageref{C}.)  An example of a ring in which some elements have
right but not left inverses will be given in \S \ref{sect:prod-sum}.





\chapter{Groups}

\section{General linear groups}\label{sect:gl}

Given a commutative ring $R$ and an element $n$ of $\upomega$, we define
\begin{equation*}
\MatR
\end{equation*}
as the set of functions from $n\times n$ into $R$.  A typical such
function can be written as a \textbf{matrix}\index{matrix}
\begin{equation*}
  \begin{pmatrix}
    a^0_0&\cdots&a^0_{n-1}\\
\vdots&\ddots&\vdots\\
a^{n-1}_0&\cdots&a^{n-1}_{n-1}
  \end{pmatrix},
\end{equation*}
or as
\begin{equation*}
(a^i_j)^{i< n}_{j< n},
\end{equation*}
or simply as $(a^i_j)^{i}_{j}$ if the set over which $i$ and $j$ range
is clear.  Here the entries $a^i_j$ are from $R$.
We define an addition on $\MatR$ by
\begin{equation*}
  (a^i_j)^{i<n}_{j<n}+(b^i_j)^{i<n}_{j<n}
= (a^i_j+b^i_j)^{i<n}_{j<n}.
\end{equation*}
We define a multiplication on $\MatR$ by
\begin{equation*}
(a^i_j)^{i<n}_{j<n}(b^j_k)^{j<n}_{k<n}
=\Bigl(\sum_{j\in n}a^i_jb^j_k\Bigr)^{i<n}_{k<n}.
\end{equation*}
One particular element of $\MatR$ is called $(\updelta^i_j)^{i<n}_{j<n}$, where
\begin{equation*}
\updelta^i_j=\begin{cases}
	1,&\text{ if } i=j,\\
	0,&\text{ otherwise,}
\end{cases}
\end{equation*}
so that the element is a certain diagonal matrix, namely
\begin{equation*}
  \begin{pmatrix}
    1&&0\\
&\ddots&\\
0&&1
  \end{pmatrix}.
\end{equation*}



\begin{theorem}\label{thm:M}
If $R$ is a commutative ring, then
  $\MatR$ is a ring with multiplicative identity $(\updelta^i_j)^{i<n}_{j<n}$.
\end{theorem}

The group $\unit{\MatR}$ is called the \textbf{general linear
  group}\index{general linear group}
of degree $n$ over $R$; it is also denoted by
\begin{equation*}
\GLR.
\end{equation*}

We shall characterize the elements of this group in
\S \ref{sect:det}.  Meanwhile, since
\begin{equation*}
  \begin{pmatrix}
    a&b\\c&d
  \end{pmatrix}
  \begin{pmatrix}
    d&-b\\-c&a
  \end{pmatrix}
=\begin{pmatrix}
  ad-bc&0\\0&ad-bc
\end{pmatrix}
=
  \begin{pmatrix}
    d&-b\\-c&a
  \end{pmatrix}
  \begin{pmatrix}
    a&b\\c&d
  \end{pmatrix},
\end{equation*}
we may observe that the element 
$\begin{pmatrix}
 a&b\\c&d 
\end{pmatrix}$ 
of $\MatR$ is invertible if $ad-bc\in\unit R$.
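
The definitions above, together with the invertibility test just observed for $2\times2$ matrices, are easily tried out over a small commutative ring, say the integers modulo $6$.  The following Python sketch is only an illustration:
\begin{verbatim}
M = 6   # work over the commutative ring of integers modulo 6

def mat_mul(A, B):
    n = len(A)
    return [[sum(A[i][j] * B[j][k] for j in range(n)) % M
             for k in range(n)] for i in range(n)]

def identity(n):
    return [[1 if i == j else 0 for j in range(n)] for i in range(n)]

def invertible_2x2(A):
    """Is ad - bc a unit?"""
    (a, b), (c, d) = A
    det = (a * d - b * c) % M
    return any(det * u % M == 1 for u in range(M))

A = [[1, 2], [3, 5]]                  # det = 1*5 - 2*3 = -1 = 5, a unit
assert invertible_2x2(A)
assert mat_mul(A, A) == identity(2)   # here A happens to be its own inverse
\end{verbatim}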










\section{New groups from old}\label{sect:new}

\subsection{Products}

If $G$ and $H$ are two groups, then we can define a
multiplication on $G\times H$ termwise:
\begin{equation*}
(g_0,h_0)(g_1,h_1)=(g_0g_1,h_0h_1)
\end{equation*}
(that is, $(g_0\cdot^Gg_1,h_0\cdot^Hh_1)$).
The result is a group called the \textbf{direct product}\index{direct product} of
  $G$ and $H$ and also denoted by
\begin{equation*}
   G\times H.
\end{equation*}
If $G$ and $H$ are abelian, written additively, then their direct product is usually called a \textbf{direct sum,}\index{direct sum} 
denoted by
\begin{equation*}
G\oplus H.
\end{equation*}

\subsection{Quotients}

Suppose $\sim$ is an equivalence-relation
on a set $G$, so that it partitions $G$ into equivalence-classes
\begin{equation*}
  \{x\in G\setcolon x\sim a\};
\end{equation*}
such classes can be denoted by $a/\mathord{\sim}$ or $\eqc a$ or $\overline a$.  
The \textbf{quotient}\index{quotient} of $G$ by $\sim$
is the set of equivalence-classes with respect to $\sim$; this set can be denoted by 
\begin{equation*}
G\modsim.
\end{equation*}
If, for some $n$ in $\upomega$ and some set $A$, we have $f\colon G^n\to A$, and
\begin{equation*}
a_0\sim x_0\land\dots\land a_{n-1}\sim x_{n-1}\lto f(a_0,\dots,a_{n-1})=f(x_0,\dots,x_{n-1}),
\end{equation*}
then we say there is a \textbf{well-defined} function $\tilde f$ from $(G/\mathord{\sim})^n$ to $A$ given by
\begin{equation*}
\tilde f([a_0],\dots,[a_{n-1}])=f(a_0,\dots,a_{n-1}).
\end{equation*}
This terminology is unfortunate, especially when used in the form ``the function $([a_0],\dots,[a_{n-1}])\mapsto f(a_0,\dots,a_{n-1})$ on $G/\mathord{\sim}$ is well-defined''.  Indeed, if this function is \emph{not} well-defined, what this means is that there is no such function at all.  But when there \emph{is} such a function, and we call it $\tilde f$, then we have
\begin{equation}\label{eqn:tilde}
\tilde f\circ p=f,
\end{equation}
where $ p$ is the function $(x_0,\dots,x_{n-1})\mapsto([x_0],\dots,[x_{n-1}])$ on $G^n$.  Another way to express the equation~\eqref{eqn:tilde} is to say that the following diagram \textbf{commutes:}\label{commutes}
\begin{equation*}
\xymatrix{
G^n\ar^f[r]\ar_{p}[d]&A\\
(G/\mathord{\sim})^n\ar_{\tilde f}[ur]&
}
\end{equation*}
We shall be particularly interested in the case where $G$ is a semigroup.  In this case, if there is a well-defined multiplication on $G/\mathord{\sim}$ given by
\begin{equation*}
[x][y]=[xy],
\end{equation*}
then this multiplication is associative, so $G\modsim$ is a semigroup.
In this case, $\sim$ is called a
\textbf{congruence-relation}\index{congruence-relation} with respect to
the multiplication on $G$. 

\begin{theorem}\label{thm:cong}
  If $G$ is a group, and $\sim$ is a congruence-relation on $G$, then
  $G\modsim$ is a group.
\end{theorem}

For example, if $n\in\upomega$, then two integers $a$ and $b$ are
\textbf{congruent \emph{modulo} $n$} if $n\divides b-a$; in this case
one writes
\begin{equation*}
  a\equiv b\pmod n.
\end{equation*}

\begin{theorem}\label{thm:Z-mod-n}
  If $n\in\upomega$, then congruence \emph{modulo} $n$ is a
  congruence-relation on $\Z$ with respect to addition and
  multiplication, and the quotient is a commutative ring.  If $n$ is
  prime, then this ring is a field.
\end{theorem}

The commutative ring in the theorem can be denoted by
\begin{equation*}
  \Zmod n,
\end{equation*}
though sometimes this expression may denote merely the additive group.
Note that $\Zmod0$ is isomorphic to $\Z$.
The direct sum $\Zmod 2\oplus\Zmod 2$ is the \textbf{Klein four
  group,}\index{Klein four group} denoted by 
\begin{equation*}
\Kfg
\end{equation*}
(for \emph{Vierergruppe}\footnote{According to Wikipedia, Klein gave this name to the group in 1884, but the name was later applied to four-person anti-Nazi resistance groups.}).  This is the smallest group containing two elements
neither of which is a power of the other.
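
Theorem~\ref{thm:Z-mod-n} can be spot-checked by machine.  The Python sketch below (illustrative only) works with arbitrary integer representatives, confirms that the proposed multiplication does not depend on the representatives chosen, and, for the prime $5$, finds an inverse for every nonzero class:
\begin{verbatim}
def congruent(a, b, n):
    return (b - a) % n == 0

n = 5
# Well-definedness of [x][y] = [xy], on two choices of representatives:
assert congruent(3 * 4, (3 + n) * (4 - 2 * n), n)
# Every nonzero class of Z/5 has a multiplicative inverse: a field.
assert all(any(a * b % n == 1 for b in range(1, n)) for a in range(1, n))
\end{verbatim}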

  There is a congruence-relation on $\R$ with respect to addition given by
  \begin{equation*}
  a\sim b\iff a-b\in\Z.
  \end{equation*}
Then there is a well-defined embedding
      $\eqc a\mapsto \exp(2\uppi\mi a)$ of $\R\modsim$ in
$\unit{\C}$. 

\subsection{Subgroups}

A \textbf{subgroup}\index{subgroup} of a group is just a substructure of a group, when the group is considered as having the full signature $\{\gid,{}\inv,\cdot\}$.  More informally, a subgroup of a group is a subset containing the identity that is
closed under multiplication and inversion.  Every group has both
itself and $\{\gid\}$ as subgroups.  Also $G\times\{\gid\}$ and
$\{\gid\}\times H$ are subgroups of $G\times H$, while $G\times G$ has
the subgroup $\{(x,x)\colon x\in G\}$.

\begin{theorem}\label{thm:subgp}
  A subset of a group is a subgroup if and only if it is non-empty and
  closed under the binary operation $(x,y)\mapsto xy\inv$.
\end{theorem}

If $ H$ is a subgroup of $G$, we write\footnote{One might write
  $ H\leqslant G$, if one wants to reserve the expression $H<G$ for the case where
  $H$ is a \emph{proper} subgroup of $G$.  I shall not do this.  However, I do think it is important to reserve the notation $A\pincluded B$ for the case where $A$ is a proper subset of $B$, writing $A\included B$ when $A$ is allowed to be equal to $B$.} 
\begin{equation*}
H\subgp  G.  
\end{equation*}

\begin{theorem}
  If $\sim$ is a congruence-relation on a group $G$, then the
  $\sim$-class of $\gid$ is a
subgroup of~$G$.
\end{theorem}

It is important to note that the converse of
this theorem is false in
general: there are groups $G$ with subgroups $H$ such that for no congruence-relation on $G$ is $H$ the congruence-class of the identity.  For example,\label{ex:32} let $G$ be $\Sym 3$, and let $H$ be the image of $\Sym2$ in $G$ under the obvious embedding mentioned in \S\ref{sect:sym}.  Then $H$ contains just the identity and $(0\;1)$.  If $\sim$ is a congruence-relation on $G$ such that $(0\;1)\sim\gid$, then
\begin{equation*}
(1\;2)(0\;1)(1\;2)\sim(1\;2)\gid(1\;2)\sim\gid;
\end{equation*}
but $(1\;2)(0\;1)(1\;2)=(0\;2)$, which is not in $H$.  See \S\ref{sect:normal} (p.~\pageref{sect:normal}) for the full story.

If $f$ is a homomorphism from $G$ to $H$, then the 
\textbf{kernel}%
\index{kernel} of $f$ is the
set
\begin{equation*}
  \{x\in G\setcolon f(x)=\gid\},
\end{equation*}
which can be denoted by $\Ker f$.  The \textbf{image}\index{image} of $f$ is
\begin{equation*}
  \{y\in H\setcolon y=f(x)\text{ for some $x$ in }G\},
\end{equation*}
that is, $\{f(x)\colon x\in G\}$; this can be denoted by $\im f$.

An embedding (that is, an injective homomorphism) is also called a 
\textbf{monomorphism.}%
\index{monomorphism}
A surjective homomorphism is called
an \textbf{epimorphism.}%
\index{epimorphism}

\begin{theorem}\label{thm:ker-im}
  Let $f$ be a homomorphism from $G$ to $H$.
  \begin{compactenum}
    \item
$\Ker f\subgp G$.
\item
$f$ is a monomorphism if and only if $\Ker f=\{\gid\}$.
\item
$\im f\subgp H$.
  \end{compactenum}
\end{theorem}

There is a monomorphism
 from $\R\oplus\R$ into $\Mat[2]{\R}$, namely
 \begin{equation*}
(x,y)\mapsto
\begin{pmatrix}
  x&y\\-y&x
\end{pmatrix}.
 \end{equation*}
One can define $\C$\label{C} to be the image of this monomorphism.  One shows
that $\C$ then is a sub-ring of $\Mat[2]{\R}$ and is a field.  The
elements of $\C$ usually denoted by $1$ and $\mi$ are given by
\begin{align*}
1
&=  
\begin{pmatrix}
    1&0\\0&1
  \end{pmatrix},&
  \mi
&=
  \begin{pmatrix}
    0&1\\-1&0
  \end{pmatrix}.
\end{align*}
Then every element of $\C$ is $x+y\mi$ for some unique $x$ and $y$ in
$\R$.  The function $z\mapsto\bar z$ is an automorphism of $\C$, where
\begin{equation*}
  \overline{x+y\mi}=x-y\mi.
\end{equation*}
There is then a monomorphism from $\C\oplus\C$ into $\Mat[2]{\C}$,
namely
 \begin{equation*}
(x,y)\mapsto
\begin{pmatrix}
  x&y\\-\bar y&\bar x
\end{pmatrix};
 \end{equation*}
its image is denoted by
\begin{equation*}
  \Ham
\end{equation*}
in honor of its discoverer Hamilton: it
consists of the \textbf{quaternions.}\index{quaternion}  One shows
that $\Ham$ is a 
sub-ring of $\Mat[2]{\C}$ and that all non-zero elements of $\Ham$ are
invertible, although $\Ham$ is not commutative.
The
element of $\Ham$ usually denoted by $\mj$ is given by
\begin{equation*}
  \mj=
  \begin{pmatrix}
    0&1\\-1&0
  \end{pmatrix}.
\end{equation*}

\begin{theorem}\label{thm:subgroups}
  An arbitrary intersection of subgroups is a subgroup.
\end{theorem}

\begin{proof}
This is an instance of the general observation that an arbitrary intersection of substructures is a substructure.
\end{proof}

Given a subset $A$ of (the universe of) a group $G$, we can ``close''
under the three group-operations, obtaining a subgroup, $\gpgen A$.
For a formal definition, we let
\begin{equation*}
  \gpgen A=\bigcap\family S,
\end{equation*}
where $\family S$ is the set of all subgroups of $G$ that
include $A$.  Note that $\gpgen{\emptyset}=\{\gid\}$.
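
The subgroup $\gpgen A$ can also be computed ``from below,'' by closing $A$ under multiplication until nothing new appears; in a finite group, this already yields inverses.  A Python sketch for subgroups of $\Sym n$, with permutations as tuples of values (illustrative only):
\begin{verbatim}
def compose(s, t):
    """(s o t)(x) = s(t(x)); permutations as tuples of values."""
    return tuple(s[i] for i in t)

def generated(gens, n):
    """The subgroup of Sym(n) generated by gens."""
    identity = tuple(range(n))
    subgroup = {identity}
    frontier = set(gens)
    while frontier:
        subgroup |= frontier
        frontier = {compose(s, t)
                    for s in subgroup for t in subgroup} - subgroup
    return subgroup

# <(0 1 2)> has 3 elements; <(0 1), (0 1 2)> is all of Sym(3):
print(len(generated([(1, 2, 0)], 3)),
      len(generated([(1, 0, 2), (1, 2, 0)], 3)))   # 3 6
\end{verbatim}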

If $G=\gpgen A$, then $G$ is
\textbf{generated}\index{generated} by $A$.  If
$A=\{a_0,\dotsc,a_{n-1}\}$, we may write
\begin{equation*}
  \gpgen{a_0,\dotsc,a_{n-1}}
\end{equation*}
for $\gpgen A$, and say that $G$ has the $n$ \textbf{generators}\index{generators}
$a_0$, \dots, $a_{n-1}$.  In particular, $G$ is \textbf{finitely
  generated}\index{finitely generated} 
in this case.  The subgroup $\gpgen{\mi,\mj}$ of $\unit{\Ham}$
is the \textbf{quaternion group,}\index{quaternion group} denoted by
\begin{equation*}
\quat;
\end{equation*}
it has eight elements: $\pm1$, $\pm\mi$, $\pm\mj$, and $\pm\mk$, where
$\mk=\mi\mj$.

In case $n=0$, the group $\gpgen{a_0,\dotsc,a_{n-1}}$ should logically be denoted by $\gpgen{\ }$.  Probably most people write $\gpgen{\gid}$ instead.  This is not wrong, but is redundant, since every group contains an identity, and the angle brackets indicate that a group is being given.  If one really wants to see something between the angle brackets, again one can write $\gpgen{\emptyset}$.

\section{Cyclic groups}

The \textbf{order}\index{order!--- of a group} of a group is its
size (or cardinality).  The order 
of $G$ is therefore denoted by
\begin{equation*}
  \order G.
\end{equation*}
A group is called \textbf{cyclic}\index{cyclic
  group}\index{group!cyclic ---} if generated by a single element.  If
$a$ is
an element of a group $G$, then $\gpgen a$ is a cyclic subgroup of
$G$, and the \textbf{order}\index{order!--- of an element} of
  $a$, denoted by
\begin{equation*}
  \order a,
\end{equation*}
is defined to be the order of $\gpgen a$.  In the next theorem, $x\mapsto a^x$ is the homomorphism from $\Z$ to $G$ as in Theorem~\ref{thm:exp-in-groups} (p.~\pageref{thm:exp-in-groups}).


\begin{theorem}\label{thm:cyc}
  If $a$ is an element of a group $G$, then
  \begin{equation*}
    \gpgen a=\im{x\mapsto a^x}.
%\{x\in G\setcolon x=a^n\text{ for some $n$ in }\Z\}.
  \end{equation*}
\end{theorem}

\begin{proof}
Let $f$ be the homomorphism $x\mapsto a^x$.  We have
to show $\gpgen a=\im f$.  Since $\gpgen a$ is a group, we know that
$a^0\in\gpgen a$, and if $a^n\in\gpgen a$, then $a^{-n}\in\gpgen a$.  Also $a\in\gpgen a$, so that, if
$a^n\in\gpgen a$, then $a^{n+1}\in\gpgen a$.
By induction, $\im f\included\gpgen a$.  Since $a\in\im f$, we have
$\gpgen a\included\im f$ by definition of $\gpgen a$.
\end{proof}

\begin{theorem}%\label{thm:el-ord}
  If $a$ is a group-element of finite order, then $a^{\order a}=\gid$.
\end{theorem}

\begin{proof}
The subset $\{\gid,a,a^2,\dots,a^{\order a}\}$ of $\gpgen a$ has size at
most $\order a$.
Hence, for some $i$ and $j$, we have $0\leq i<j\leq \order a$, but $a^i=a^j$.
Therefore $\gid=a^{j-i}$, and hence $a^k=a^{\ell}$ whenever $k\equiv\ell\pmod{j-i}$.  Consequently $\gpgen a$ has at most $j-i$ elements, that is, $\order a\leq j-i$.  Since also $j-i\leq\order a$, we have $\order a=j-i$, and in particular $a^{\order a}=a^{j-i}=\gid$.
\end{proof}
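
The order of an element can thus be found by listing its powers until the identity recurs.  A Python sketch (illustrative only), in the group of units of the integers modulo $9$:
\begin{verbatim}
def order(a, n):
    """Order of a in the group of units modulo n (assumes a is a unit)."""
    k, power = 1, a % n
    while power != 1:
        k, power = k + 1, power * a % n
    return k

print([order(a, 9) for a in (1, 2, 4, 7, 8)])   # [1, 6, 3, 3, 2]
\end{verbatim}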

\begin{theorem}\label{thm:Z-subg}
  All subgroups of $\Z$ are cyclic.  All nontrivial subgroups of $\Z$
  are isomorphic to one another.
\end{theorem}

\begin{proof}
Say $G\subgp \Z$ and $G\neq\gpgen{\ }$.  
Let $m$ be the least positive element of $G$.  Then $\gpgen m<G$.  

Let $n$ be an arbitrary element of $G$.  Then
$n=km+r$ for some $k$ and $r$ such that $0\leq r<m$.  Since $r=n-km$, we must have $r\in G$, so $r=0$ by minimality of $m$.  Hence $n=km$.  Thus $G<\gpgen m$.  Therefore $G=\gpgen m$.

  The map $x\mapsto mx$ from $\Z$ to $G$ is an
epimorphism, by Theorem~\ref{thm:cyc}.  The kernel of this map is trivial, simply because $mx=0\lto x=0$.  Therefore the map is an
isomorphism, by Theorem~\ref{thm:ker-im}. 
\end{proof}

\begin{theorem}
  Every cyclic group is isomorphic to some $\Zmod n$.
\end{theorem}

\begin{proof}
  Say $G=\gpgen a$.  By Theorem~\ref{thm:Z-subg}, the epimorphism
  $x\mapsto a^x$ from $\Z$ to $G$ has kernel $\gpgen n$ for some $n$;
  therefore  
  \begin{equation*}
    a^r=a^s\iff a^{r-s}=\gid \iff r-s\in\gpgen n\iff n\divides r-s.
  \end{equation*}
Hence the map $[x]\mapsto a^x$ is well-defined on $\Zmod n$ and has
trivial kernel; since it is also surjective, it is an isomorphism from $\Zmod n$ to $G$.
\end{proof}

\section{Cosets}

Suppose $H\subgp G$.  If $a\in G$, let
\begin{gather*}
  aH=\{ax\colon x\in H\},\\
Ha=\{xa\colon x\in H\}.
\end{gather*}
Each of the sets $aH$ is a \textbf{left coset}\index{left!---
  coset}\index{coset} 
of $H$, and the set $\{xH\colon x\in G\}$ of left cosets
is denoted by 
\begin{equation*}
  G/H.
\end{equation*}
Each of the sets $Ha$ is a \textbf{right coset}\index{right!--- coset} of $H$, and the set $\{Hx\colon x\in G\}$ of right cosets
is denoted by 
\begin{equation*}
  H\backslash G.
\end{equation*}
Note that $H$ itself is both a left and a right coset of itself.

Sometimes, for each $a$ in $G$, we have $aH=Ha$.  For example, this is the case when $G=G_0\times G_1$, and $H=G_0\times\{\gid\}$, so that, if $a=(g_0,g_1)$, then
\begin{equation*}
aH=G_0\times\{g_1\}=Ha.
\end{equation*}
Sometimes left and right cosets are different, as in the example\label{ex:32again} on page~\pageref{ex:32}, where $G=\Sym 3$, and $H$ is the image of $\Sym 2$ in $G$.  In this case
\begin{align*}
(0\;2)H&=\{(0\;2),(0\;1\;2)\},& H(0\;2)&=\{(0\;2),(0\;2\;1)\},\\
(1\;2)H&=\{(1\;2),(0\;2\;1)\},& H(1\;2)&=\{(1\;2),(0\;1\;2)\}.
\end{align*}
Moreover, by the next theorem, these and $H$ itself are all of the cosets of $H$; so in the example, no left coset besides $H$ is a right coset.
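
These computations are easily mechanized.  The Python sketch below (illustrative only) represents permutations of $\{0,1,2\}$ as tuples of values and computes the left and right cosets of $H$ in $\Sym 3$:
\begin{verbatim}
from itertools import permutations

def compose(s, t):                       # (s o t)(x) = s(t(x))
    return tuple(s[i] for i in t)

G = set(permutations(range(3)))          # Sym(3)
H = {(0, 1, 2), (1, 0, 2)}               # the subgroup <(0 1)>

left_cosets  = {frozenset(compose(a, h) for h in H) for a in G}
right_cosets = {frozenset(compose(h, a) for h in H) for a in G}
print(len(left_cosets), len(right_cosets))   # 3 3
print(left_cosets == right_cosets)           # False: H is not normal
\end{verbatim}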

\begin{theorem}\label{thm:cosets}
Suppose $H<G$.
The left cosets of $H$ in $G$ compose a partition of $G$.  Likewise for the right cosets.  All
cosets of $H$ have the same size; also, $G/H$ and
  $H\backslash G$ have the same size.
\end{theorem}

\begin{proof}
  We have $a\in aH$.  Suppose $aH\cap bH\neq\emptyset$.  Then $ah=bh_1$ for some $h$ and $h_1$ in $H$, so that $a=bh_1h\inv$, which is in $bH$.  Thus $a\in bH$, and hence $aH\included bH$.  By symmetry of the argument, we have also $bH\included aH$, and therefore $aH=bH$.  Hence the left cosets compose a partition of $G$.  By symmetry again, the same is true for the right cosets.
  
All cosets of $H$ have the same size as $H$, since
  the map $x\mapsto ax$ from $H$ to $aH$ is a bijection with inverse $x\mapsto a\inv x$, and likewise $x\mapsto xa$ from $H$ to $Ha$ is a bijection.  (One might see this as an application of Cayley's Theorem, Theorem~\ref{thm:Cay}, p.~\pageref{thm:Cay}.)    
  
   Inversion is a
  permutation of $G$ taking $aH$ to $Ha\inv$, so $G/H$ and
  $H\backslash G$ must have the same size.
\end{proof}

\begin{corollary}
If $H<G$, then the relation $\sim$ on $G$ defined by
\begin{equation*}
a\sim x\iff aH=xH
\end{equation*}
is an equivalence-relation, and
\begin{equation*}
G/H=G/\mathord{\sim}.
\end{equation*}
\end{corollary}

\begin{corollary}
If $H\subgp G$ and $aH=Hb$, then $aH=Ha$.
\end{corollary}

\begin{proof}
Under the assumption, $a\in Hb$, so $Ha\included Hb$, and therefore $Ha=Hb=aH$.
\end{proof}

The size of $G/H$ (or of $H\backslash G$) is called the \textbf{index}\index{index} of $H$ in
  $G$ and can be denoted by
\begin{equation*}
  [G:H].
\end{equation*}
This is a \emph{cardinality,} though if it is infinite, we shall not generally be interested in which cardinality it is.
If $G$ is finite, then by the last theorem,
\begin{equation*}
[G:H]=\frac{\size G}{\size H}.
\end{equation*}
However, $[G:H]$ may be finite, even though $G$ is not.  In this case, $H$ must also be infinite, and indeed the last equation may be understood to say this, since an infinite cardinal divided by a finite cardinal should still be infinite.

Of the next theorem, we shall be particularly interested in a special case, Lagrange's Theorem, in the next section.

\begin{theorem}\label{thm:KHG}
  If $K\subgp H\subgp G$, then $[G:K]=[G:H][H:K]$.
\end{theorem}

\begin{proof}
Every left coset of $K$ is included in a left coset of $H$.
  Indeed, if $bK\cap aH\neq\emptyset$, then as in the proof of
  Theorem~\ref{thm:cosets}, $bK\included aH$.
  Moreover, every left coset of $H$ includes the same number of left cosets of $K$.  For, the bijection $x\mapsto ax$ that takes $H$ to $aH$ also takes each coset $bK$ of $K$ to a coset $abK$ of $K$.
\end{proof}

The remaining theorems of this section will not be needed later, though the ideas may be useful.  In the next theorem and elsewhere, $HK$ has the obvious meaning of $\{xy\colon x\in H\land y\in K\}$.  It need not be a group.  For example, in $\Sym 3$, if $H=\gpgen{(0\;1)}$ and $K=\gpgen{(0\;2)}$, then $HK=\{\gid,(0\;1),(0\;2),(0\;2\;1)\}$, which is not a group.

\begin{theorem}
  If $H$ and $K$ are finite subgroups of some group, then
  \begin{equation*}
    \order{HK}=\frac{\order H\order K}{\order{H\cap K}}.
  \end{equation*}
\end{theorem}

\begin{proof}
Since $H\cap K$ is a group by Theorem~\ref{thm:subgroups}, and $H\cap K\included H$, we have $H\cap K\subgp H$.  By Theorem~\ref{thm:cosets}, for some $n$ in $\N$, for some $a_0$, \dots, $a_{n-1}$ in $H$, we now have
\begin{equation*}
H=a_0(H\cap K)\cup\dotsb\cup a_{n-1}(H\cap K),
\end{equation*}
the union being disjoint.  Then
  $\order H=n\order{H\cap K}$.  Also, immediately
  \begin{equation*}
a_0K\cup\dotsb\cup a_{n-1}K\included HK.
\end{equation*}
We have also the reverse inclusion, since if $h\in H$ and $k\in K$, then $h=a_ik_1$ for some $i$ in $n$ and some $k_1$ in $H\cap K$, so that $hk=a_ik_1k$, which is in $a_iK$.  Thus
  \begin{equation*}
    a_0K\cup\dotsb\cup a_{n-1}K=HK.
  \end{equation*}
This union is disjoint.  For, suppose $a_ik_i=a_jk_j$, where $k_i$ and
$k_j$ are in $K$.
Then $a_j{}\inv a_i=k_jk_i{}\inv$, which belongs both to $H$ and to $K$.  Thus $a_j{}\inv a_i\in H\cap K$.  Hence we must have $a_i(H\cap K)=a_j(H\cap
K)$, so that $a_i=a_j$.  So the union above is disjoint, and therefore $\order{HK}=n\order K$.
\end{proof}

Note that in the foregoing theorem and proof, we have no need to name the group of which $H$ and $K$ are subgroups.  If this group is $G$, then we have $\size G\geq\size{HK}$, and so
\begin{equation*}
[G:H]=\frac{\size G}{\size H}\geq\frac{\size K}{\size{H\cap K}}=[K\colon H\cap K].
\end{equation*}
We proved this under the assumption that $H$ and $K$ are finite; but we can do without this assumption as follows:

\begin{theorem}\label{thm:HHK}
  Suppose $H$ and $K$ are subgroups of a group $G$.
  Then
  \begin{equation}\label{eqn:KHK}
    [H:H\cap K]\leq[G:K].
  \end{equation}
  If $[G:K]$ is finite, then it is equal to $[H:H\cap K]$ if and only if $G=HK$.
\end{theorem}

\begin{proof}
  In the proof of the last theorem, we showed in effect that the function
  $x(H\cap K)\mapsto xK$ from $H/(H\cap K)$ to $G/K$ is injective.  This gives \eqref{eqn:KHK}.  The function is
  surjective if and only if $G=HK$.
\end{proof}

\begin{theorem}
Suppose $H$ and $K$ are subgroups of a group $G$.
Then
  \begin{equation*}
    [G:H\cap K]\leq[G:H][G:K].
  \end{equation*}
If $[G:H]$ and $[G:K]$ are finite, then their product is equal to $[G:H\cap K]$
if and only if $G=HK$.
\end{theorem}

\begin{proof}
  By Theorems~\ref{thm:KHG} and~\ref{thm:HHK},
  \begin{equation*}
[G:H\cap
    K]=[G:H][H:H\cap K]\leq [G:H][G:K].
\end{equation*}
Similarly the rest follows.
\end{proof}

\section{Lagrange's Theorem}


\begin{theorem}[Lagrange]\label{thm:Lagrange}\index{Lagrange's
    Theorem}\index{theorem!Lagrange's Th---}
If $H<G$ and $G$ is finite, then $\order H$ divides $\order G$.
\end{theorem}

\begin{proof}
Use Theorem~\ref{thm:KHG} when $K=\gpgen{\gid}$.
\end{proof}

\begin{corollary}
  Groups of prime order are cyclic.
\end{corollary}

\begin{proof}
  Say $\order G=p$.  There is $a$ in $G\setminus\gpgen{\gid}$, so $\order
  a>1$; but $\order a$ divides $p$, so $\order a=p$, and therefore $G=\gpgen a$.
\end{proof}

\begin{corollary}
  If $G$ is finite and $a\in G$, then $a^{\order G}=\gid$.
\end{corollary}

\begin{proof}
$a^{\order a}=\gid$ and $\order a$ divides $\order G$.
\end{proof}

The first
Sylow Theorem (Theorem~\ref{thm:Sylow-1}) is a partial converse of Lagrange's Theorem.
The remaining two theorems of this section are applications of Lagrange's Theorem.  They are part of number theory, but their proofs can be streamlined with group theory.

\begin{lemma}
  $\Zmodu n=\{[x]\in\Zmod n\setcolon \gcd(x,n)=1\}$. 
\end{lemma}

\begin{proof}
  $\gcd(x,n)=1$ if and only if $ax+bn=1$ for some integers $a$ and
  $b$; but this just means $[a][x]=[1]$ for some $a$.
\end{proof}

\begin{theorem}[Fermat]%\label{thm:Fermat}
If the prime $p$ is not a factor of $a$, then
\begin{equation}\label{eqn:Fermat}
  a^{p-1}\equiv 1\pmod p.
\end{equation}
Hence for all integers $a$,
\begin{equation}\label{eqn:Fermat2}
a^p\equiv a\pmod p.
\end{equation}
\end{theorem}

\begin{proof}
  By the lemma, the order of $\Zmodu p$ is $p-1$.  Hence \eqref{eqn:Fermat} holds if $[a]\in\Zmodu p$.  Also by the lemma, if $p\ndivides a$, then $[a]\in\Zmodu p$.  This proves the first claim, which implies \eqref{eqn:Fermat2} if $p\ndivides a$.  If $p\divides a$, then \eqref{eqn:Fermat2} holds easily.
\end{proof}

If $n\neq0$, let the order of $\Zmodu n$ be denoted by
\begin{equation*}
  \upphi(n).
\end{equation*}

\begin{theorem}[Euler]%\label{thm:Euler}
\index{Euler's Theorem}
  \index{theorem!Euler's Th---} 
  If $\gcd(a,n)=1$, then $a^{\upphi(n)}\equiv 1\pmod n$.
\end{theorem}

\begin{proof}
If $\gcd(a,n)=1$, then by the lemma, $[a]\in\Zmodu n$, and therefore $[a]^{\upphi(n)}=[1]$ by the last corollary above.
\end{proof}
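
Both theorems are easy to spot-check.  In the Python sketch below (illustrative only), $\upphi(n)$ is computed directly as the order of the group of units:
\begin{verbatim}
from math import gcd

def phi(n):
    """The order of the group of units of the integers modulo n."""
    return sum(1 for a in range(1, n + 1) if gcd(a, n) == 1)

n = 12
assert all(pow(a, phi(n), n) == 1 for a in range(1, n) if gcd(a, n) == 1)
assert all(pow(a, 7, 7) == a % 7 for a in range(20))   # Fermat, with p = 7
\end{verbatim}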

\section{Normal subgroups}\label{sect:normal}

If $H\subgp G$, we investigate the possibility of defining a multiplication on $G/H$ so that
\begin{equation}\label{eqn:xHyH}
(xH)(yH)=xyH.
\end{equation}
In any case, each member of this equation is a well-defined subset of $G$.  The question is when they are the same.  Continuing with the example from pages \pageref{ex:32} and \pageref{ex:32again}, where $G=\Sym3$ and $H=\gpgen{(0\;1)}$, we have
\begin{gather*}
(1\;2)H(1\;2)H=\{\gid,(0\;1),(0\;2),(0\;1\;2)\},\\
(1\;2)(1\;2)H=H=\{\gid,(0\;1)\},
\end{gather*}
so \eqref{eqn:xHyH} fails in this case.

As a corollary to Theorem~\ref{thm:cosets} (p.~\pageref{thm:cosets}), we have that the relation $\sim$ on $G$ given by
\begin{equation*}
a\sim x\iff aH=xH
\end{equation*}
is an equivalence-relation.  Then there is a multiplication on $G/H$ as desired if and only if this equivalence-relation is a congruence-relation (with respect to the multiplication on $G$).  In this case, by Theorem~\ref{thm:cong} (p.~\pageref{thm:cong}), $G/H$ is a group with respect to the proposed multiplication.

\begin{theorem}\label{thm:n}
  Suppose $H\subgp G$.  The following are equivalent:
  \begin{compactenum}
    \item
$G/H$ is a group whose multiplication is given by \eqref{eqn:xHyH}.
\item
Every left coset of $H$ is a right coset.
\item
$aH=Ha$ for all $a$ in $G$.
\item
$a\inv Ha=H$ for all $a$ in $G$.
  \end{compactenum}
\end{theorem}

\begin{proof}
Immediately the last two conditions are equivalent, and they imply the second.  The second implies the third, by a corollary to Theorem~\ref{thm:cosets}.

Suppose now the first condition holds.  For all $h$ in $H$, since $hH=H=\gid H$, we have
\begin{equation*}
aH=(\gid H)(aH)=(hH)(aH)=haH,
\end{equation*}
hence $a\inv haH=H$, so $a\inv ha\in H$.  Thus $a\inv Ha\included H$; replacing $a$ with $a\inv$ gives the reverse inclusion, so $a\inv Ha=H$.

Conversely, if the third condition holds, then $(xH)(yH)=xHHy=xHy=xyH$.
\end{proof}

A subgroup $H$ of $G$ meeting any of these equivalent conditions is
called \textbf{normal,}\index{normal!--- subgroup} and in this case we write
\begin{equation*}
  H\nsubgp G.
\end{equation*}
Of abelian groups, all subgroups are normal.
In general, if $N\nsubgp G$, then the group $G/N$ is called the
\textbf{quotient-group}% 
\index{quotient!--- group}\index{group!quotient ---} of
$G$ by $N$.  In this case, we can write the group also as
\begin{equation*}
\frac GN.
\end{equation*}

\begin{theorem}\label{thm:NGHG}
If $N\nsubgp G$ and $H\subgp G$, then $N\cap H\nsubgp H$.
(That is, normality is preserved in subgroups.)
\end{theorem}

\begin{proof}
  The defining property of normal subgroups is universal.  That is,
  $N\nsubgp G$ means that the sentence
  \begin{equation*}
\Forall x\Forall y(x\in N\to yxy\inv\in N)
\end{equation*}
is true in the structure $(G,N)$.  Therefore the same sentence is true in every substructure of $(G,N)$.  If $H<G$, then $(H,N\cap H)$ is a substructure of $(G,N)$.
\end{proof}

\begin{theorem}
If $N\nsubgp G$ and $H\subgp G$, 
then $\gpgen{N\cup H}=NH$.  
\end{theorem}

\begin{proof}
Since
\begin{equation*}
N\cup H\included NH\included\gpgen{N\cup H},
\end{equation*}
it is enough to show $NH<G$.
Suppose $n\in N$ and $h\in H$.  
Then $nh=hh\inv nh$.
Since $N\nsubgp\gpgen{N\cup H}$, we have
$h\inv nh\in N$, so $nh\in HN$. 
Thus $NH\included HN$, so by symmetry $NH=HN$.  Therefore
\begin{equation*}
NH(NH)\inv=NHH\inv N\inv=NHHN\included NHN=NNH\included NH, 
\end{equation*}
that is,
$NH$ is closed under $(x,y)\mapsto xy\inv$.  Since $NH$ also contains
$\gid$, it is a subgroup of $G$ by Theorem~\ref{thm:subgp}. 
\end{proof}

\begin{theorem}\label{thm:isdp}
Suppose $N\nsubgp G$ and $H\subgp G$ and
$N\cap H=\gpgen{\gid}$.  Then the surjection $(x,y)\mapsto xy$ from
$N\times H$ to $NH$ is a bijection.  
\end{theorem}

\begin{proof}
If $m$ and $n$ are in $N$, and $g$ and $h$ are in $H$, and $mg=nh$,
then
\begin{equation*}
  n\inv m=hg\inv,
\end{equation*}
so each side belongs to $N\cap H$ and must therefore be $\gid$; hence $m=n$ and $g=h$.  
\end{proof}

In the theorem, $NH$ is the \textbf{internal semidirect
  product}\index{internal semidirect product} of
$N$ and $H$.  Note well that the bijection between $N\times H$ and
$NH$ need not be an isomorphism of groups, since in $N\times H$
\begin{equation*}
  (m,g)(n,h)=(mn,gh),
\end{equation*}
while in $NH$
\begin{equation}\label{eqn:sdp}
(mg)(nh)=(mgng\inv)(gh), 
\end{equation}
and $mgng\inv$ need not be equal to $mn$, because $gng\inv$ need not be equal to $n$.
Theorem~\ref{thm:wdp} on page \pageref{thm:wdp} below establishes conditions under which the
bijection between $N\times H$ and $NH$ \emph{is} an isomorphism.  Semidirect products in general
are treated in \S \ref{sect:semidirect} (p.~\pageref{sect:semidirect}).

\begin{theorem}
  The normal subgroups of a group are precisely the kernels of
  homomorphisms on the group.
\end{theorem}

\begin{proof}
  If $f$ is a homomorphism from $G$ to $H$, then for all $n$ in $\Ker f$,
  \begin{equation*}
f(ana\inv)=f(a)f(n)f(a)\inv=\gid,
\end{equation*}
so $a(\Ker
  f)a\inv\included \Ker f$; thus $\Ker f\nsubgp G$.  Conversely,
if $N\nsubgp G$, then the map $x\mapsto xN$ from $G$ to $G/N$ is a
homomorphism with kernel~$N$.
\end{proof}

In the proof, the map $x\mapsto xN$ is the 
\textbf{canonical projection}%
\index{projection}\index{canonical!--- projection} 
or the \textbf{quotient map}\index{quotient map}
of $G$ onto $G/N$; it may be denoted
  by $p$ or $\uppi$.

\begin{theorem}\label{thm:hom-n}
  If $f$ is a homomorphism from $G$ to $H$, and $N$ is a normal
  subgroup of $G$ such that $N\subgp \Ker f$, then there is a unique
  homomorphism
  $\tilde f$ from $G/N$ to $H$ such that $f=\tilde f\circ \uppi$, that
  is, the following diagram commutes\index{commutes} (see page~\pageref{commutes}).
  \begin{equation*}
\xymatrix{
G \ar[r]^{\uppi} \ar[d]_f & G/N \ar[dl]^{\tilde f}\\
H&
}
  \end{equation*}
\end{theorem}

\begin{proof}
If $\tilde f$ exists, it must be given by
\begin{equation*}
\tilde f(xN)=f(x).
\end{equation*}
Such $\tilde f$ does exist,
 since if $xN=yN$, then $xy\inv\in N$, so $xy\inv\in\Ker f$, hence $f(xy\inv)=\gid$, and therefore
 $f(x)=f(y)$. 
\end{proof}

\begin{corollary}[First Isomorphism Theorem]%
\index{isomorphism!I--- Theorems|(}\index{theorem!Isomorphism Th---s|(}  
For every homomorphism $f$ on a group $G$,
\begin{equation*}
G/\Ker f\cong \im f.
\end{equation*}
\end{corollary}

\begin{proof}
  Let $N=\Ker f$; then $\tilde f$ is the
  desired homomorphism.
\end{proof}

\begin{corollary}
  If $f$ is a homomorphism from $G$ to $H$, and $N$ is a normal subgroup
  of $G$, and $M\nsubgp H$, and $f[N]\subgp M$, then there is a
  homomorphism $\tilde f$ from $G/N$ to $H/M$ such that the following
  diagram commutes:
  \begin{equation*}
    \xymatrix{
G \ar[r]^{\uppi}\ar[d]_f & G/N\ar[d]^{\tilde f}\\
H \ar[r]_{\uppi}         & H/M
}
  \end{equation*}
\end{corollary}

\begin{proof}
$N<\Ker{\uppi\circ f}$.
\end{proof}

\begin{theorem}[Second Isomorphism]
  If $H\subgp G$ and $N\nsubgp G$, then 
  \begin{equation*}
%  H/(H\cap N)\cong NH/N.
\frac H{H\cap N}\cong\frac{HN}N.
  \end{equation*}
\end{theorem}

\begin{proof}
The map $h\mapsto hN$ from $H$ to $HN/N$ is surjective with kernel
$H\cap N$.  So the claim follows by the First Isomorphism Theorem (a corollary to Theorem~\ref{thm:hom-n}).
\end{proof}

For example,
in $\Z$, since $\gpgen n\cap\gpgen m=\gpgen{\lcm(n,m)}$ and $\gpgen
n+\gpgen m=\gpgen{\gcd(n,m)}$, we have
\begin{equation*}
  \frac{\gpgen n}{\gpgen{\lcm(n,m)}}
\cong\frac{\gpgen{\gcd(n,m)}}{\gpgen m}.
\end{equation*}

\begin{theorem}[Third Isomorphism]
  If $N$ and $K$ are normal subgroups of $G$ and $N\subgp K$, then
  $K/N\nsubgp G/N$ and
  \begin{equation*}
    \frac{G/N}{K/N}\cong G/K.
  \end{equation*}
\end{theorem}

\begin{proof}
By (a corollary to) Theorem~\ref{thm:hom-n}, the map $xN\mapsto xK$ from $G/N$ to $G/K$
  is a well-defined epimorphism.  The kernel contains $xN$ if and only
  if 
  $x\in K$, that is, $xN\in K/N$.  Again the claim now follows
  by the First Isomorphism Theorem (a corollary to Theorem~\ref{thm:hom-n}).
\end{proof}
\index{isomorphism!I--- Theorems|)}\index{theorem!Isomorphism Th---s|)}

Theorem~\ref{thm:hom-n} will also be used to prove von Dyck's Theorem (Theorem~\ref{thm:vD}, p.~\pageref{thm:vD}).

\begin{lemma}
  If $f$ is an epimorphism from $G$ onto $H$, then there is a
  one-to-one correspondence $K\mapsto f[K]$ between subgroups of $G$
  that include 
  $\Ker f$ and subgroups of $H$; under this, normal subgroups
  correspond.
  \begin{equation*}
    \xymatrix{
K  \ar[r]\ar[d] & G \ar@{>>}[d]^f\\
f[K] \ar[r] & H
}
  \end{equation*}
\end{lemma}

\begin{theorem}
  If $N\nsubgp G$, then every subgroup of $G/N$ is $K/N$ for some
  subgroup $K$ of $G$ that includes $N$, and moreover $K/N$ is normal
  in $G/N$
  if and only if $K$ is normal in~$G$.
  \begin{equation*}
    \xymatrix{
K  \ar[r]\ar[d] & G \ar@{>>}[d]^{\uppi}\\
K/N \ar[r] & G/N
}
  \end{equation*}
\end{theorem}

\begin{proof}
Use the lemma in case $H$ is $G/N$ and $f$ is $\uppi$.
\end{proof}

\section{Finite groups}\label{sect:fin}

Since every group can be considered as a symmetry group of \emph{itself,}
every \emph{finite} group $G$ can be considered as a symmetry group of a
finite set.  In particular, $G$ can be considered as a subgroup of $\Sym
n$ for some $n$ in $\upomega$.
As promised on page~\pageref{prom-prod-cyc}, we now show:


\begin{theorem}\label{thm:prod-cyc}
  Every element of $\Sym n$ is a composite of disjoint cycles of length
  at least $2$, uniquely up to order of factors.
\end{theorem}

\begin{proof}
  Let $\sigma\in\Sym n$.  If $k\in n$, let
  \begin{equation*}
    [k]=\{\sigma^{\ell}(k)\colon \ell\in\Z\}.
  \end{equation*}
Then the sets $[k]$ partition $n$: we have
\begin{equation*}
  n=[k_0]\cup\cdots\cup[k_{\ell-1}]
\end{equation*}
for some $\ell$, the union being disjoint.  If $i\in\ell$,
define $\sigma_i$ by
\begin{equation*}
  \sigma_i(x)=
  \begin{cases}
    \sigma(x),&\text{ if }x\in[k_i],\\
x,&\text{ otherwise.}
  \end{cases}
\end{equation*}
If $[k_i]$ has size $\ell_i$, then
$\sigma_i$ is the $\ell_i$-cycle 
$
\begin{pmatrix}
  k_i & \sigma(k_i) & \cdots & \sigma^{\ell_i-1}(k_i)
\end{pmatrix}
$.
Finally, $\sigma$ is the composite of all of
the $\sigma_i$ such that $\ell_i>1$.  The factorization is unique, since in
any such factorization each cycle agrees with $\sigma$ on its support, and
the supports must be the orbits $[k_i]$ having size at least $2$.
\end{proof}

\begin{theorem}
  The order of a finite permutation is the least common multiple of
  the orders of its disjoint cyclic factors.
\end{theorem}
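For example, the element of $\Sym 6$ given by $0\mapsto1$, $1\mapsto2$,
$2\mapsto0$, $3\mapsto4$, $4\mapsto3$, and $5\mapsto5$ is the composite
\begin{equation*}
  \begin{pmatrix}
    0 & 1 & 2
  \end{pmatrix}
  \begin{pmatrix}
    3 & 4
  \end{pmatrix},
\end{equation*}
and its order is $\lcm(3,2)$, that is, $6$.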

A $2$-cycle is also called a \textbf{transposition.}\index{transposition}

\begin{theorem}
  Every finite permutation is a product of transpositions.
\end{theorem}

\begin{proof}
$\begin{pmatrix}
    0 & 1 & \cdots & m-1
  \end{pmatrix}
=
\begin{pmatrix}
  0 & m-1
\end{pmatrix}
\dotsm
\begin{pmatrix}
  0 & 2
\end{pmatrix}
\begin{pmatrix}
  0 & 1
\end{pmatrix}$.
\end{proof}

Let the set of $2$-element subsets of $n$ be denoted by
\begin{equation*}
  \unordered{2}{n}.
\end{equation*}
If $\sigma\in\Sym n$, and $\{i,j\}\in\unordered 2n$, then we can
define 
\begin{equation*}
  \sigma(\{i,j\})=\{\sigma(i),\sigma(j)\}.
\end{equation*}
Thus we have a homomorphism from $\Sym n$ to $\Sym{\unordered 2n}$.
Understanding $n$ as the subset $\{0,\dots,n-1\}$ of $\Q$, for each $\sigma$ in $\Sym n$ we have a
function $X\mapsto\sq X$ from $\unordered 2n$ to $\unit{\Q}$ given by
\begin{equation*}
  \sq{\{i,j\}}=\frac{\sigma(i)-\sigma(j)}{i-j}.
\end{equation*}
Then we can define the
function $\sigma\mapsto\sgn{\sigma}$ from $\Sym n$
into $\unit{\Q}$ by
\begin{equation*}
  \sgn{\sigma}=
\prod_{X\in \unordered{2}{n}}
\sq X.
\end{equation*}
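For example, if $\sigma$ is the cycle $\begin{pmatrix}0&1&2\end{pmatrix}$ in $\Sym 3$, then
\begin{equation*}
  \sgn{\sigma}
  =\frac{1-2}{0-1}\cdot\frac{1-0}{0-2}\cdot\frac{2-0}{1-2}
  =1\cdot\Bigl(-\frac12\Bigr)\cdot(-2)=1.
\end{equation*}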

\begin{theorem}
  The function $\sigma\mapsto\sgn{\sigma}$ is a homomorphism
from  $\Sym n$ into the subgroup $\gpgen{-1}$ of $\unit{\Q}$, and onto it when
$n\geq2$; it takes every transposition to $-1$. 
\end{theorem}

\begin{proof}
If $\sigma=
\begin{pmatrix}
  k&\ell
\end{pmatrix}$, then
\begin{align*}
  \sgn{\sigma}
  &=\sq{\{k,\ell\}}\prod_{i\in
    n\setminus\{k,\ell\}}(\sq{\{i,\ell\}}\sq{\{k,i\}}) \\
&=\frac{\ell-k}{k-\ell}\cdot\prod_{i\in
  n\setminus\{k,\ell\}}\Bigl(\frac{i-k}{i-\ell}\cdot\frac{\ell-i}{k-i}\Bigr)
  =-1.  
\end{align*}
(The factors $\sq X$ with $X$ disjoint from $\{k,\ell\}$ are $1$ and have been omitted.)
If $\sigma$ and $\tau$ are arbitrary elements of $\Sym n$, then
\begin{align*}
  \sgn{\sigma\tau}
&=\prod_{\{i,j\}\in
  \unordered{2}{n}}\frac{\sigma(\tau(i))-\sigma(\tau(j))}{i-j}\\
&=\prod_{\{i,j\}\in
  \unordered{2}{n}}\left(\frac{\sigma(\tau(i))-\sigma(\tau(j))}
{\tau(i)-\tau(j)}\cdot
\frac{\tau(i)-\tau(j)}{i-j}\right)\\
&=\prod_{X\in
  \unordered{2}{n}}\sq{\tau(X)}\cdot\sgn{\tau}\\
&=\sgn{\sigma}\sgn{\tau}
\end{align*}
since $\tau$ permutes $\unordered 2n$.
\end{proof}

The value $\sgn{\sigma}$ can now be called the \textbf{signum}\index{signum} of
$\sigma$; it is $1$ if and only if $\sigma$ is the product of an even
number of transpositions.  Such a product is itself called
\textbf{even;}\index{even} the other permutations, with signum $-1$, are called
\textbf{odd.}\index{odd} 
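For example, a $k$-cycle is a composite of $k-1$ transpositions, as in the
proof above, so that
\begin{equation*}
  \sgn\sigma=(-1)^{k-1}
\end{equation*}
for every $k$-cycle $\sigma$; in particular, $3$-cycles are even, while
$4$-cycles are odd.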

The \textbf{alternating
  group}\index{alternating}\index{group!alternating ---} of degree 
$n$ is the kernel of 
$\sigma\mapsto\sgn \sigma$ on
$\Sym n$ and is denoted by
\begin{equation*}
  \Alt n.
\end{equation*}
Hence $\Alt n\nsubgp\Sym n$, and $[\Sym n:\Alt n]=2$ when $n\geq2$.

A group is \textbf{simple}\index{simple group}%
\index{group!simple ---} if it has no proper nontrivial normal
subgroups.  
For example,
  if $\Zmod n$ is nontrivial, then it is simple just in case $\abs n$ is prime.  Hence the
  only nontrivial simple abelian groups are the $\Zmod p$, where $p$ is prime.

\begin{lemma}
  $\Alt n$ is generated by the $3$-cycles in $\Sym n$.
\end{lemma}

\begin{proof}
The group $\Alt n$ is generated by the products
$\begin{pmatrix}
  a&b
\end{pmatrix}
\begin{pmatrix}
  a&c
\end{pmatrix}$ and
$\begin{pmatrix}
  a&b
\end{pmatrix}
\begin{pmatrix}
  c&d
\end{pmatrix}$, where $a$, $b$, $c$, and $d$ are distinct elements of
$n$.  But 
\begin{gather*}
  \begin{pmatrix}
    a & b
  \end{pmatrix}
  \begin{pmatrix}
    a & c
  \end{pmatrix}=
\begin{pmatrix}
    a & c & b
  \end{pmatrix},\\
\begin{pmatrix}
 a & b   
  \end{pmatrix}
  \begin{pmatrix}
    c & d
  \end{pmatrix}
=
  \begin{pmatrix}
    b & c & a
  \end{pmatrix}
  \begin{pmatrix}
    c & d & b
  \end{pmatrix}.
\end{gather*}
By the first identity, every $3$-cycle is a product of two transpositions and
so belongs to $\Alt n$; and by both identities, $\Alt n$ is generated by the $3$-cycles.
\end{proof}

\begin{lemma}
  $\Alt n$ is generated by the $3$-cycles 
$\begin{pmatrix}
    0 & 1 & k
  \end{pmatrix}$,
where $1<k<n$.
\end{lemma}

\begin{proof}
If $a$, $b$, and $c$ are distinct elements of $n\setminus\{0,1\}$, then
\begin{align*}
  \begin{pmatrix}
    0 & a & b
  \end{pmatrix}
&= \begin{pmatrix}
    0 & 1 & b
  \end{pmatrix}
  \begin{pmatrix}
    a & 1 & 0
  \end{pmatrix}
= \begin{pmatrix}
    0 & 1 & b
  \end{pmatrix}
  \begin{pmatrix}
    0 & 1 & a
  \end{pmatrix}\inv,\\
\begin{pmatrix}
1 & a & b
\end{pmatrix}
&= \begin{pmatrix}
    1 & 0 & b
  \end{pmatrix}
  \begin{pmatrix}
    a & 0 & 1
  \end{pmatrix}
= \begin{pmatrix}
    0 & 1 & b
  \end{pmatrix}\inv
  \begin{pmatrix}
    0 & 1 & a
  \end{pmatrix},\\
\begin{pmatrix}
a & b & c  
\end{pmatrix}
&=
\begin{pmatrix}
  c&1&0
\end{pmatrix}
\begin{pmatrix}
  0 & a & b
\end{pmatrix}
\begin{pmatrix}
  0 & 1 & c
\end{pmatrix}.\qedhere
\end{align*}
\end{proof}

\begin{lemma}
Any normal subgroup of $\Alt n$ containing a $3$-cycle \emph{is} $\Alt
  n$. 
\end{lemma}

\begin{proof}
We show that every $3$-cycle is conjugate in $\Alt n$ to a cycle
$\begin{pmatrix}
  0&1&k
\end{pmatrix}$.  It is enough to note that
\begin{equation*}
  \begin{pmatrix}
    a & b & d
  \end{pmatrix}=
\underbrace{\begin{pmatrix}
    a & b
  \end{pmatrix}
  \begin{pmatrix}
    c & d
  \end{pmatrix}}
  \begin{pmatrix}
    c & b & a
  \end{pmatrix}
\underbrace{
  \begin{pmatrix}
    c & d
  \end{pmatrix}
  \begin{pmatrix}
    a & b
  \end{pmatrix}}.\qedhere
  \end{equation*}
\end{proof}

\begin{lemma}
  If $n>4$, then a normal subgroup of $\Alt n$ contains a $3$-cycle,
  provided it has 
  a nontrivial element whose factorization into disjoint cycles
  contains one of the following:
  \begin{compactenum}
    \item
a cycle of length at least $4$;
\item
two cycles of length $3$;
\item
exactly one $3$-cycle together with transpositions, and no other cycles; or
\item
only transpositions.
  \end{compactenum}
\end{lemma}

\begin{proof}
\begin{asparaenum}
\item
  If $k\geq 4$, and $\sigma$ is disjoint from 
$\begin{pmatrix}
      0 & 1 & \dots & k-1
    \end{pmatrix}$, then
  \begin{multline*}
  \begin{pmatrix}
      0 & 1 & 2
    \end{pmatrix}
    \begin{pmatrix}
      0 & 1 & \dots & k-1
    \end{pmatrix}\sigma
    \begin{pmatrix}
      2 & 1 & 0
    \end{pmatrix}
\sigma\inv
    \begin{pmatrix}
k-1&\dots & 1 & 0
    \end{pmatrix}
    \\
    =
    \begin{pmatrix}
      0 & 1 & 3
    \end{pmatrix}.
  \end{multline*}
\item
If $\sigma$ is disjoint from $\begin{pmatrix}
    0 & 1 & 2
  \end{pmatrix}
  \begin{pmatrix}
    3 & 4 & 5
  \end{pmatrix}$, then we reduce to the previous case:
  \begin{multline*}
  \begin{pmatrix}
    0 & 1 & 3
  \end{pmatrix}
\underbrace{
  \begin{pmatrix}
    0 & 1 & 2
  \end{pmatrix}
  \begin{pmatrix}
    3 & 4 & 5
  \end{pmatrix}}\sigma
  \begin{pmatrix}
    3 & 1 & 0
  \end{pmatrix}
\sigma\inv
\underbrace{
  \begin{pmatrix}
    5 & 4 & 3
  \end{pmatrix}
  \begin{pmatrix}
    2 & 1 & 0
  \end{pmatrix}}\\
  =
  \begin{pmatrix}
    0 & 1 & 4 & 2 & 3
  \end{pmatrix}.
  \end{multline*}
 \item
If $\sigma$ is disjoint from 
$\begin{pmatrix}
  0 & 1 & 2
\end{pmatrix}$ and is the product of transpositions, then
\begin{equation*}
\left[\begin{pmatrix}
  0 & 1 & 2
\end{pmatrix}\sigma\right]^2=
\begin{pmatrix}
2 & 1 & 0
\end{pmatrix}.
\end{equation*}
\item
If $\sigma$ is a product of transpositions disjoint from 
$\begin{pmatrix}
  0&1
\end{pmatrix}$ and
$\begin{pmatrix}
 2&3 
\end{pmatrix}$, then
\begin{gather*}
  \begin{pmatrix}
  0 & 1 & 2
\end{pmatrix}
\underbrace{
\begin{pmatrix}
  0 & 1
\end{pmatrix}
\begin{pmatrix}
  2 & 3
\end{pmatrix}
\sigma}
\begin{pmatrix}
  2 & 1 & 0
\end{pmatrix}
\underbrace{
\sigma
\begin{pmatrix}
  3 & 2
\end{pmatrix}
\begin{pmatrix}
  1 & 0
\end{pmatrix}}
=
\begin{pmatrix}
  0 & 2
\end{pmatrix}
\begin{pmatrix}
  1 & 3
\end{pmatrix},\\
\begin{pmatrix}
  0 & 2 & 4
\end{pmatrix}
\underbrace{
\begin{pmatrix}
  0 & 2
\end{pmatrix}
\begin{pmatrix}
  1 & 3
\end{pmatrix}}
\begin{pmatrix}
  4 & 2 & 0
\end{pmatrix}
\underbrace{
\begin{pmatrix}
  3 & 1
\end{pmatrix}
\begin{pmatrix}
  2 & 0
\end{pmatrix}}
=
\begin{pmatrix}
  0 & 4 & 2
\end{pmatrix}.\qedhere
\end{gather*}
\end{asparaenum}
\end{proof}

% END OF DAY 6 (October 13, 2008)

\begin{theorem}
  $\Alt n$ is simple if and only if $n\neq 4$.
\end{theorem}

\begin{proof}
  $\Alt 1$ and $\Alt 2$ are trivial, and $\Alt 3\cong\Zmod 3$.
The case when $n>4$ is handled by the previous lemmas.  Finally, every
element of $\Alt 4$ (in fact, of $\Sym 4$) can be considered as a
permutation of the set
\begin{equation*}
  \Bigl\{
\bigl\{\{0,1\},\{2,3\}\bigr\},
\bigl\{\{0,2\},\{1,3\}\bigr\},
\bigl\{\{0,3\},\{1,2\}\bigr\}\Bigr\}.
\end{equation*}
Thus we get a homomorphism from $\Alt 4$ to $\Sym 3$.  Its kernel is
nontrivial, since $\order{\Alt 4}>\order{\Sym 3}$; and it is proper, since a
$3$-cycle permutes the three displayed partitions nontrivially.  So the kernel
is a proper nontrivial normal subgroup.
\end{proof}

The normal subgroup of $\Alt 4$ found in the proof is
  \begin{equation*}
    \gpgen{
      \begin{pmatrix}
	0 & 1
      \end{pmatrix}
      \begin{pmatrix}
	2 & 3
      \end{pmatrix}, 
      \begin{pmatrix}
	0 & 2
      \end{pmatrix}
      \begin{pmatrix}
	1 & 3
      \end{pmatrix}, 
      \begin{pmatrix}
	0 & 3
      \end{pmatrix}
      \begin{pmatrix}
	1 & 2
      \end{pmatrix}}.
  \end{equation*}
We can obtain it by considering $\Alt 4$ as the group of
rotational symmetries of the regular tetrahedron.  The vertices of
this tetrahedron can be taken as $4$ of the $8$ vertices of a cube:
say, the vertices with coordinates $(1,1,1)$, $(1,-1,-1)$, $(-1,1,-1)$,
and $(-1,-1,1)$.  Then a symmetry of the tetrahedron determines a
permutation of the $3$ coordinate axes, hence an element of $\Sym 3$.

\section{Determinants}\label{sect:det}

Let $R$ be a commutative ring.
We define the
function $X\mapsto\det(X)$ from $\MatR$ to $R$ by
\begin{equation*}
  \det((a^i_j)^{i<n}_{j<n})=\sum_{\sigma\in\Sym
    n}\sgn{\sigma}\prod_{i<n}a^i_{\sigma(i)}.
\end{equation*}
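For example, when $n=2$, the group $\Sym 2$ consists of the identity and the
transposition $\begin{pmatrix}0&1\end{pmatrix}$, so that
\begin{equation*}
  \det
  \begin{pmatrix}
    a^0_0&a^0_1\\
    a^1_0&a^1_1
  \end{pmatrix}
  =a^0_0a^1_1-a^0_1a^1_0.
\end{equation*}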

\begin{theorem}
  The function $X\mapsto\det(X)$ is a multiplicative homomorphism,
  that is,
  \begin{equation*}
    \det(XY)=\det(X)\det(Y).
  \end{equation*}
\end{theorem}

\begin{proof}
  We shall use the identity
  \begin{equation*}
    \prod_{i<k}\sum_{j<n}f(i,j)=\sum_{\phi\colon k\to n}\prod_{i<k}f(i,\phi(i)).
  \end{equation*}
Let $A=(a^i_j)^{i<n}_{j<n}$ and $B=(b^i_j)^{i<n}_{j<n}$.  Then
\begin{align*}
  \det(AB)
&=\det((\sum_{j<n}a^i_jb^j_k)^{i<n}_{k<n})\\
&=\sum_{\sigma\in\Sym n}\sgn{\sigma}\prod_{i<n}\sum_{j<n}a^i_jb^j_{\sigma(i)}\\
&=\sum_{\sigma\in\Sym n}\sgn{\sigma}\sum_{\phi\colon n\to
    n}\prod_{i<n}(a^i_{\phi(i)}b^{\phi(i)}_{\sigma(i)})\\
&=\sum_{\phi\colon n\to n}
\prod_{i<n}a^i_{\phi(i)}
 \sum_{\sigma\in\Sym n}\sgn{\sigma}\prod_{i<n}b^{\phi(i)}_{\sigma(i)}. 
\end{align*}
We shall eliminate from the sum the terms arising from those $\phi$ that are not
injective. 
Suppose $k<\ell<n$, but
$\phi(k)=\phi(\ell)$.  The function 
$\sigma\mapsto\sigma\circ
\begin{pmatrix}
  k&\ell
\end{pmatrix}$ is a bijection between $\Alt n$ and $\Sym
n\setminus\Alt n$.  Writing $\sigma'$ for $\sigma\circ
\begin{pmatrix}
  k&\ell
\end{pmatrix}$, we have
\begin{equation*}
\sum_{\sigma\in\Sym n}\sgn{\sigma}\prod_{i<n}b^{\phi(i)}_{\sigma(i)}
=\sum_{\sigma\in\Alt n}\sgn{\sigma}
(\prod_{i<n}b^{\phi(i)}_{\sigma(i)}-\prod_{i<n}b^{\phi(i)}_{\sigma'(i)}).
\end{equation*}
Each term of the last sum is $0$,
since $\sigma$ and $\sigma'$ agree on $n\setminus\{k,\ell\}$, while
\begin{equation*}
b^{\phi(k)}_{\sigma(k)}b^{\phi(\ell)}_{\sigma(\ell)}
= b^{\phi(\ell)}_{\sigma'(\ell)}b^{\phi(k)}_{\sigma'(k)}
= b^{\phi(k)}_{\sigma'(k)}b^{\phi(\ell)}_{\sigma'(\ell)}.
\end{equation*}
Therefore, continuing with the computation above, we have
\begin{equation*}
\det(AB)
=\sum_{\tau\in\Sym n}\prod_{i<n}a^i_{\tau(i)}
 \sum_{\sigma\in\Sym n}\sgn{\sigma}\prod_{i<n}b^{\tau(i)}_{\sigma(i)}.
\end{equation*}
Since each $\tau$ in $\Sym n$ permutes $n$, we have also
\begin{align*}
\prod_{i<n}b^{\tau(i)}_{\sigma(i)}
&=\prod_{i<n}b^i_{\sigma\tau\inv(i)},&
\sgn{\sigma}&=\sgn{\tau}\sgn{\sigma\tau\inv}.
\end{align*}
Putting this all together, we have
\begin{align*}
\det(AB)
&=\sum_{\tau\in\Sym n}\prod_{i<n}a^i_{\tau(i)}
 \sum_{\sigma\in\Sym n}\sgn{\tau}\sgn{\sigma\tau\inv}\prod_{i<n}b^i_{\sigma\tau\inv(i)}\\
&=\sum_{\tau\in\Sym n}\sgn{\tau}\prod_{i<n}a^i_{\tau(i)}
 \sum_{\sigma\in\Sym n}\sgn{\sigma\tau\inv}\prod_{i<n}b^i_{\sigma\tau\inv(i)}\\
&=\sum_{\tau\in\Sym n}\sgn{\tau}\prod_{i<n}a^i_{\tau(i)}
 \sum_{\sigma\in\Sym n}\sgn{\sigma}\prod_{i<n}b^i_{\sigma(i)}\\
 &=\det A\det B,
\end{align*}
since $\sigma\mapsto\sigma\tau\inv$ is a permutation of $\Sym n$.
\end{proof}

\begin{corollary}
  An element $A$ of $\MatR$ has an inverse only if $\det(A)\in\unit R$.
\end{corollary}

\begin{theorem}
  An element $A$ of $\MatR$ has an inverse if $\det(A)\in\unit R$.
\end{theorem}

\begin{proof}
Let $A=(a^i_j)^{i<n}_{j<n}$.  If $i<n$, then
\begin{align*}
  \det(A)
&=\sum_{\sigma\in\Sym
    n}\sgn{\sigma}\prod_{\ell<n}a^{\ell}_{\sigma(\ell)}\\
&=\sum_{\sigma\in\Sym
    n}\sgn{\sigma}a^i_{\sigma(i)}\prod_{\ell\in
    n\setminus\{i\}}a^{\ell}_{\sigma(\ell)}\\
&=\sum_{j<n}a^i_j
\sum_{\substack{\sigma\in\Sym n\\\sigma(i)=j}}\sgn{\sigma}\prod_{\ell\in
    n\setminus\{i\}}a^{\ell}_{\sigma(\ell)}\\
&=\sum_{j<n}a^i_jb^j_i,
\end{align*}
where
\begin{equation*}
  b^j_k=
\sum_{\substack{\sigma\in\Sym n\\\sigma(k)=j}}\sgn{\sigma}\prod_{\ell\in
    n\setminus\{k\}}a^{\ell}_{\sigma(\ell)}.
\end{equation*}
However, if $i\neq k$, then
\begin{align*}
  \sum_{j<n}a^i_jb^j_k
&=\sum_{j<n}a^i_j\sum_{\substack{\sigma\in\Sym
      n\\\sigma(k)=j}}\sgn{\sigma}\prod_{\ell\in 
    n\setminus\{k\}}a^{\ell}_{\sigma(\ell)}\\
&=\sum_{\sigma\in\Sym n}\sgn{\sigma}a^i_{\sigma(k)}\prod_{\ell\in 
    n\setminus\{k\}}a^{\ell}_{\sigma(\ell)}\\
&=\sum_{\sigma\in\Sym n}\sgn{\sigma}a^i_{\sigma(k)}a^i_{\sigma(i)}\prod_{\ell\in 
    n\setminus\{i,k\}}a^{\ell}_{\sigma(\ell)}=0,
\end{align*}
since the map $\sigma\mapsto\sigma\circ
\begin{pmatrix}
  i&k
\end{pmatrix}$ is a bijection between $\Alt n$ and $\Sym
n\setminus\Alt n$.
Thus
\begin{equation*}
  A(b^j_k)^{j<n}_{k<n}=(\det(A)\updelta^i_k)^{i<n}_{k<n}.
\end{equation*}
Finally,
\begin{align*}
  \sum_{j<n}b^i_ja^j_k
&=\sum_{j<n}\sum_{\substack{\sigma\in\Sym
      n\\\sigma(j)=i}}\sgn{\sigma}\prod_{\ell\in
    n\setminus\{j\}}a^{\ell}_{\sigma(\ell)}a^j_k\\
&=\sum_{\sigma\in\Sym n}\sgn{\sigma}\prod_{\ell\in
    n\setminus\{\sigma\inv(i)\}}a^{\ell}_{\sigma(\ell)}a^{\sigma\inv(i)}_k\\
&=\sum_{\sigma\in\Sym n}\sgn{\sigma}\prod_{\ell\in
    n\setminus\{i\}}a^{\sigma\inv(\ell)}_{\ell}a^{\sigma\inv(i)}_k,
\end{align*}
which is $\det(A)$ if $i=k$, but is otherwise $0$, so
\begin{equation*}
  (b^i_j)^{i<n}_{j<n}A=(\det(A)\updelta^i_k)^{i<n}_{k<n}.
\end{equation*}
In particular, if $\det(A)$ is invertible, then so is $A$, and
\begin{equation*}
  A\inv=(\det(A)\inv b^j_k)^{j<n}_{k<n}. \qedhere
\end{equation*}
\end{proof}
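For example, in $\MatZ[2]$ the formulas above give $b^0_0=a^1_1$,
$b^0_1=-a^0_1$, $b^1_0=-a^1_0$, and $b^1_1=a^0_0$; so, the determinant being $1$,
\begin{equation*}
  \begin{pmatrix}
    2&1\\1&1
  \end{pmatrix}\inv
  =
  \begin{pmatrix}
    1&-1\\-1&2
  \end{pmatrix}.
\end{equation*}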


\section{Dihedral groups}

We can consider the elements of $n$ as vertices of a regular $n$-gon.
The group of symmetries of this object is called a \textbf{dihedral
  group}\index{dihedral group} and is denoted by
\begin{equation*}
\Dih n.
\end{equation*}
Formally, this is the subgroup $\gpgen{\sigma_n,\beta}$ of $\Sym n$,
where, as before, $\sigma_n$ is the $n$-cycle
$\begin{pmatrix}
      0&1&\dots&n-1
    \end{pmatrix}$, while
\begin{equation*}
\beta=
\begin{cases}
      \begin{pmatrix}
      1&n-1
    \end{pmatrix}
    \begin{pmatrix}
      2&n-2
    \end{pmatrix}\dotsm
    \begin{pmatrix}
      m-1&m+1
    \end{pmatrix},& \text{if } n=2m,\\
    \begin{pmatrix}
      1&n-1
    \end{pmatrix}
    \begin{pmatrix}
      2&n-2
    \end{pmatrix}\dotsm
    \begin{pmatrix}
      m&m+1
    \end{pmatrix},&\text{ if }n=2m+1.
\end{cases}
\end{equation*}
Note that both $\beta$ and $\sigma_n\beta$ here have order $2$.
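For example, $\Dih 4=\gpgen{\sigma_4,\beta}$, where
$\sigma_4=\begin{pmatrix}0&1&2&3\end{pmatrix}$ and
$\beta=\begin{pmatrix}1&3\end{pmatrix}$; its eight elements are
\begin{equation*}
  \id,\quad\sigma_4,\quad\sigma_4{}^2,\quad\sigma_4{}^3,\quad
  \beta,\quad\sigma_4\beta,\quad\sigma_4{}^2\beta,\quad\sigma_4{}^3\beta.
\end{equation*}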

\begin{theorem}\label{thm:Dn}
If $n>2$, and $G=\gpgen{a,b}$, where $\order a=n$ and $\order
b=2=\order{ab}$, then $G\cong\Dih n$.
\end{theorem}

\begin{proof}
Since $abab=\gid$ and $b\inv=b$, we have
\begin{align*}
  ba&=a\inv b,&
ba\inv&=ab.
\end{align*}
Therefore $ba^k=a^{-k}b$ for all integers
  $k$.  This shows 
\begin{equation*}
G=\{a^ib^j\setcolon(i,j)\in n\times 2\}.
\end{equation*}
It remains to show $\order G=2n$.
  Suppose 
  \begin{equation*}
  a^ib^j=a^kb^{\ell},
  \end{equation*}
  where $(i,j)$ and $(k,\ell)$ are in $n\times 2$.  Then 
  \begin{equation*}
  a^{i-k}=b^{\ell-j}.
  \end{equation*}
If $b^{\ell-j}=\gid$, then $\ell=j$ and $i=k$.  The alternative is that
 $b^{\ell-j}=b$.  In this case,
\begin{equation*}
n\divides2(i-k).  
\end{equation*}
If $n\divides i-k$, then $i=k$ and hence $j=\ell$.  The only other
possibility is that $n=2m$ for some $m$, and $i-k=\pm m$, so that $a^m=b$.  
But then $aa^maa^m=a^2$, while $abab=\gid$, so $n=2$, contrary to hypothesis.
Therefore $\order G=2n$.  Now $\Dih n=\gpgen{\sigma_n,\beta}$ satisfies the same
hypotheses, and in each group multiplication is determined by the rule
$ba^k=a^{-k}b$; so the correspondence $a^ib^j\mapsto\sigma_n{}^i\beta^j$ is an
isomorphism from $G$ to $\Dih n$.
\end{proof}










\chapter{Category theory}

\section{Products and sums}\label{sect:prod-sum}

\begin{theorem}\label{thm:prod}
  Let $G_0$, $G_1$ and $H$ be groups.  For each $i$ in $2$, let
  $\uppi_i$ be the homomorphism $(x_0,x_1)\mapsto x_i$ from $G_0\times
  G_1$ to $G_i$, and let $f_i$ be a homomorphism from $H$ to $G_i$.
Then there is a homomorphism
\begin{equation*}
  x\mapsto(f_0(x),f_1(x))
\end{equation*}
 from $H$ to $G_0\times G_1$, and this 
 is the unique homomorphism $f$ from $H$ to $G_0\times G_1$ such
  that, for each $i$ in~$2$,
  \begin{equation*}
    \uppi_if=f_i
  \end{equation*}
---that is, the following diagram commutes:
\begin{equation*}
  \xymatrix{
G_0 & \ar[l]_-{\uppi_0} G_0\times G_1 \ar[r]^-{\uppi_1} & G_1\\
& \ar[ul]^{f_0} \ar[u]_f H \ar[ur]_{f_1} &
}
\end{equation*}
\end{theorem}

\begin{proof}
If $u\in G_0\times G_1$, then $u=(\uppi_0(u),\uppi_1(u))$.  Hence,
  if $f\colon H\to G_0\times G_1$, then $f(x)=(\uppi_0f(x),\uppi_1f(x))$.
  In particular then, $f$ is as desired if and only if
  $f(x)=(f_0(x),f_1(x))$. 
\end{proof}

We can generalize this theorem by considering an indexed family
$(G_i\colon i\in I)$ of groups.  The \textbf{direct product}\index{direct product}
of this family is denoted by
\begin{equation*}
  \prod_{i\in I}G_i.
\end{equation*}
This is, first of all, the set
whose elements are $(x_i\colon i\in I)$ (that is, functions $i\mapsto x_i$ on~$I$) such
that $x_i\in G_i$ for each $i$ in $I$.  An operation of multiplication
on this set is given by
\begin{equation*}
  (x_i\colon i\in I)(y_i\colon i\in I)=(x_iy_i\colon i\in I).
\end{equation*}
Under this multiplication, $\prod_{i\in I}G_i$ becomes a group.
If $i\in I$, we define a homomorphism $\uppi_i$ from $\prod_{i\in I}G_i$
to $G_i$ by
\begin{equation*}
  \uppi_i(x_j\colon j\in I)=x_i.
\end{equation*}
In case $I=n$, we may write $\prod_{i\in I}G_i$ also as
\begin{equation*}
  G_0\times\cdots\times G_{n-1},
\end{equation*}
and a typical element of this as
\begin{equation*}
  (x_0,\dots,x_{n-1}).
\end{equation*}
As a porism to the previous theorem, we have:

\begin{porism}
  Suppose $(G_i\colon i\in I)$ is an indexed family of groups, and $H$
  is a group, and for each $i$ in $I$ there is a homomorphism $f_i$ from $H$
  to $G_i$.  Then there is a homomorphism
  \begin{equation*}
    x\mapsto(f_i(x)\colon i\in I)
  \end{equation*}
 from $H$ to
  $\prod_{i\in I}G_i$, and this
is the unique homomorphism $f$ from $H$ to
  $\prod_{i\in I}G_i$ such that, for each $i$ in~$I$,
  \begin{equation*}
    \uppi_if=f_i.
  \end{equation*}
\end{porism}

The direct product of a family of abelian groups is an abelian group.
When we restrict attention to abelian groups, then we can
reverse the arrows in Theorem~\ref{thm:prod}: 

\begin{theorem}\label{thm:oplus}
  Let $G_0$, $G_1$ and $H$ be abelian groups.  Let $\iota_0$ be the
  homomorphism $x\mapsto(x,0)$ from $G_0$ to $G_0\oplus G_1$, and let
  $\iota_1$ be $x\mapsto(0,x)$ from $G_1$ to $G_0\oplus G_1$.
For each $i$ in $2$, let $f_i$ be a homomorphism from $G_i$ to $H$.
Then there is a homomorphism
\begin{equation*}
  (x_0,x_1)\mapsto f_0(x_0)+f_1(x_1)
\end{equation*}
 from $G_0\oplus G_1$ to $H$, and this is
 the unique homomorphism $f$ from $G_0\oplus G_1$ to $H$ such
  that, for each $i$ in~$2$,
  \begin{equation*}
    f\iota_i=f_i
  \end{equation*}
---that is, the following diagram commutes:
\begin{equation*}
  \xymatrix{
G_0 \ar[r]^-{\iota_0} \ar[dr]_{f_0} & G_0\oplus G_1 \ar[d]^f &
\ar[l]_-{\iota_1} \ar[dl]^{f_1} G_1\\ 
& H &
}
\end{equation*}
\end{theorem}

\begin{proof}
Every element $(x_0,x_1)$ of $G_0\oplus G_1$ is
$\iota_0(x_0)+\iota_1(x_1)$, so that, if $f$ is a homomorphism on $G_0\oplus G_1$, then
\begin{equation}\label{eqn:f+}
  f(x_0,x_1)=f\iota_0(x_0)+f\iota_1(x_1).
\end{equation}
Hence $f$ is as desired if and only if
$f(x_0,x_1)=f_0(x_0)+f_1(x_1)$.  The function so defined is indeed a
homomorphism, since 
\begin{multline*}
  f((x_0,x_1)+(u_0,u_1))
=f(x_0+u_0,x_1+u_1)
=f_0(x_0+u_0)+f_1(x_1+u_1)\\
\begin{aligned}
&=f_0(x_0)+f_0(u_0)+f_1(x_1)+f_1(u_1)\\
&=f_0(x_0)+f_1(x_1)+f_0(u_0)+f_1(u_1)
=f(x_0,x_1)+f(u_0,u_1),
\end{aligned}
\end{multline*}
because $H$ is abelian.
\end{proof}

In the proof, the definition of $f$ in~\eqref{eqn:f+} relies on the
\emph{finiteness} of the family $(G_i\colon i\in 2)$; more precisely,
it relies on the finiteness of $\{i\in 2\colon x_i\neq\gid\}$.  Of an
arbitrary indexed family $(G_i\colon i\in I)$ of groups, we define the
\textbf{\emph{weak} direct product}\index{weak direct product} to be
the subgroup, denoted by  
\begin{equation*}
\weakprod_{i\in I}G_i,
\end{equation*}
of $\prod_{i\in I}G_i$ comprising those elements $(x_i\colon i\in I)$ such that $\{i\in I\colon x_i\neq\gid\}$ is finite.  
We define a homomorphism $\iota_i$ from each $G_i$ to
$\textweakprod_{j\in I}G_j$ by
\begin{equation*}
\iota_i(x)=(x_j\colon j\in I), 
\end{equation*}
where
\begin{equation*}
  x_j=
  \begin{cases}
    x,&\text{ if }j=i;\\
\gid,&\text{ otherwise.}
  \end{cases}
\end{equation*}
If $I$ is finite, then the weak direct product is the same as the (full)
direct product.  If $I$ is infinite, and the groups $G_i$ are nontrivial for infinitely many $i$ in $I$, then the weak direct product is \emph{not} the same as the direct product; but the proof uses the Axiom of Choice.\label{ac}

Proving that $f$ as in~\eqref{eqn:f+} is a \emph{homomorphism} uses that $H$ is abelian.
The weak direct product of a family $(G_i\colon i\in I)$ of abelian groups is called the \textbf{direct sum}\index{direct sum} and is denoted by 
\begin{equation*}
  \sum_{i\in I}G_i.
\end{equation*}
In case $I=n$, we may write $\sum_{i\in I}G_i$ also as
\begin{equation*}
  G_0\oplus\cdots\oplus G_{n-1}.
\end{equation*}
As a porism to the previous theorem, we have:

\begin{porism}
  Suppose $(G_i\colon i\in I)$ is an indexed family of abelian groups, and $H$
  is an abelian group, and for each $i$ in $I$ there is a homomorphism $f_i$ from
  $G_i$ to $H$.  Then the map
  \begin{equation*}
    x\mapsto\sum_{i\in I}f_i(x_i)
  \end{equation*}
  from $\sum_{i\in I}G_i$ to $H$ is the unique homomorphism $f$ from
  $\sum_{i\in I}G_i$ to $H$
  such that, for each $i$ in~$I$, 
  \begin{equation*}
    f\iota_i=f_i.
  \end{equation*}
\end{porism}

Now we can provide an example promised in \S \ref{sect:rings}.  Let
$E$ be the abelian group $\sum_{n\in\upomega}\Z$. 
Suppose $f$ is a singulary operation on $\upomega$ such that $f\inv(n)$ is
finite for each $n$.  An element
$f^*$ of $\End E$ is then induced, given by
\begin{equation*}
  f^*(x_n\colon n\in\upomega)=(x_{f(n)}\colon n\in\upomega).
\end{equation*}
If moreover $f$ is injective, then $f^*\iota_{f(n)}=\iota_n$.
Let
$f$ be the operation $x\mapsto x+1$ on $\upomega$, and let $g$ be the
operation given by
\begin{equation*}
  g(x)=
  \begin{cases}
    y,&\text{ if }f(y)=x,\\
    0,&\text{ if }x=0.
  \end{cases}
\end{equation*}
Then $gf(x)=x$, so $f^*g^*=(gf)^*$, the identity in $\End E$; but
$g^*f^*$ is not the identity, since it is $(fg)^*$, and
$fg(0)=1=fg(1)$. 

We have two kinds
of products so far, related as follows.

\begin{theorem}
Let $(G_i\colon i\in I)$ be an indexed family of groups.  Then
\begin{align*}
   \iota_j[G_j]&\nsubgp\weakprod_{i\in I}G_i,&  
\weakprod_{i\in I}G_i&\nsubgp\prod_{i\in I}G_i,&
   \iota_j[G_j]&\nsubgp\prod_{i\in I}G_i.
\end{align*}
\end{theorem}

Theorem~\ref{thm:oplus} and its porism can be generalized to some
cases of arbitrary groups:

\begin{theorem}
  Suppose $(G_i\colon i\in I)$ is an indexed family of groups, and $H$
  is a group, and for each $i$ in $I$ there is a homomorphism $f_i$ from
  $G_i$ to $H$.  Suppose further that, whenever $i$ and $j$ are distinct elements of $I$,
  \begin{equation*}
    f_i(x)f_j(y)=f_j(y)f_i(x).
  \end{equation*}
Then the map
\begin{equation*}
  x\mapsto\prod_{i\in I}f_i(x_i)
\end{equation*}
from $\textweakprod_{i\in I}G_i$ to $H$ is the unique
homomorphism $f$
from $\textweakprod_{i\in I}G_i$ to $H$
such that, for each $i$ in~$I$, 
  \begin{equation*}
    f\iota_i=f_i.
  \end{equation*}
\end{theorem}

As a special case of this theorem, we have the next theorem below, by
means of the following:

\begin{lemma}
  If $M$ and $N$ are normal subgroups of $G$, and 
  \begin{equation*}
  M\cap N=\gpgen{\gid},
  \end{equation*}
  then each element $m$ of $M$ commutes with each element $n$ of $N$,
  that is,
  \begin{equation*}
  mn=nm.
  \end{equation*}
\end{lemma}

\begin{proof}
We can analyze $mnm\inv n\inv$ both as the element
$(mnm\inv)n\inv$ of $N$ and as the element $m(nm\inv n\inv)$ in $M$; so
  the element is $\gid$, and therefore
$mn=(m\inv n\inv)\inv=nm$.
\end{proof}

\begin{theorem}\label{thm:wdp}
  If $(N_i\colon i\in I)$ is an indexed family of normal subgroups of
  a group, and for each $j$ in $I$,
\begin{equation}
  N_j\cap\Bigl\langle\bigcup_{i\in
  I\setminus\{j\}}N_i\Bigr\rangle=\gpgen{\gid},
\end{equation}
then 
\begin{equation*}
\Bigl\langle\bigcup_{i\in I}N_i\Bigr\rangle
\cong\weakprod_{i\in I}N_i.
\end{equation*}
\end{theorem}

\begin{proof}
Say the $N_i$ are normal subgroups of $G$.
Since $N_i\cap N_j=\gpgen{\gid}$ whenever $i\neq j$,
the last theorem and the lemma guarantee that there is a homomorphism
$h$ from $\textweakprod_{i\in I}N_i$ 
  into $G$ such that, for each $i$ in $I$, the composition
  $h\iota_i$ is just the inclusion of $N_i$ in $G$.  Then the range of $h$ is
$\Bigl\langle\bigcup_{i\in I}N_i\Bigr\rangle$.
To show that $h$ is injective, note that, if $n\in\textweakprod_{i\in
  I}N_i$ and $h(n)=\gid$, then, for each $j$ in $I$, we have
\begin{equation*}
  n_j{}\inv=\prod_{i\in I\setminus\{j\}}n_i.
\end{equation*}
The left member is in $N_j$, the right in
$\Bigl\langle\bigcup_{i\in
  I\setminus\{j\}}N_i\Bigr\rangle$, so each side is $\gid$; in
particular, $n_j=\gid$.  Therefore $n=\gid$.
\end{proof}

In the theorem, if $\Bigl\langle\bigcup_{i\in I}N_i\Bigr\rangle=G$, then $G$ is the
\textbf{\emph{internal} weak direct product}\index{internal weak
  direct product} of the~$N_i$. 






















\section{Free groups}

The direct sum $\sum_{i\in I}\Z$ has elements $\gid^i$, namely $\iota_i(1)$ or $(\updelta_j^i\colon j\in I)$, where
\begin{equation*}
\updelta_j^i=
\begin{cases}
1,&\text{ if }j=i,\\
0,&\text{ otherwise.}
\end{cases}
\end{equation*}
An arbitrary element of $\sum_{i\in I}\Z$ is a \textbf{`formal
  sum',}\index{formal sum}
\begin{equation*}
\sum_{i\in I}x_i\gid^i.
\end{equation*}

\begin{theorem}\label{thm:free-ab}
  Suppose $G$ is an abelian group, $I$ is a set, and $f$ is a
  function from $I$ to $G$.  Then the map
  \begin{equation*}
  \sum_{i\in I}x_i\gid^i\mapsto\sum_{i\in I}x_if(i)
  \end{equation*}
 from $\sum_{i\in I}\Z$ to $G$ is the unique homomorphism $\tilde f$ from $\sum_{i\in I}\Z$ to $G$ such that, for each $i$ in~$I$,
  \begin{equation*}
    \tilde f(\gid^i)=f(i)
  \end{equation*}
---that is, the following diagram commutes, where $\iota$ is the map $i
\mapsto\gid^i$:  
\begin{equation*}
  \xymatrix{
I \ar[r]^-{\iota} \ar[d]_f & \ar[dl]^{\tilde f} \displaystyle\sum_{i\in I}\Z \\
G &
}
\end{equation*}
\end{theorem}

The direct sum $\sum_{i\in I}\Z$ in the theorem is the \textbf{free
  abelian group}\index{free abelian group} on $I$ with respect to the map
$i\mapsto\gid^i$.  
There is also a \textbf{free group}\index{free group} on $I$, which we may denote by
\begin{equation*}
  \free I.
\end{equation*}
This is the group of \emph{reduced words} on $I$.  A \textbf{word}\index{word} on $I$ is a 
finite nonempty string $t_0t_1\cdots t_n$,
where
each entry $t_k$ is either $\gid$, or else $a$ or $a\inv$ for some $a$
in $I$.  A word is \textbf{reduced}\index{reduced} if
$a$ and $a\inv$ are never adjacent in it, and
$\gid$ is never adjacent to any other entry (so $\gid$ can appear only
  in the string $\gid$).
We make $\free I$
  into a group when the multiplication is defined as juxtaposition followed by \textbf{reduction,}\index{reduction} namely, replacement of each occurrence of $aa\inv$ or $a\inv a$ with $\gid$, and replacement of each occurrence of $x\gid$ or $\gid x$ with $x$.  Thus,
  when an element $a$ of $I$ is written as $a^{+1}$, we have
  \begin{equation*}
    (a_{m}^{\epsilon(m)}\cdots a_{0}^{\epsilon(0)})
(b_{0}^{\zeta(0)}\cdots b_{n}^{\zeta(n)})=
a_{m}^{\epsilon(m)}\cdots a_{j}^{\epsilon(j)}b_j^{\zeta(j)}\cdots
b_{n}^{\zeta(n)},
  \end{equation*}
where $j$ is maximal such that, if $i<j$, then
 $a_i^{\epsilon(i)}= b_i^{-\zeta(i)}$.  We consider $I$ as a
subset of $\free I$.  An element of the latter other than $\gid$ can be written also as
\begin{equation*}
a_0{}^{n(0)}\cdots a_m{}^{n(m)},
\end{equation*}
where $a_i$ and $a_{i+1}$ are always distinct elements of $I$, and each $n(i)$ is in $\Z\setminus\{0\}$.
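For example, in $\free{\{a,b\}}$,
\begin{equation*}
  (ab\inv)(ba\inv b)=b,
\end{equation*}
since $b\inv b$ and then $aa\inv$ reduce to $\gid$.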

\begin{theorem}\label{thm:free-gp}
  Suppose $G$ is a group, $I$ is a set, and $f$ is a
  function from $I$ to $G$.  Then the map
  \begin{equation*}
  a_0^{\epsilon(0)}\cdots
  a_n^{\epsilon(n)}\mapsto f(a_0)^{\epsilon(0)}\cdots
  f(a_n)^{\epsilon(n)} 
\end{equation*}
  from $\free I$ to $G$ is
  the unique homomorphism
  $\tilde f$ from $\free I$ to $G$ such that
  \begin{equation*}
    \tilde f\restriction I=f
  \end{equation*}
---that is, the following diagram commutes, where $\iota$ is the
inclusion of $I$ in $\free I$:  
\begin{equation*}
  \xymatrix{
I \ar[r]^-{\iota} \ar[d]_f & \ar[dl]^{\tilde f} \free I \\
G &
}
\end{equation*}
\end{theorem}

The \textbf{free product}\index{free product} of a family
$(G_i\setcolon i\in I)$ of groups is the group, denoted by
  \begin{equation*}
    \freeprod_{i\in I}G_i,
  \end{equation*}
  comprising the string $\gid$ together with strings   
   $t_0\cdots t_m$, where each entry $t_i$ is an ordered pair
  $(g_i,n(i))$ such that $n(i)\in I$ and $g_i\in
  G_{n(i)}\setminus\{\gid\}$, and $n(i)\neq n(i+1)$.  This complicated
  definition allows for the possibility that $G_i$ might be the same
  as $G_j$ for some distinct $i$ and $j$; the groups $G_i$ and $G_j$
  must be considered as distinct in the formation of the free
  product. 
Multiplication on $\textfreeprod_{i\in I}G_i$, as on $\free I$, is
juxtaposition followed by reduction, so that if $(g,i)$ is followed
directly by $(h,i)$, then they are replaced with $(gh,i)$, and all
instances of $(\gid,i)$ are deleted, or replaced with $\gid$ if there
is no other entry. 
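For example, suppose $G_0$ and $G_1$ both have order $2$, with respective
nontrivial elements $a$ and $b$.  Identifying each element $x$ of $G_j$ with
the one-entry string $(x,j)$, the elements of the free product of $G_0$ and
$G_1$ other than $\gid$ are the alternating strings
\begin{equation*}
  a,\quad b,\quad ab,\quad ba,\quad aba,\quad bab,\quad\dotsc;
\end{equation*}
in particular, the free product is infinite, although each factor is finite,
and $ab$ has infinite order.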
Each $G_j$ embeds in $\textfreeprod_{i\in I}G_i$ under $\iota_j$,
namely $x\mapsto(x,j)$.  We now have 
the following analogue of the porism to Theorem~\ref{thm:oplus}.

\begin{theorem}\label{thm:free-prod}
  Let $(G_i\colon i\in I)$ be an indexed family of groups, and let $H$
  be a group.  Suppose for each $i$ in $I$ there is a homomorphism
  $f_i$ from $G_i$ to $H$.  Then there is a homomorphism
    \begin{equation*}
    (g_0,n(0))\cdots(g_m,n(m))\mapsto f_{n(0)}(g_0)\cdots f_{n(m)}(g_m)
  \end{equation*}
from $\textfreeprod_{i\in I}G_i$ to $H$; this is the unique
homomorphism $f$ from $\textfreeprod_{i\in I}G_i$ to $H$ such that, for each $i$ in $I$,
\begin{equation*}
f\iota_i=f_i
\end{equation*}
---that is, the following diagram commutes:
\begin{equation*}
  \xymatrix{
G_j \ar[dr]_{f_j} \ar[r]^-{\iota_j} & \displaystyle\freeprod_{i\in I}G_i \ar[d]^f\\
& H
}
\end{equation*}
\end{theorem}

\section{Categories}\label{sect:category}








For any two groups $G$ and $H$ there is a set
\begin{equation*}
  \Hom{G,H}
\end{equation*}
comprising the homomorphisms from $G$ to $H$.  There is a map
\begin{equation*}
  (g,f)\mapsto g\circ f
\end{equation*}
from $\Hom{H,K}\times\Hom{G,H}$ to $\Hom{G,K}$,
and there is an element $\id_H$ of $\Hom {H,H}$, such that
\begin{equation*}
  \id_H\circ f=f,\quad g\circ\id_H=g,\quad k\circ(g\circ f)=(k\circ
  g)\circ f
\end{equation*}
whenever $f\in\Hom{G,H}$, $g\in\Hom{H,K}$, and $k\in\Hom{K,L}$.
Understood in this way, groups with their 
homomorphisms compose a prototypical example of a \emph{category.}

A \textbf{directed graph}\index{directed graph} is a certain kind of quadruple 
\begin{equation*}
  (\class C_0,\class C_1,t,h),
\end{equation*}
where $\class C_0$ and $\class C_1$ are classes, and $t$
and $h$ are 
functions from $\class C_1$ to $\class C_0$.  We may refer to each
element of $\class C_0$ as a \textbf{node,}\index{node} and to each element of
$\class C_1$ as an \textbf{arrow.}\index{arrow}  If $a$ is an arrow, then $t(a)$ is its
\textbf{tail,}\index{tail} and $h(a)$ is its \textbf{head,}\index{head} and $a$ is an
arrow \textbf{from}
$t(a)$ \textbf{to} $h(a)$.  If $f$ is an arrow from $A$ to $B$, we
may express this by writing 
\begin{align*}
f&\colon A\longrightarrow B&
&\text{ or }&
&A\overset{f}{\longrightarrow}B.  
\end{align*}
We require the arrows from $A$ to $B$ to compose a \emph{set} (as
opposed to a proper class, like the class of all sets that do not
contain themselves).  We can define
\begin{equation*}
  \class C_2=\{(f,g)\in\class C_1{}^2\setcolon t(f)=h(g)\};
\end{equation*}
this is the class of paths of length $2$.
More generally,
\begin{equation*}
\class C_{n+1}=\Bigl\{(f_0,\dots,f_n)\in\class
C_1{}^{n+1}\setcolon \bigwedge_{i<n}t(f_i)=h(f_{i+1})\Bigr\}. 
\end{equation*}
The graph above is a \textbf{category}\index{category}
if there are
\begin{compactenum}
  \item
a function
$A\mapsto\id_A$ from $\class C_0$ to $\class C_1$, and
\item
a function $(f,g)\mapsto f\circ g$ from $\class C_2$ to $\class C_1$,
\end{compactenum}
such that
\begin{align*}
t(\id_A)&=A=h(\id_A),&
t(f\circ g)&=t(g),&
h(f\circ g)&=h(f),
\end{align*}
and also
\begin{align}\label{eqn:cat}
  f\circ\id_{t(f)}&=f,&
  \id_{h(g)}\circ g&=g,&
k\circ(g\circ f)&=(k\circ g)\circ f
\end{align}
whenever these are defined.  In particular then, the category is a
sextuple
\begin{equation}\label{cat}
  (\class C_0,\class C_1,t,h,\id,\circ),
\end{equation}
meeting the conditions that we have discussed.  In this case, the nodes---the elements of $\class C_0$---are called
\textbf{objects.}%
\index{object}
Conditions~\eqref{eqn:cat} can be
diagrammed as follows.
\begin{align*}
  &\xymatrix{
A \ar[r]^f \ar[d]_f & B \ar[d]^g\\
B \ar[ur]|{\id_B} \ar[r]_g & C}&
&\xymatrix{
B \ar[d]_g & A \ar[l]_f \ar[dl]|{g\circ f} \ar[d]%|{k\circ g\circ f} 
\ar[r]^f & B \ar[dl]|{k\circ g} \ar[d]^g\\
C \ar[r]_k & D & C \ar[l]^k}
\end{align*}
These are \textbf{commutative diagrams}\index{commutative diagram}%
\index{diagram!commutative ---}
in the sense that any two paths from one vertex to another represent
the same arrow.\footnote{One can define commutative diagrams
    formally.  A \textbf{diagram}\index{diagram} is a
homo\-morph\-ism from a directed graph to a category.  One then thinks of the
diagram as the graph with its nodes and arrows labelled with their
images in the category.  The diagram is
\textbf{commutative} if every 
path in the graph with the same tail and head is sent to the same
arrow in the category.}
The
arrows of a category are also called \textbf{morphisms.}\index{morphism}  
The class of morphisms from $A$ to $B$ can be denoted by
\begin{equation*}
  \Hom{A,B}.
\end{equation*}
The morphism
$f\circ g$ is the \textbf{composite}\index{composite} of $f$ and $g$.

A
category is \textbf{concrete}\index{concrete} if each of its objects has an underlying
set and the morphisms are functions in the way suggested by the notation.
For example,
  the class of sets, with the class of functions, is a concrete
  category; likewise the class of groups, with homomorphisms, and the
  class of topological spaces, with continuous functions. 
However, not all categories are concrete.  For example,
if $G$ is a group, then its elements can be considered as objects of
  a category in which $\Hom{a,b}=\{ba\inv\}$, $\id_a=1$, and $c\circ
  d=cd$.


In a category, a morphism $f$ is an \textbf{isomorphism}\index{isomorphism} if
\begin{equation*}
  g\circ f=\id_{t(f)}\quad\text{ and }\quad f\circ g=\id_{h(f)}
\end{equation*}
for some morphism $g$; then $g$ is an \textbf{inverse}\index{inverse} of $f$.

\begin{theorem}
  In a category, inverses are unique.
\end{theorem}

\begin{proof}
  If $g$ and $h$ are inverses of $f$, then
  $g=g\circ\id_{h(f)}=g\circ(f\circ h)=(g\circ f)\circ h=
  \id_{t(f)}\circ h=h$.
\end{proof}

If it exists, the inverse of $f$ is denoted by $f\inv$.  It is immediate
then that $(f\inv)\inv=f$.


Suppose we have an arbitrary category as in~\eqref{cat}
and an element $(A_i\colon i\in I)$ or
$A$ of $\class C_0{}^I$ for some index-set $I$.  If it exists, the
\textbf{product}\index{product} of $A$ in the category is an element
\begin{equation*}
  \Bigl(\prod A,i\mapsto\uppi_i\Bigr)
\end{equation*}
of $\class C_0\times\class C_1{}^I$, where
\begin{equation*}
  \uppi_i\colon\prod A\to A_i
\end{equation*}
for each $i$ in $I$, such that,
whenever $(B,i\mapsto f_i)\in\class C_0\times\class C_1{}^I$, where
$f_i\colon B\to A_i$ for each $i$ in $I$, then there is a \emph{unique}
morphism $f$ from $B$ to $\prod A$ such that
\begin{equation*}
  \uppi_i\circ f=f_i
\end{equation*}
for each $i$ in $I$.  Again this condition is expressed by a
commutative diagram.
\begin{equation*}
  \xymatrix%@!
{
&\prod A \ar[d]^{\uppi_j}\\
B\ar[ur]^f \ar[r]_{f_j} &A_j}
\end{equation*}
The morphisms $\uppi_i$ are the \textbf{canonical
  projections.}\index{projection}\index{canonical!--- projection}

\begin{theorem}
  Any two products of the same family of objects in the same category
  are isomorphic.\hfill\qedsymbol 
\end{theorem}

The porism to Theorem~\ref{thm:prod} is that direct products are
products in the category of groups \emph{and} in the category of
abelian groups.

Every category has a \textbf{dual,}%
\index{category!dual ---}%
\index{dual category}
in which the arrows are reversed.  To be precise, the dual of
$(\class C_0,\class C_1,t,h,\id,\circ)$ is
$(\class C_0,\class C_1,h,t,\id,\circ')$, where $f\circ'g=g\circ f$.
A \textbf{co-product}\index{product!co---} or \textbf{sum}\index{sum} in a
category is a product in the 
dual.  The co-product of $A$ may be denoted by
\begin{equation*}
  \Bigl(\coprod A,i\mapsto\iota_i\Bigr)\quad \text{ or }\quad\Bigl(\sum
  A,i\mapsto\iota_i\Bigr); 
\end{equation*}
the morphisms $\iota_i$ are the \textbf{canonical
  injections.}%
\index{injection}%
\index{canonical!--- injection}
The relevant commutative diagram is the following.
\begin{equation*}
  \xymatrix{
A_j\ar[r]^{f_j} \ar[d]_{\iota_j} & H\\
\coprod A \ar[ur]_f&}
\end{equation*}
Thus the coproduct of an indexed family of objects should be the
`simplest' object that contains all
of the `information' contained in each of the original objects.

The porism to Theorem~\ref{thm:oplus} is that direct sums are
coproducts in the category of abelian groups.
Theorem~\ref{thm:free-prod} is that free products are coproducts in
the category of groups.
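For example, in the category of sets, a product of a family
$(A_i\colon i\in I)$ is the Cartesian product with the coordinate projections,
while a co-product is the disjoint union
\begin{equation*}
  \bigcup_{i\in I}(A_i\times\{i\})
\end{equation*}
with the injections $x\mapsto(x,i)$.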

Suppose $F$ is an object in a concrete category and $\fggen$ is a set.
Then $F$ is called 
\textbf{free}%
\index{free!--- group}%
\index{group!free ---} on $\fggen$
with respect to a function $\iota$ from $\fggen$ to $F$ if for any
function $f$ from 
$\fggen$ to an object $B$, there is a unique morphism
$\tilde f$ from $F$ to $B$ such that 
\begin{equation*}
  \tilde f\circ\iota=f.
\end{equation*}
That is, the
following diagram 
commutes (where the nodes
and arrows, except $\tilde f$, are from the category of sets):
\begin{equation*}
\xymatrix{
\fggen \ar[r]^{\iota} \ar[dr]_f & F \ar[d]^{\tilde f} \\
&B
}
\end{equation*}

Theorem~\ref{thm:free-ab} shows that free objects exist in the
category of abelian groups; Theorem~\ref{thm:free-gp}, in the category
of groups. 

\section{Presentation of groups}

\begin{theorem}
  Every group is isomorphic to a quotient of a free group.
\end{theorem}

\begin{proof}
Since every group $G$ is, by Theorem~\ref{thm:free-gp}, a homomorphic image of the free group $\free G$ (extend the identity map on $G$), the claim follows by the First Isomorphism Theorem (a corollary to Theorem~\ref{thm:hom-n}).
\end{proof}

Suppose $G$ is a group, $A$ is a set, $f\colon A\to G$, and $G=\gpgen{f(a)\colon a\in A}$.  Suppose further
$B\included\free A$, and $N$ is the intersection of the set of normal subgroups of $\free A$ that include $B$.
The quotient $\free A/N$, denoted by
\begin{equation*}
  \gpres AB,
\end{equation*}
is referred to as the group with \textbf{generators}\index{generator} $A$ and
\textbf{relations}\index{relation} $B$, even though, strictly, $\free A/N$
here is generated, not by (the elements of) $A$, but by the cosets
$aN$, where $a\in A$.  If there is an isomorphism from $\gpres AB$ to
$G$ taking each of these cosets $aN$ to $f(a)$, then $\gpres AB$ is
called a \textbf{presentation}\index{presentation} of $G$.   

In this definition, rather than assuming $A\included G$, we use the
map $f$ so as to allow the possibility that $f$ is not injective.
Also, if $A=\{a_0,\dots,a_n\}$, and $B=\{w_0,\dots,w_m\}$, then
$\gpres AB$ can be written as $\gpres{a_0,\dots,a_n}{w_0,\dots,w_m}$. 

For example, $\free A$ can be presented as $\gpres A{\emptyset}$, and
in particular $\Z$ can be presented as $\gpres a{\emptyset}$, but also
as $\gpres{a,b}{ab\inv}$. 
The group $\Zmod n$ has the presentation $\gpres a{a^n}$.

\begin{theorem}[von Dyck\footnote{Walther von Dyck (1856--1934) gave an
      early (1882--3) definition of abstract groups \cite[ch.~49,
  p.~1141]{MR0472307}.}]\label{thm:vD}
   Suppose $G$ is a group, $A$ is a set, and $f\colon A\to G$, and let
   $\tilde f$ be the induced homomorphism from $\free A$ to $G$.
   Suppose further $B\included\free A$ and $\gpres AB=\free A/N$.  If
   $\tilde f(w)=\gid$ for each $w$ in $B$, then there is a
   well-defined homomorphism $g$ from $\gpres A B$ to $G$ such that
   $g(aN)=f(a)$ for each $a$ in $A$.  If $G=\gpgen{f(a)\colon a\in
     A}$, then $g$ is an epimorphism. 
  \begin{equation*}
  \xymatrix{
  A \ar[r]^f \ar[d] & G \\
  \free A \ar[ur]|{\tilde f} \ar[r]_{\uppi} & \gpres AB \ar[u]_g
%    \xymatrix{
%F \ar[r]^f \ar[d]_h & G\\
%\gpres{\fggen}{\gprels} \ar[ur]_g &
}
  \end{equation*}
\end{theorem}

\begin{proof}
By definition of $N$, it is included in the kernel of $\tilde f$, the kernel
being a normal subgroup of $\free A$ that includes $B$; so
$g$ is well-defined by Theorem~\ref{thm:hom-n}. 
\end{proof}

\begin{theorem}
If $n>2$, then $\Dih n$ has the presentation $\gpres{a,b}{a^n,b^2,abab}$.
\end{theorem}

\begin{proof}
Let $G=\gpres{a,b}{a^n,b^2,abab}$.  Then in $G$ the order of (the image of)
$a$ divides $n$, while the orders of $b$ and of $ab$ divide $2$. 
But by von Dyck's Theorem and Theorem~\ref{thm:Dn}, $G$ maps onto
$\Dih n$, with $a\mapsto\sigma_n$ and $b\mapsto\beta$; hence $n$ divides the
order of $a$ in $G$, and $2$ divides the orders of $b$ and of $ab$.  These
orders are therefore exactly $n$, $2$, and $2$, so that
$\Dih n\cong G$ by Theorem~\ref{thm:Dn}.
\end{proof}

\begin{theorem}
The group $\gpres{\mi,\mj}{\mi^4,\mi^2\mj^2,\mi\mj\mi^3\mj}$ has order
$8$, and its elements are (the images of) $\pm 1$, $\pm\mi$, $\pm\mj$,
$\pm\mk$, where $1=\gid$ and $\mk=\mi\mj$ and $-x=\mi^2x$. 
\end{theorem}

\begin{proof}
Let the group be called $G$.  In $G$, we have $\mj^2=\mi^{-2}=\mi^2$,
so $\mj^4=1$.  Hence also $\mk=\mi\mj=\mj^3\mi$, so $\mi^3\mj=\mj\mi$.
This shows that every element of $G$ can be written as $\mi^n\mj^m$,
where $n\in4$ and $m\in 2$; hence it is one of the given elements, and
$\order G\leq8$.  Conversely, the quaternions $\mi$ and $\mj$ in $\unit{\Ham}$
satisfy the given relations, so by von Dyck's Theorem $G$ maps onto the
subgroup $\{\pm1,\pm\mi,\pm\mj,\pm\mk\}$ of $\unit{\Ham}$, which has order $8$.
Therefore $\order G=8$, and the listed elements are distinct.
\end{proof}






\section%[Fin.~gen.~ab.~groups]
{Finitely generated abelian groups} 

To \textbf{classify}\index{classify} a collection of groups is to find a function $f$ such
that
\begin{equation*}
  f(G)=f(H)\iff G\cong H
\end{equation*}
for all groups $G$ and $H$ in the collection.  We do this now with the
finitely generated abelian groups, and in particular with the finite
abelian groups.  The next theorem will be needed for
Theorem~\ref{thm:Zp-cross}. 


\begin{theorem}\label{thm:fin-gen-ab}
  For every abelian group $G$ on $n$ generators, there is a unique
  element $k$ of $n+1$, along with positive integers $d_0$, \dots,
  $d_{k-1}$, where  
  \begin{equation}\label{d}
  d_0\divides\dotsm\divides d_{k-1},
  \end{equation}
such that
  \begin{equation}\label{FH}
G\cong    \Zmod{d_0}\oplus\dotsb\oplus\Zmod{d_{k-1}}\oplus
    \underbrace{\Z\oplus\dotsb\oplus\Z}_{n-k}.
  \end{equation}
\end{theorem}

\begin{proof}
%Suppose $G$ is an abelian group with a generating set of size $n$.  
Let $F$ be the free abelian group $\sum_{i\in n}\Z$.  Then
\begin{equation*}
  G\cong F/N,
\end{equation*}
where $N$ is the kernel of the induced epimorphism from $F$ onto $G$.
As before, each element of $F$ can be understood as a formal sum $\sum_{i\in n}x_i\gid^i$.  Then $F$ itself is $\gpgen{\gid^0,\dots,\gid^{n-1}}$.
If $N=\gpgen{d_0\gid^0,\dots,d_{k-1}\gid^{k-1}}$, then $G$ is as in~\eqref{FH}.
Not every subgroup of $F$ is given to us so neatly, but we can use linear algebra to put it into this form.
Every element of $F$, considered as a formal sum, can be written also as a matrix product:
\begin{equation*}
  x_0{\gid}^0+\dotsb+x_{n-1}{\gid}^{n-1}=
  \begin{pmatrix}
  x_0&\cdots&x_{n-1}
  \end{pmatrix}
  \begin{pmatrix}
  {\gid}^0\\\vdots\\{\gid}^{n-1}
  \end{pmatrix}=\tuple x\mathbf e.
\end{equation*}
%for some \emph{unique} $n$-tuple $(x_0,\dots,x_{n-1})$ of integers.
The generators of a (finitely generated) subgroup of $F$ can be considered as the entries of a column vector, and this column can be considered as the product of a matrix over $\Z$ with $\mathbf e$: 
%Considering finite columns of such elements, we have
\begin{equation*}
  \begin{pmatrix}
    x_0^0{\gid}^0+\dotsb+x_{n-1}^0{\gid}^{n-1}\\
\vdots\\
    x_0^{m-1}{\gid}^0+\dotsb+x_{n-1}^{m-1}{\gid}^{n-1}
  \end{pmatrix}
=\begin{pmatrix}
    x_0^0&\dots&x_{n-1}^0\\
\vdots&\ddots&\vdots\\
    x_0^{m-1}&\dots&x_{n-1}^{m-1}
  \end{pmatrix}
\begin{pmatrix}
  {\gid}^0\\\vdots\\{\gid}^{n-1}
\end{pmatrix}
=X\mathbf e.
\end{equation*}
The subgroup of $F$ generated by the rows of $X\mathbf e$ can be
denoted by $\gpgen {X\mathbf e}$.  If
$P$ is an $m\times m$ matrix with integer entries, then
\begin{equation*}
  \gpgen{PX\mathbf e}\included\gpgen{X\mathbf e}.
\end{equation*}
If also $P$ is \emph{invertible}---that is, $\det(P)=\pm1$---then
\begin{equation*}
  \gpgen{PX\mathbf e}=\gpgen{X\mathbf e}.
\end{equation*}
We can therefore perform the following row-operations on $X$, without
changing the group $\gpgen{X\mathbf e}$.  We can
\begin{compactenum}
  \item
interchange two rows,
\item
multiply a row by $-1$,
\item
add an integer multiple of one row to another.
\end{compactenum}
These operations allow us to perform a version of Gaussian elimination, the
Euclidean algorithm taking the place of division.
Adding rows of zeros as necessary, we may also assume that $m\geq n$.
Then for some invertible integer matrix $P$, we have
\begin{equation*}
PX=
  \begin{pmatrix}
    T\\\hline0
  \end{pmatrix},
\end{equation*}
where $T$ is an $n\times n$ upper-triangular matrix,
\begin{equation*}
T=
\begin{pmatrix}
  *&\cdots&*\\
   &\ddots&\vdots\\
  0&      &*
\end{pmatrix}.
\end{equation*}
By using also invertible \emph{column}-operations, we can diagonalize $T$.  That
is, there are invertible integer matrices $P$ and $Q$ such that
\begin{equation*}
  PXQ=
  \begin{pmatrix}
    D\\\hline0
  \end{pmatrix},
\end{equation*}
where
\begin{equation*}
  D=
  \begin{pmatrix}
    d_0&      &0\\
       &\ddots&\\
      0&      &d_{n-1}
  \end{pmatrix}.
\end{equation*}
We now have
\begin{equation*}
  \gpgen{X\mathbf e}=\gpgen{PXQQ\inv\mathbf e}=\gpgen{DQ\inv\mathbf e}\cong\gpgen{D\mathbf e}.
\end{equation*}
Here the final isomorphism is induced by the automorphism of $F$ taking
$\mathbf e$ to $Q\inv\mathbf e$ (another basis of $F$), which carries
$\gpgen{D\mathbf e}$ onto $\gpgen{DQ\inv\mathbf e}$, that is, onto
$\gpgen{X\mathbf e}$; hence $F/N\cong F/\gpgen{D\mathbf e}$.
Working further on $D$ with invertible row- and column-operations, we
may assume~\eqref{d} holds, while $d_k=\cdots=d_{n-1}=0$.
Indeed, suppose $b,c\in\Z$ and $\gcd(b,c)=d$.  By invertible
operations, from
  \begin{equation*}
  \begin{pmatrix}
    b&0\\0&c
  \end{pmatrix}
  \end{equation*}
   we obtain
  $\begin{pmatrix}
    b&0\\c&c
  \end{pmatrix}$ and then
$\begin{pmatrix}
  d&e\\0&f
\end{pmatrix}$,
where $e$ and $f$ are multiples of $c$ and hence of $d$; hence, with
an invertible column-operation, we get
\begin{equation*}
  \begin{pmatrix}
    d&0\\0&f
  \end{pmatrix},
\end{equation*}
where again $d\divides f$.  Applying such
transformations as needed to pairs of entries in $D$
yields~\eqref{d}. 
\end{proof}
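For example, the steps just described transform
\begin{equation*}
  \begin{pmatrix}
    2&0\\0&3
  \end{pmatrix}
  \quad\text{ into }\quad
  \begin{pmatrix}
    2&0\\3&3
  \end{pmatrix},\quad
  \begin{pmatrix}
    1&3\\0&6
  \end{pmatrix},
  \quad\text{ and finally }\quad
  \begin{pmatrix}
    1&0\\0&6
  \end{pmatrix};
\end{equation*}
accordingly $\Zmod2\oplus\Zmod3\cong\Zmod6$, as the Chinese Remainder Theorem
below confirms.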


\begin{porism}
 Every subgroup of a free abelian group on $n$ generators is free abelian on $n$ generators or fewer.
\end{porism}

We can show uniqueness of the numbers $d_j$ by an alternative analysis.

\begin{theorem}[Chinese Remainder]\label{thm:CRT}
  If $\gcd(m,n)=1$, then the homomorphism $x\mapsto(x,x)$ from
  $\Zmod{mn}$ to $\Zmod m\oplus\Zmod n$ is an isomorphism. 
\end{theorem}

\begin{proof}
  If $x\equiv0\pmod m$ and $x\equiv 0\pmod n$,
  then $x\equiv0\pmod{mn}$.  Hence the given homomorphism is
  injective.  Its surjectivity follows by counting.
\end{proof}

The Chinese Remainder Theorem will be generalized as
Theorem~\ref{thm:CRT-R}. 
In the usual formulation of the theorem, every system
\begin{align*}
  x&\equiv a\pmod m,&x&\equiv b\pmod n
\end{align*}
has a unique solution \emph{modulo} $mn$; but this solution is just
the inverse image of $(a,b)$ under the isomorphism $x\mapsto(x,x)$.
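For example, since $\gcd(3,4)=1$,
\begin{equation*}
  \Zmod{12}\cong\Zmod3\oplus\Zmod4,
\end{equation*}
and the system $x\equiv2\pmod 3$, $x\equiv3\pmod4$ has the unique solution
$x\equiv11\pmod{12}$.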

\begin{theorem}
For every finite abelian group $G$, there are primes $p_0$, \dots,
$p_{k-1}$, not necessarily distinct, along with positive
integers $m(0)$, \dots, $m(k-1)$, unique up to the order of the summands, such that 
\begin{equation*}
G\cong  \Zmod{p_0{}^{m(0)}}\oplus\dotsb\oplus\Zmod{p_{k-1}{}^{m(k-1)}}.
\end{equation*}
\end{theorem}

\begin{proof}
To obtain the analysis, apply the Chinese Remainder Theorem to
Theorem~\ref{thm:fin-gen-ab}. 
The analysis is
unique, provided it is unique in the case where all of the $p_j$ are
the same.  But in this case, the analysis is unique, by repeated
application of the observation that the largest order of an element of the
group is the highest prime power appearing in the factorization.
\end{proof}
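For example, up to isomorphism the abelian groups of order $8$ are
\begin{equation*}
  \Zmod8,\qquad\Zmod2\oplus\Zmod4,\qquad\Zmod2\oplus\Zmod2\oplus\Zmod2,
\end{equation*}
and by the uniqueness just established, these are pairwise non-isomorphic.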

\section{Semidirect products}\label{sect:semidirect}

An isomorphism from a structure to itself is an \textbf{automorphism.}\index{automorphism}

\begin{theorem}
The automorphisms of a group $G$ compose a subgroup of
$\Sym G$.
\end{theorem}

The subgroup in the theorem is denoted by
\begin{equation*}
  \Aut G.
\end{equation*}

\begin{theorem}
For every group $G$,
there is a homomorphism 
\begin{equation*}
g\mapsto(x\mapsto gxg\inv)
\end{equation*}
from $G$ to $\Aut G$.
\end{theorem}

An automorphism $x\mapsto gxg\inv$ as in the theorem is
\textbf{conjugation}\index{conjugation} by $g$ and is an \textbf{inner
  automorphism}\index{inner automorphism} of $G$.  The kernel of the homomorphism in the theorem
is the \textbf{center}\index{center} of $G$, denoted by\footnote{An alternative
  formulation of the center of a group is given and generalized in
  \S \ref{sect:nilpotent}.} 
\begin{equation*}
  \centr G.
\end{equation*}
Then $G$ is \textbf{centerless}\index{centerless} if $\centr G$ is trivial.
Repeating
the process of forming inner automorphisms, we obtain a chain
\begin{equation*}
  G\to\Aut G\to\Aut{\Aut G}\to\dotsb,
\end{equation*}
called the \textbf{automorphism tower}\index{automorphism tower} of
$G$.  The tower reaches a fixed point, perhaps after
transfinitely many steps: Simon Thomas~\cite{MR801316} shows this in
case $G$ is centerless; Joel Hamkins~\cite{MR1487370},
in the general case.

\begin{theorem}\label{thm:GNG}
For every group $G$, if $N\nsubgp G$, then there is a homomorphism
\begin{equation*}
g\mapsto(x\mapsto gxg\inv) 
\end{equation*}
from $G$ to $\Aut N$.
\end{theorem}

In the theorem, let the homomorphism be $g\mapsto\sigma_g$.  Suppose
also $H\subgp G$, and $N\cap H=\gpgen{\gid}$.  Then the conditions
of Theorem~\ref{thm:isdp} are met, and $NH$ is an
internal semidirect product.  Equation~\eqref{eqn:sdp}, describing
multiplication on $NH$, can be rewritten as 
\begin{equation*}
  (mg)(nh)=(m\cdot\sigma_g(n))(gh).
\end{equation*}

\begin{theorem}
  Suppose $N$ and $H$ are groups, and $g\mapsto\sigma_g$ is a
  homomorphism from $H$ to $\Aut N$.  Then the set $N\times H$ becomes
  a group when multiplication is defined by
  \begin{equation*}
  (m,g)(n,h)=(m\cdot\sigma_g(n),gh).    
  \end{equation*}
\end{theorem}

\begin{proof}
  To check that the multiplication is associative means checking that 
  \begin{equation*}
    \uplambda_{(m,g)}\uplambda_{(n,h)}=\uplambda_{(m,g)(n,h)}.
  \end{equation*}
We can write $\uplambda_{(m,g)}$ as $\uplambda_m\sigma_g\times
\uplambda_g$.  Then
\begin{align*}
  \uplambda_{(m,g)}\uplambda_{(n,h)}
=(\uplambda_m\sigma_g\times\uplambda_g)(\uplambda_n\sigma_h\times\uplambda_h)
&=\uplambda_m\sigma_g\uplambda_n\sigma_h\times\uplambda_g\uplambda_h\\
&=\uplambda_m\uplambda_{\sigma_g(n)}\sigma_g\sigma_h\times\uplambda_{gh}\\
&=\uplambda_{m\cdot\sigma_g(n)}\sigma_{gh}\times\uplambda_{gh}\\
&=\uplambda_{(m\cdot\sigma_g(n),gh)}\\
&=\uplambda_{(m,g)(n,h)}.
\end{align*}
Finally, $(\gid,\gid)$ is an
identity, and $(\sigma_{h\inv}(n\inv),h\inv)$ is an inverse of $(n,h)$.
\end{proof}

The group given by the theorem is the \textbf{semidirect
  product}\index{semidirect product} of
$N$ and $H$ with respect to $\sigma$; it can be denoted by
\begin{equation*}
  N\rtimes_{\sigma}H.
\end{equation*}
The bijection in Theorem~\ref{thm:isdp} is an isomorphism from
$N\rtimes_{\sigma}H$ to $NH$ when $\sigma$ is as in
Theorem~\ref{thm:GNG}.
% $g\mapsto(x\mapsto gxg\inv)$. 
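For example, let $n>2$, let $N=\Zmod n$ and $H=\Zmod2$, and let $\sigma$ send
the nontrivial element of $H$ to the inversion automorphism $x\mapsto-x$ of
$N$.  In $N\rtimes_{\sigma}H$, the element $a=(1,0)$ has order $n$, while
$b=(0,1)$ and $ab=(1,1)$ have order $2$, and $a$ and $b$ generate; so
\begin{equation*}
  \Zmod n\rtimes_{\sigma}\Zmod2\cong\Dih n
\end{equation*}
by Theorem~\ref{thm:Dn}.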

\begin{theorem}\label{thm:Zp-cross}
If $p$ is prime, then $\Zmodu p\cong\Zmod{p-1}$.
\end{theorem}

\begin{proof}
The group $\Zmodu p$ has order $p-1$ and,
by Theorem~\ref{thm:fin-gen-ab}, is isomorphic to
  \begin{equation*}
    \Zmod{d_0}\oplus\dotsb\oplus\Zmod{d_{k-1}}\oplus \Zmod m,
  \end{equation*}
where $d_0\divides\cdots\divides d_{k-1}\divides m$.
Hence every element of $\Zmodu p$ is a root of the polynomial $x^m-1$.
But this polynomial can have at most $m$ roots in $\Zmod
p$, since this is a \emph{field.}  Hence $p-1\leq m$, so $m=p-1$, and
$k=0$.
\end{proof}

\begin{theorem}
The embedding $x\mapsto\uplambda_x$ of a ring $(E,\cdot)$
in $(\End E,\circ)$ restricts to an embedding of
$\unit{(E,\cdot)}$ in $\Aut E$.  In case $E$ is $\Zmod n$, each
embedding is an isomorphism.  In particular, if $a$ is an element of
$\Zmodu n$ of order $m$, and $m\divides t$, then $\Zmod t$ acts on
$\Zmod n$ by $(x,y)\mapsto a^xy$.  Conversely, if some $\Zmod t$ acts
on $\Zmod n$, then the action is so given for some such $a$.
\end{theorem}

\begin{theorem}\label{thm:pq}
  For every odd prime $p$, for every prime divisor $q$ of $p-1$, there is
  a non-abelian semidirect product $\Zmod p\rtimes_{\sigma}\Zmod q$,
  which is unique up to isomorphism.
\end{theorem}

\begin{proof}
  As $\Zmodu p$ is cyclic, it has a unique subgroup $G$ of order $q$.
  As $q$ is prime, every nontrivial element of $G$ is a generator.
  If $a\in G\setminus\{1\}$, let $\sigma$ be the homomorphism
  $x\mapsto(y\mapsto a^xy)$ from $\Zmod q$ to $\Aut{\Zmod p}$.
  Then
  we can form
  \begin{equation*}
    \Zmod p\rtimes_{\sigma}\Zmod q,
  \end{equation*}
which is non-abelian, since $\sigma_1\neq\id$.
If $\Zmod p\rtimes_{\tau}\Zmod q$ is some other non-abelian
semidirect product, then  $\tau_1$ is $x\mapsto b\cdot x$ for some
$b$ in $G\setminus\{1\}$.  But then $b^n=a$ for some $n$, so there is
an isomorphism from $\Zmod p\rtimes_{\sigma}\Zmod q$ to
$\Zmod p\rtimes_{\tau}\Zmod q$ that takes $(x,y)$ to $(x,ny)$.
\end{proof}

Because of its uniqueness, we may refer to the semidirect product of
the theorem as
\begin{equation*}
 \Zmod p\rtimes\Zmod q.
\end{equation*}
In case $q=2$, this group is $\Dih p$.
The next section develops the tools used in \S \ref{sect:class-small}
to show that there is no other way to obtain a group of order $pq$
for distinct primes $p$ and $q$. 

\chapter{Finite groups}

\section{Actions of groups}\label{sect:actions}

\begin{theorem}
  Let $G$ be a group, and $\setactedon$ a set.  There is a one-to-one
  correspondence between
  \begin{compactenum}
    \item
homomorphisms $g\mapsto(a\mapsto ga)$ from $G$ into
$\Sym{\setactedon}$, and 
\item
functions $(g,a)\mapsto ga$ from $G\times A$ into $A$ such that
\begin{gather}\label{act:1}
\gid a=a,\\\label{act:gha}
(gh)a=g(ha)
\end{gather}
for all $g$ and $h$ in $G$ and $a$ in $A$.
  \end{compactenum}
\end{theorem}

\begin{proof}
If $g\mapsto(a\mapsto ga)$ maps $G$ homomorphically into
$\Sym{\setactedon}$, then~\eqref{act:1} and~\eqref{act:gha} follow.
Suppose conversely that these hold.  Then, in particular,
\begin{equation*}
g(g\inv a)=(gg\inv)a=\gid a=a
\end{equation*}
 and likewise $g\inv(ga)=a$, so $a\mapsto g\inv a$ is the inverse of
$a\mapsto ga$, and the function $g\mapsto(a\mapsto ga)$ does map $G$
into $\Sym{\setactedon}$, homomorphically by~\eqref{act:gha}.  
\end{proof}

Either of two functions that correspond as in the theorem is a
\textbf{(left) action}\index{action}\index{left!--- action} of $G$ on
$A$.  Examples include the following. 

\begin{asparaenum}[1.]
  \item
A symmetry group of a set acts on the set in the obvious way, by
\begin{equation*}
(\sigma,x)\mapsto\sigma(x). 
\end{equation*}
\item
An arbitrary group $G$ acts on itself by left
multiplication: 
\begin{equation*}
(g,x)\mapsto\uplambda_g(x).
\end{equation*}
\item
If $H\subgp G$, then $G$ acts on
the set $G/H$ by
\begin{equation*}
  (g,xH)\mapsto gxH.
\end{equation*}
\item
Finally, $G$ acts on itself by conjugation:
\begin{equation*}
  (g,x)\mapsto gxg\inv.
\end{equation*}
\end{asparaenum}

Suppose $(g,x)\mapsto gx$ is an arbitrary action of $G$ on
$\setactedon$.  
If $a\in\setactedon$, then
the subset $\{g\setcolon ga=a\}$ of $G$ is the \textbf{stabilizer}\index{stabilizer} of $a$,
denoted by 
\begin{equation*}
G_a;
\end{equation*}
the subset $\{ga\setcolon g\in G\}$ of ${\setactedon}$ is the
\textbf{orbit}\index{orbit} of $a$, denoted by 
\begin{equation*}
  Ga.
\end{equation*}
The subset $\{x\setcolon G_x=G\}$ of ${\setactedon}$ can be denoted by
\begin{equation*}
  {\setactedon}_0.
\end{equation*}
See\label{app-ref} Appendix~\ref{App:ga} for an alternative development of these
notions. 

\begin{theorem}\label{thm:action}
  Let $G$ act on ${\setactedon}$ by
 $(g,x)\mapsto gx$.
  \begin{compactenum}
\item
The orbits partition $\setactedon$;
    \item
$G_a\subgp G$;
\item\label{GGa}
$[G:G_a]=\size {Ga}$.
  \end{compactenum}
\end{theorem}

\begin{proof}
  For~\eqref{GGa}, we establish a bijection between $G/G_a$ and $Ga$
  by noting that
  \begin{equation*}
    gG_a=hG_a\iff h\inv g\in G_a\iff ga=ha;
  \end{equation*}
so the bijection is $gG_a\mapsto ga$.
\end{proof}

\begin{corollary}
If there are only finitely many orbits in $\setactedon$ under $G$,
then
\begin{equation}\label{eqn:class}
  \size {\setactedon}=\size{{\setactedon}_0} +\sum_{a\in X}[G:G_a]
\end{equation}
for some set $X$ containing exactly one element from each nontrivial orbit in $\setactedon$.  
\end{corollary}

Equation~\eqref{eqn:class} is the \textbf{class equation.}\index{class
  equation} 
For example,
suppose $G$ acts on itself by conjugation, and $g\in G$.
Then $Gg$ is the \textbf{conjugacy class}\index{conjugacy class} of $g$, while
$G_g$ is the \textbf{centralizer}\index{centralizer} of $g$, denoted
by\footnote{More generally, if $H<G$, then
  $\centralizer gH=\{h\in H\colon hgh\inv=g\}$.}
  \begin{equation*}
  \centralizer gG.
  \end{equation*}
Finally, $G_0$ is the \textbf{center}\index{center} of $G$, denoted by
    \begin{equation*}
\centr G.      
    \end{equation*}
The class equation for the present case can now be written as
\begin{equation*}
  \size G=\size{\centr G}+\sum_{a\in X}[G:\centralizer aG].
\end{equation*}
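By way of illustration (a computational aside, with the representation
of permutations chosen ad hoc), the class equation of $\Sym 3$ under
conjugation reads $6=1+3+2$; the following Python sketch computes the
center and the nontrivial conjugacy classes by brute force.
\begin{verbatim}
# Conjugation action of Sym(3) on itself: orbits are conjugacy
# classes, stabilizers are centralizers; check |G| = |Z(G)| + sum.
from itertools import permutations

G = list(permutations(range(3)))                 # Sym(3)
def mul(s, t):  return tuple(s[t[i]] for i in range(3))
def inv(s):     return tuple(sorted(range(3), key=lambda i: s[i]))

def conj_class(x):  return {mul(mul(g, x), inv(g)) for g in G}
center = [x for x in G if all(mul(x, g) == mul(g, x) for g in G)]

reps, seen = [], set()                           # one representative
for x in G:                                      # per nontrivial orbit
    if x not in seen and len(conj_class(x)) > 1:
        reps.append(x)
        seen |= conj_class(x)

total = len(center) + sum(len(conj_class(x)) for x in reps)
print(len(G), "=", len(center), "+",
      [len(conj_class(x)) for x in reps])        # 6 = 1 + [3, 2]
assert total == len(G)
\end{verbatim}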

A \textbf{finite $p$-group}\index{finite
  $p$-group}\index{p-group@$p$-group} is a finite group whose 
order is a power of the prime $p$.

\begin{theorem}\label{thm:act-cong}
  If ${\setactedon}$ is acted on by a $p$-group, then $\size
  {\setactedon}\equiv\size{{\setactedon}_0}\pmod p$.
\end{theorem}

\begin{proof}
In the class equation,  $[G:G_a]$ is a multiple of $p$ in each
  case.
\end{proof}

A first application of this theorem is

\begin{theorem}[Cauchy]\label{thm:Cauchy}
  If the prime $p$ divides the order of the finite group $G$, then $\order g=p$ for some $g$ in $G$.
\end{theorem}

\begin{proof}[Proof (J. H. McKay \cite{MR0098777}).]
Suppose $p$ divides $\size G$.  We seek a nontrivial solution in $G$
  of the equation
  \begin{equation*}
  x^p=\gid.  
  \end{equation*}
  Let $\setactedon$ be the set
  \begin{equation*}
    \{\tuple x\in G^p\setcolon x_0\dotsm x_{p-1}=\gid\};
  \end{equation*}
so we seek $g$ in $G$ such that $(g,\dots,g)\in\setactedon$ and
$g\neq\gid$. 
If $(g_0,\dots,g_{p-1})\in\setactedon$ and $k<p$, then
\begin{align*}
  (g_0\dotsm g_{k-1})(g_k\dotsm g_{p-1})&=\gid,
&(g_k\dotsm g_{p-1})(g_0\dotsm g_{k-1})&=\gid,
\end{align*}
and therefore
\begin{equation*}
(g_k,\dots,g_{p-1},g_0,\dots,g_{k-1})\in\setactedon.
\end{equation*}
Thus $\Zmod p$ acts on $\setactedon$ by
\begin{equation*}
  (k,(g_0,\dots,g_{p-1}))\mapsto
  (g_k,\dots,g_{p-1},g_0,\dots,g_{k-1}).
\end{equation*}
With respect to this action,
\begin{equation*}
    {\setactedon}_0=\{(g,\dots,g)\colon g^p=\gid\};
\end{equation*}
also $\Zmod p$ is a finite $p$-group.
Now, the map
\begin{equation*}
(g_1,\dots,g_{p-1})\longmapsto\bigl((g_1\dotsm
  g_{p-1})\inv,g_1,\dots,g_{p-1}\bigr) 
\end{equation*}
is a bijection
from $G^{p-1}$ onto ${\setactedon}$, so
$\size {\setactedon}$ is a multiple of~$p$; hence
$\size{{\setactedon}_0}$ 
is a multiple of~$p$, by Theorem~\ref{thm:act-cong}.  Since ${\setactedon}_0$
contains $(\gid,\dots, \gid)$, it contains at least $p$ elements, hence some
$(g,\dots,g)$ in which $g\neq\gid$; then $\order g=p$. 
\end{proof}
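The counting in this proof can be watched in a small case (an informal
Python check only): for $G=\Sym 3$ and $p=3$, the set $\setactedon$ has
$36$ elements, and its three fixed points under the cyclic shift are
exactly the solutions of $x^3=\gid$.
\begin{verbatim}
# McKay's counting for G = Sym(3), p = 3: triples (g0,g1,g2) with
# g0*g1*g2 = e, acted on by cyclic shifts from Z/3.
from itertools import permutations, product

G = list(permutations(range(3)))
e = (0, 1, 2)
def mul(s, t): return tuple(s[t[i]] for i in range(3))

A = [(g0, g1, g2) for g0, g1, g2 in product(G, repeat=3)
     if mul(mul(g0, g1), g2) == e]
fixed = [t for t in A if t[1:] + t[:1] == t]   # shift-invariant triples

print(len(A), len(fixed))   # 36 and 3: both multiples of 3
assert len(A) % 3 == 0 and len(fixed) % 3 == 0
assert any(g != e for (g, _, _) in fixed)      # an element of order 3
\end{verbatim}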

\begin{corollary}
  A finite group is a $p$-group if and only if the order of every
  element is a power of~$p$. 
\end{corollary}

\begin{proof}
  If $\ell$ is a prime dividing $\order g$, then $\ell$ divides $\size
  G$.  Conversely, if $\ell$ divides $\size G$, then $G$ has an
  element of order $\ell$.
\end{proof}

Hence an arbitrary group is a \textbf{$p$-group} if the order of each of
its elements is a power of~$p$.  

\begin{theorem}\label{thm:p-ntc}
  Every nontrivial $p$-group has nontrivial center.
\end{theorem}

\begin{proof}
  By Theorem~\ref{thm:act-cong},
  \begin{equation*}
    \size G\equiv\size{\centr G}\pmod p,
  \end{equation*}
so $p$ divides $\size{\centr G}$.  Since $\centr G$ contains at least one
element, it contains at least $p$ of them.
\end{proof}

\begin{theorem}
  All groups of order $p^2$ are abelian.
\end{theorem}

\begin{proof}
  Let $G$ have order $p^2$.  Then either $\centr G$ is
  all of $G$, or else $\size{\centr G}=p$, by the previous theorem.
  In either case, there is $a$ in $G$ such that
  \begin{equation*}
    G=\gpgen{\{a\}\cup\centr G}.
  \end{equation*}
(If $\size{\centr G}=p$, take $a$ outside $\centr G$; then
$\gpgen{\{a\}\cup\centr G}$ properly includes $\centr G$, so it must be all of $G$.)
But elements of $\centr G$ commute with all elements of $G$; and
powers of $a$ commute with each other (and with elements of $\centr
G$); hence $G$ is abelian.
\end{proof}

Supposing $G$ is an arbitrary group and $H\subgp G$, let $\setactedon$
be the set
\begin{equation*}
  \{gHg\inv\colon g\in G\}
\end{equation*}
of conjugates of $H$.  Then $G$ acts on $\setactedon$ by
conjugation,
\begin{equation*}
  (g,K)\mapsto gKg\inv.
\end{equation*}
The stabilizer of $H$ under this action is the \textbf{normalizer}\index{normalizer} of $H$
in $G$, denoted by\footnote{More generally, if also $K\subgp G$, then
  $\normalizer HK=\{k\in K\colon kHk\inv=H\}$.}
\begin{equation*}
\normalizer HG.
\end{equation*}
If $H\subgp K\subgp G$, then 
\begin{equation*}
  H\nsubgp K\iff K\subgp \normalizer HG.
\end{equation*}

\begin{theorem}\label{thm:normalizer}
Suppose $G$ is a group with subgroups $H$ and $K$.
Under the action of $H$ on $G/K$ by left multiplication,
\begin{equation*}
  gK\in(G/K)_0\iff H\subgp gKg\inv.
\end{equation*}
In case $H=K$, a finite group,
\begin{equation*}
(G/H)_0=\normalizer HG/H.
\end{equation*}
\end{theorem}

\begin{proof}
We compute:
\begin{align*}
gK\in(G/K)_0
&\iff hgK=gK&&\text{for all $h$ in $H$}\\
&\iff g\inv hgK=K&&\text{for all $h$ in $H$}\\
&\iff g\inv hg\in K&&\text{for all $h$ in $H$}\\
&\iff h\in gKg\inv&&\text{for all $h$ in $H$}\\
&\iff H\subgp gKg\inv.&&
%\\
%&\iff g\inv Hg=H&&\\
%&\iff g\inv\in\normalizer HG&&\\
%&\iff g\in\normalizer HG&&\\
%&\iff gH\in\normalizer HG/H.&&
\end{align*}
If $H$ is finite, then 
\begin{equation*}
H<gHg\inv\iff H=gHg\inv\iff g\in\normalizer HG.\qedhere
\end{equation*}
\end{proof}

A \textbf{$p$-subgroup} of a group is a subgroup that is a
$p$-group. 

\begin{lemma}%\label{thm:HpGNGH}
  If $H$ is a $p$-subgroup of $G$, then
  \begin{equation*}
    [G:H]\equiv[\normalizer HG:H]\pmod p.
  \end{equation*}
\end{lemma}

\begin{proof}
Theorems~\ref{thm:normalizer} and~\ref{thm:act-cong}.
\end{proof}

\begin{lemma}
  If $H$ is a $p$-subgroup of $G$, and $p$ divides $[G:H]$, then $H$
  is a normal subgroup of some $p$-subgroup $K$ of $G$ such that
  $[K:H]=p$.  
\end{lemma}

\begin{proof}
By the last lemma, $p$ divides $[\normalizer HG:H]$.  
Since $H\nsubgp\normalizer HG$, the quotient $\normalizer HG/H$ is a group.
By Cauchy's Theorem (Theorem~\ref{thm:Cauchy}),
this group has an element $gH$ of order
$p$.  So $\gpgen{\{g\}\cup H}$ is the desired $K$.  
\end{proof}

 A \textbf{Sylow $p$-subgroup}\index{Sylow!--- subgroup}
is a maximal $p$-subgroup.  The following is a partial converse to
Lagrange's Theorem (Theorem~\ref{thm:Lagrange}).

\begin{theorem}[Sylow I]\label{thm:Sylow-1}\index{Sylow!--- Theorems|(}
  \index{theorem!Sylow Th---s|(} 
For every finite group of order $p^nm$, where $p\ndivides m$, there is
a chain 
\begin{equation*}
  H_1\subgp H_2\subgp\cdots<H_n
\end{equation*}
of subgroups, where $\size{H_1}=p$ and in each case $H_i\nsubgp
H_{i+1}$ and $[H_{i+1}:H_i]=p$. 
Every nontrivial $p$-subgroup of such a group appears in such a chain.
In particular, every $p$-subgroup is included in a Sylow $p$-subgroup,
whose index is indivisible by $p$. 
\end{theorem}

\begin{proof}
Cauchy's Theorem (Theorem~\ref{thm:Cauchy}) and repeated application of
the last lemma.
\end{proof}

\begin{corollary}
  The conjugate of a Sylow $p$-subgroup is a
  Sylow $p$-subgroup.
  A \emph{unique} Sylow $p$-subgroup is normal.
\end{corollary}

A converse to the corollary is the following.

\begin{theorem}[Sylow II]\label{thm:Sylow-2}
  All Sylow $p$-subgroups are conjugate.
\end{theorem}

\begin{proof}
  Say $H$ and $P$ are $p$-subgroups of $G$, where $P$ is maximal.
  Then $H$ acts
  on  the set $G/P$ by left multiplication.  By
  Theorem~\ref{thm:act-cong}, since $[G:P]$ is not a multiple of $p$,
  the set $(G/P)_0$ has an element $aP$.  By
  Theorem~\ref{thm:normalizer}, $H\subgp aPa\inv$.  If $H$ is
also Sylow, then $H=aPa\inv$ by Theorem~\ref{thm:Sylow-1}.
\end{proof}

\begin{theorem}[Sylow III]\label{thm:Sylow-3}
The number of Sylow $p$-subgroups of a finite group is congruent to
$1$ \emph{modulo} $p$ and divides the order of the group.
\end{theorem}

\begin{proof}
Let $A$ be the set of Sylow $p$-subgroups of a finite group $G$.
Then $G$ acts on $A$ by conjugation.  Let $H\in A$.  By
Theorem~\ref{thm:Sylow-2},  the orbit of
  $H$ is precisely $A$.  The stabilizer of $H$ is $\normalizer
  HG$.  Then by Theorem~\ref{thm:action} \eqref{GGa},
  \begin{equation*}
    [G:\normalizer HG]=\size A,
  \end{equation*}
so $\size A$ divides $\size G$.

Now consider $H$ as acting on $A$ by conjugation.  
Then the following are equivalent:
\begin{compactenum}
  \item
$P\in A_0$,
\item
$H\subgp \normalizer PG$,
\item
$H$ is a Sylow subgroup of $\normalizer PG$,
\item
$H=P$,
\end{compactenum}
since $P\nsubgp\normalizer PG$, so $P$ is the unique Sylow $p$-subgroup
of $\normalizer PG$.  
Therefore $A_0=\{H\}$, so by
Theorem~\ref{thm:act-cong}
\begin{equation*}
  \size A\equiv \size{A_0}\equiv1\pmod p.\qedhere
\end{equation*}
\end{proof}\index{Sylow!--- Theorems|)} \index{theorem!Sylow Th---s|)}
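The Sylow Theorems can be checked by brute force in small cases.  The
following Python sketch (purely illustrative; the choice of $\Alt 4$ is
arbitrary) enumerates the subgroups of $\Alt 4$ of orders $4$ and $3$:
there are $1$ and $4$ of them respectively, in accordance with
Theorem~\ref{thm:Sylow-3}.
\begin{verbatim}
# Sylow counts for Alt(4) (order 12 = 2^2 * 3): enumerate subgroups
# of orders 4 and 3 by brute force and check the congruences.
from itertools import permutations, combinations

def sign(p):
    return (-1) ** sum(p[i] > p[j] for i in range(4) for j in range(i+1, 4))

G = [p for p in permutations(range(4)) if sign(p) == 1]   # Alt(4)
e = (0, 1, 2, 3)
def mul(s, t): return tuple(s[t[i]] for i in range(4))

def is_subgroup(S):   # finite, closed, contains e => subgroup
    return e in S and all(mul(a, b) in S for a in S for b in S)

syl2 = [S for S in combinations(G, 4) if is_subgroup(set(S))]
syl3 = [S for S in combinations(G, 3) if is_subgroup(set(S))]

print(len(syl2), len(syl3))          # 1 and 4
assert len(syl2) % 2 == 1 and 12 % len(syl2) == 0
assert len(syl3) % 3 == 1 and 12 % len(syl3) == 0
\end{verbatim}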















\section{Classification of small groups}\label{sect:class-small}

We can now complete the work, begun in \S \ref{sect:semidirect}, of
classifying the groups of order $pq$ for primes $p$ and $q$.

\begin{lemma}
  Suppose $p$ and $q$ are distinct primes such that
  \begin{equation*}
q\not\equiv 1\pmod p
\end{equation*}
and $\size G=pq$.  Then $G$ has a unique Sylow $p$-subgroup,
  which is therefore normal.
\end{lemma}

\begin{proof}
  Let $A$ be the set of Sylow $p$-subgroups of $G$.  Then $\size
  A\equiv 1\pmod p$ by Theorem~\ref{thm:Sylow-3}, so $\size A$ is not
  $q$ or $pq$; but $\size A$ divides $pq$;  so $\size A=1$.
\end{proof}

\begin{theorem}
    Suppose $p$ and $q$ are primes, where $p<q$, so that $p\not\equiv
    1\pmod q$, and $G$ is a group of
    order $pq$.
    \begin{compactenum}\renewcommand{\labelenumi}{\theenumi.}
    \item 
If $q\not\equiv 1\pmod p$, then $G$ is cyclic.  
    \item 
If $q\equiv 1\pmod p$, then either $G$ is cyclic, or else $G$ is
the unique non-abelian semidirect product $\Zmod p\rtimes\Zmod q$.  
    \end{compactenum}
In particular, every non-abelian group of order $2q$ is isomorphic to
$\Dih q$.
\end{theorem}

\begin{proof}
By the lemma, $G$ has a normal subgroup $N$ of order $q$, and $N$ is
cyclic by a corollary to Lagrange's Theorem (Theorem~\ref{thm:Lagrange}).
By the first Sylow Theorem (Theorem~\ref{thm:Sylow-1}), $G$ has a
Sylow $p$-subgroup $H$, which has order $p$ and is
therefore cyclic.  Then $N\cap H=\gpgen{\gid}$, so $G=NH$ by
Theorem~\ref{thm:isdp} and counting.
  \begin{asparaenum}[1.]
    \item
If $q\not\equiv 1\pmod p$, then $H\nsubgp G$ by the lemma, so 
$G=N\times H$ by Theorem~\ref{thm:wdp}.  The product is
cyclic by the Chinese Remainder Theorem (Theorem~\ref{thm:CRT}). 
\item
  If $q\equiv 1\pmod p$, then $G$ might still be $N\times H$;
  otherwise, $G$ is isomorphic to $\Zmod p\rtimes\Zmod q$ by
  Theorem~\ref{thm:pq}. \qedhere
  \end{asparaenum}
\end{proof}

We now know all groups of order less than $36$, but different from
$8$, $12$, $16$, $18$, $20$, $24$, $27$, $28$, $30$, and $32$.

\begin{theorem}
  Every group of order $8$ is isomorphic to one of
  \begin{align*}
    &\Zmod 8,&
&\Zmod 2\oplus\Zmod 4,&
&\Zmod 2\oplus\Zmod 2\oplus\Zmod 2,&
&\Dih 4,&
&\quat.
  \end{align*}
\end{theorem}

\begin{proof}
  Say $\order G=8$.  If $G$ is abelian, then its possibilities are
  given by Theorem~\ref{thm:fin-gen-ab}.  Suppose $G$ is not abelian.
  Then $G$ has an element $a$ of order greater than $2$
  by \cite[Exercise I.1.13, p.~30]{MR600654}, and so $\order a=4$ (since
  $G\ncong\Zmod 8$).  Then
  $\gpgen a\nsubgp G$ by \cite[Exercise I.5.1, p.~45]{MR600654}.  Let
  $b\in G\setminus\gpgen a$.  Then $b^2$ is either $\gid$ or $a^2$
  (since otherwise $b$ would generate $G$).  In the former case,
  $G=\gpgen a\rtimes\gpgen b$, so $G\cong\Dih 4$.  In the latter case,
  $G\cong\quat$. 
\end{proof}

\begin{theorem}
  Every group of order $12$ is isomorphic to one of
  \begin{align*}
    &\Zmod{12},&
&\Zmod 2\oplus\Zmod 6,&
&\Alt 4,&
&\Dih 6,&
&\gpres{a,b}{a^6,a^3b^2,bab\inv a}.
  \end{align*}
\end{theorem}

\begin{proof}
  Suppose $\order G=12$, but $G$ is not abelian.  A Sylow
  $3$-subgroup of $G$ has order $3$, so it is $\gpgen a$ for some $a$.
  Then $G$ acts on $G/\gpgen a$ by left multiplication, 
  and $[G:\gpgen a]=4$, so there is a homomorphism from $G$ to $\Sym 4$.  If
  this is an embedding, then $G\cong\Alt 4$.  Assume it is not an
  embedding.  Then the kernel must be $\gpgen a$, so $\gpgen a\nsubgp
  G$.

Let $H$ be a Sylow $2$-subgroup of $G$.  Then $H$ is isomorphic to
$\Zmod 4$ or $\Zmod 2\oplus\Zmod 2$.  In any case, $H$ has two
elements $b$ and $c$ such that none of $b$, $c$, or $bc$ is $\gid$.
Since $G$ is not $\gpgen a\times H$, we may assume
\begin{equation*}
  bab\inv=a^2.
\end{equation*}
If also $cac\inv=a^2$, then $bcac\inv b\inv=a$.  Thus $H$ has an
element that commutes with $a$.  Hence $G$ has a subgroup $K$ isomorphic
to $\Zmod 6$.  If $G\setminus K$ has an element of order $2$, then
$G\cong\Dih 6$; otherwise, $G$ is the last possibility above.
\end{proof}


















\section{Nilpotent groups}\label{sect:nilpotent}

For a group, what is the next best thing to being abelian?
A group $G$ is abelian if and only if $\centr G=G$.  (See
\S \ref{sect:semidirect}.)  To weaken this
condition, we define
the \textbf{commutator}\index{commutator} of two elements $a$ and $b$ of $G$ to be
\begin{equation*}
  aba\inv b\inv;
\end{equation*}
this can be denoted by
\begin{equation*}
[a,b].
\end{equation*}
Then
\begin{equation*}
  \centr G=\{g\in G\setcolon \Forall x[g,x]=\gid\}.
\end{equation*}
We now generalize this by defining
\begin{gather*}
  \cseries 0G=\gpgen{\gid},\\
\cseries{n+1}G=\{g\in G\setcolon \Forall x[g,x]\in\cseries nG\}.
\end{gather*}
Then $\centr G=\cseries 1G$.

\begin{theorem}\label{thm:central}
Let $G$ be a group.
\begin{compactenum}
\item\label{item:C1}
$\cseries nG\nsubgp G$.
\item\label{item:C2}
$\cseries nG\subgp \cseries{n+1}G$.
\item\label{item:C3}
$\cseries{n+1}G/\cseries nG=\centr{G/\cseries nG}$.
\end{compactenum}
\end{theorem}

\begin{proof}
We use induction to prove~\ref{item:C1}, and
incidentally~\ref{item:C2} and~\ref{item:C3}.
  Trivially, $\cseries 0G\nsubgp G$.  Suppose $\cseries kG\nsubgp G$.
  Then the following are equivalent:
  \begin{gather*}
g\in \cseries{k+1}G;\\
\Forall x[g,x]\in\cseries kG;\\
\Forall x gxg\inv x\inv\in\cseries kG;\\
\Forall x\cseries kGgx=\cseries kGxg;\\
\cseries kGg\in\centr{G/\cseries kG}.
  \end{gather*}
Thus $\cseries kG\subgp \cseries {k+1}G$, and $\cseries{k+1}G/\cseries
kG=\centr{G/\cseries kG}$; in particular, 
\begin{equation*}
\cseries{k+1}G/\cseries
kG\nsubgp G/\cseries kG, 
\end{equation*}
so $\cseries{k+1}G\nsubgp G$. 
\end{proof}

The \textbf{ascending central series}\index{ascending central series}
of $G$ is the sequence $(\cseries 
nG\colon n\in\upomega)$, usually written out as
\begin{equation*}
  \gpgen{\gid}\nsubgp\centr G\nsubgp\cseries 2G\nsubgp\cseries 3G\nsubgp\dotsb.
\end{equation*}
A group is called \textbf{nilpotent}\index{nilpotent} if the terms in the sequence are
eventually the group itself, that is, for some $n$ in~$\upomega$,
\begin{equation*}
  \cseries nG=G.
\end{equation*}
So an abelian group is nilpotent, since its center is itself. 
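As an informal illustration (the realization of the group is ad hoc),
the ascending central series of $\Dih 4$, the dihedral group of order
$8$, can be computed by brute force in Python; it climbs from the
center, of order $2$, to the whole group, so $\Dih 4$ is nilpotent, as a
theorem below guarantees for every finite $p$-group.
\begin{verbatim}
# Ascending central series of the dihedral group of order 8,
# realized as pairs (r, s) with r in Z/4 and s in Z/2.
from itertools import product

G = list(product(range(4), range(2)))
def mul(a, b):
    (r1, s1), (r2, s2) = a, b
    return ((r1 + (-1) ** s1 * r2) % 4, (s1 + s2) % 2)
def inv(a):
    r, s = a
    return ((-(-1) ** s * r) % 4, s)
def comm(a, b):                       # [a, b] = a b a^-1 b^-1
    return mul(mul(a, b), mul(inv(a), inv(b)))

C = {(0, 0)}                          # C_0(G) = {e}
while C != set(G):                    # terminates: Dih(4) is nilpotent
    C = {g for g in G if all(comm(g, x) in C for x in G)}
    print(sorted(C))
# prints the center {(0,0), (2,0)}, then all eight elements
\end{verbatim}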

Suppose $G$ is nilpotent, and in particular $\cseries nG=G$.  Fix some
$g$ in $G$, and let $f$
be the operation $x\mapsto[g,x]$ on $G$.  Writing $f^0$ for $\id_G$
and $f^{n+1}$ for $f\circ f^n$, we have
\begin{align*}
  f^0(x)&\in G,&
f(x)&\in\cseries{n-1}G,&
f^2(x)&\in\cseries{n-2}G,&
&\dots,&
f^n(x)&=\gid.
\end{align*}
Thus $f$ is ``nilpotent'' in the monoid of operations on $G$.
However, this should not be taken as a sufficient condition for $G$ to
be nilpotent. 

Examples of nilpotent groups are given by:

\begin{theorem}
  Finite $p$-groups are nilpotent.
\end{theorem}

\begin{proof}
Suppose $G$ is a $p$-group.  If $H$ is a proper normal subgroup of
  $G$, then $G/H$ is a nontrivial $p$-group, so by
Theorem~\ref{thm:p-ntc} it has a nontrivial 
  center.  By Theorem~\ref{thm:central} the ascending central series
  of $G$ is strictly 
  increasing, until it reaches $G$ itself.
\end{proof}

The converse fails, because of:

\begin{theorem}
  A finite direct product of nilpotent groups is nilpotent.
\end{theorem}

\begin{proof}
Use that 
\begin{equation*}
\centr{G\times H}=\centr G\times\centr H.
\end{equation*}
If $\cseries nG=G$ and $\cseries mH=H$, then
$\cseries{\max\{n,m\}}{G\times H}=G\times H$.
\end{proof}

We now proceed to the converse of this theorem.

\begin{lemma}
If  $\cseries nG\subgp H$, then $\cseries{n+1}G
  \subgp \normalizer HG$. 
\end{lemma}

\begin{proof}
  Say $g\in\cseries{n+1}G$; we show $gHg\inv\included H$.  But if $h\in
  H$, then $[g,h]\in\cseries nG$, 
  so $ghg\inv\in\cseries nGh\included H$.  Therefore $gHg\inv\included
  H$.
\end{proof}

\begin{lemma}
  If $G$ is nilpotent, and $H\psubgp G$, then
  $H\psubgp\normalizer HG$.  
\end{lemma}

\begin{proof}
  Let $n$ be maximal such that $\cseries nG\subgp H$; such $n$ exists,
  since $\cseries mG=G$ for some $m$, while $H$ is a proper subgroup.  Then
  $\cseries{n+1}G\setminus H$ is non-empty, and, by the last lemma, its
  members belong to $\normalizer HG$.
\end{proof}

\begin{theorem}
  A finite nilpotent group is the direct product
  of its Sylow subgroups.
\end{theorem}

\begin{proof}
  Suppose $G$ is a finite nilpotent group.  We shall show that every
  Sylow subgroup of $G$ is a normal subgroup.  By
  Theorem~\ref{thm:wdp}, the first and second Sylow Theorems
  (Theorems~\ref{thm:Sylow-1} and~\ref{thm:Sylow-2}), and counting, $G$
  will be the direct product of its Sylow subgroups.

Suppose then $P$ is a Sylow
  $p$-subgroup of $G$.  We shall show that $P\nsubgp G$.
To do this, it is
  enough to show $\normalizer PG=G$.  To do \emph{this,} by the last
  lemma, it is enough
  to show $\normalizer{\normalizer PG}G\subgp \normalizer PG$.  To do
  \emph{this,} note that, as $P\nsubgp\normalizer PG$, so $P$ is the
  unique Sylow $p$-subgroup of $\normalizer PG$.  Hence, in
  particular, for any $x$ in $G$, if $xPx\inv\subgp \normalizer PG$, then
  $xPx\inv=P$, so $x\in\normalizer PG$.  But every $x$ in
  $\normalizer{\normalizer PG}G$ satisfies the
  hypothesis. 
\end{proof}

\section{Soluble groups}

The \textbf{commutator subgroup}\index{commutator!--- subgroup} of a
group $G$ is the subgroup
\begin{equation*}
  \gpgen{[x,y]\setcolon(x,y)\in G^2},
\end{equation*}
which is denoted by
\begin{equation*}
  G'.
\end{equation*}

\begin{theorem}\label{thm:G'}
  $G'$ is the smallest of the normal subgroups $N$ of $G$ such that
  $G/N$ is abelian.
\end{theorem}

\begin{proof}
  If $f$ is a homomorphism defined on $G$, then
  \begin{equation}\label{eqn:f([x,y])}
    f([x,y])=f(xyx\inv y\inv)=f(x)f(y)f(x)\inv f(y)\inv=[f(x),f(y)].
  \end{equation}
Thus, if $f\in\Aut G$, then
 $f(G')\subgp G'$.  In particular,
  $xG'x\inv\subgp G'$ for 
  all $x$ in $G$; so $G'\nsubgp G$.  Suppose $N\nsubgp G$; then the
  following are equivalent:
  \begin{compactenum}
    \item
$G/N$ is abelian;
\item
$N=[x,y]N$ for all $(x,y)$ in $G^2$;
\item
$G'\subgp N$.\qedhere
  \end{compactenum}
\end{proof}

We now define the \textbf{derived subgroups}\index{derived subgroup}
$\dsubgp nG$ of $G$ by 
\begin{gather*}
  \dsubgp 0G=G,\\
\dsubgp {n+1}G=(\dsubgp nG)'.
\end{gather*}
We have a descending sequence
\begin{equation*}
  G\nsupgp G'\nsupgp\dsubgp 2G\nsupgp\dotsb
\end{equation*}
The group $G$ is called \textbf{soluble}\index{soluble}
if this sequence reaches $\gpgen{\gid}$ (after finitely many steps).
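Before turning to matrix examples, here is an informal computational
illustration (a Python brute-force check, nothing more): the derived
series of $\Sym 4$ descends
$\Sym 4\nsupgp\Alt 4\nsupgp\Kfg\nsupgp\gpgen{\gid}$, with orders $24$,
$12$, $4$, $1$, so $\Sym 4$ is soluble.
\begin{verbatim}
# Derived series of Sym(4): orders 24, 12, 4, 1, so Sym(4) is soluble.
from itertools import permutations

G = set(permutations(range(4)))
e = tuple(range(4))
def mul(s, t): return tuple(s[t[i]] for i in range(4))
def inv(s):    return tuple(sorted(range(4), key=lambda i: s[i]))

def derived(H):
    """Subgroup generated by the commutators of H."""
    gens = {mul(mul(a, b), mul(inv(a), inv(b))) for a in H for b in H}
    S = {e} | gens
    while True:                       # close under multiplication
        new = {mul(a, b) for a in S for b in S} - S
        if not new:
            return S
        S |= new

H = G
while True:
    print(len(H))                     # 24, 12, 4, 1
    if H == {e}:
        break
    H = derived(H)
\end{verbatim}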


For examples, let $K$ be a field.  Let
$G$ be the subgroup of $\GL K$ consisting of \textbf{upper triangular
  matrices.}\index{upper triangular}  So 
$G$ comprises the matrices
\begin{equation*}
    \begin{pmatrix}
    a_0&      &*\\
       &\ddots& \\
    0  &      &a_{n-1}
  \end{pmatrix}
\end{equation*}
where $a_0\dotsm a_{n-1}\neq0$.
We have
\begin{equation*}
  \begin{pmatrix}
    a_0&      &*\\
       &\ddots& \\
    0  &      &a_{n-1}
  \end{pmatrix}
  \begin{pmatrix}
    b_0&      &*\\
       &\ddots& \\
    0  &      &b_{n-1}
  \end{pmatrix}
=
  \begin{pmatrix}
    a_0b_0&      &*\\
       &\ddots& \\
    0  &      &a_{n-1}b_{n-1}
  \end{pmatrix}
\end{equation*}
so the map taking a matrix to its diagonal is a homomorphism onto the
abelian group $(\unit K)^n$; therefore every element of $G'$ is
\textbf{unitriangular,}\index{unitriangular} that is, takes the form of
\begin{equation*}
  \begin{pmatrix}
    1&      &*\\
      &\ddots& \\
    0  &      &1
  \end{pmatrix}.
\end{equation*}
We also have
\begin{multline*}
    \begin{pmatrix}
    1&a_1&      &*\\
     &1  &\ddots& \\
     &   &\ddots&a_{n-1}\\
    0&   &      &1
  \end{pmatrix}
    \begin{pmatrix}
    1&b_1&      &*\\
     &1  &\ddots& \\
     &   &\ddots&b_{n-1}\\
    0&   &      &1
  \end{pmatrix}\\
=
    \begin{pmatrix}
    1&a_1+b_1&      &*\\
     &1  &\ddots& \\
     &   &\ddots&a_{n-1}+b_{n-1}\\
    0&   &      &1
  \end{pmatrix},
\end{multline*}
so the map taking a unitriangular matrix to its first superdiagonal is a
homomorphism onto the additive group $K^{n-1}$, and the elements of $G''$
take the form of
\begin{equation*}
  \begin{pmatrix}
    1&0&      &*\\
     &1  &\ddots& \\
     &   &\ddots&0\\
    0&   &      &1
  \end{pmatrix}.
\end{equation*}
Proceeding, we find $\dsubgp{n+1}G=\gpgen{\gid}$. 

\begin{theorem}
  Nilpotent groups are soluble.
\end{theorem}

\begin{proof}
  Each quotient $\cseries{k+1}G/\cseries kG$ is the
  center of a group, namely $G/\cseries kG$, so it is abelian.
  By Theorem~\ref{thm:G'} then,
  \begin{equation*}
  \cseries{k+1}G'\subgp \cseries kG.
  \end{equation*}
Suppose $G$ is nilpotent, so that $G=\cseries
nG$ for some $n$ in $\upomega$.  Working left to right,  
we can build up the following commutative
diagram, where arrows are inclusions:
\begin{equation*}
\xymatrix{
G \ar[d] & G' \ar[l]\ar[d] & \dsubgp 2G \ar[l]\ar[d] & \ar[l] \dsubgp
3G \ar[d] & \dsubgp nG \ar@{.>}[l]\ar[d]\\
G \ar[d] & \cseries nG' \ar[l]\ar[d] & \cseries{n-1}G'\ar[l]\ar[d] &
\cseries{n-2}G'\ar[l]\ar[d] & \centr G' \ar@{.>}[l] 
\ar[d]\\
\cseries nG & \ar[l] \cseries {n-1}G & \ar[l] \cseries{n-2}G & \ar[l]
\cseries{n-3}G & \gpgen{\gid} \ar@{.>}[l]
}
\end{equation*}
That is, we know $\dsubgp 0G\subgp\cseries nG$; and if
$\dsubgp k G \subgp  \cseries{n-k}G$
for some $k$ in $n$, then
\begin{equation*}
  \dsubgp{k+1}G=(\dsubgp kG)'\subgp \cseries{n-k}G'\subgp \cseries{n-(k+1)}G. 
\end{equation*}
By induction then, $\dsubgp nG\subgp \cseries 0G=\gpgen{\gid}$, so
$\dsubgp nG=\gpgen{\gid}$. 
\end{proof}


\begin{theorem}
  Solubility is preserved in subgroups and quotients.  If $N\nsubgp
  G$, and $N$ and $G/N$ are soluble, then $G$ is soluble.
\end{theorem}

\begin{proof}
  Suppose $f\colon G\to H$.
  By~\eqref{eqn:f([x,y])}, we have $f(\dsubgp
  nG)\subgp \dsubgp nH$,
  with equality if $f$ is surjective.  The case where $f$ is an
  inclusion of $G$ in $H$ shows that subgroups of soluble groups are
  soluble.  The case where $f$ is a quotient map shows that quotients
  of soluble groups are soluble.


Finally, if $N\nsubgp G$, then $(G/N)'=G'N/N$.  Suppose $\dsubgp
n{(G/N)}=\gpgen{\gid}$, and
$\dsubgp mN=\gpgen{\gid}$.
Then $\dsubgp nG\subgp N$ and so $\dsubgp{n+m}G=\gpgen{\gid}$.
\end{proof}

\begin{theorem}
Groups with non-abelian simple subgroups are not soluble.  In particular,
  $\Sym n$ is not soluble if $n\geq 5$, since it has the non-abelian
  simple subgroup $\Alt n$.
\end{theorem}

\begin{proof}
  Suppose $H$ is simple.  Since $H'\nsubgp H$, we have either
  $H'=\gpgen{\gid}$ or $H'=H$.  In the former case, $H$ is abelian; in
  the latter, $H$ is insoluble, and then so is every group having $H$ as
  a subgroup, since subgroups of soluble groups are soluble.
\end{proof}

The last theorem suggests the origin of the notion of solubility of
groups: the general 5th-degree polynomial equation
\begin{equation*}
  a_0+a_1x+a_2x^2+a_3x^3+a_4x^4+x^5=0
\end{equation*}
is ``insoluble by radicals'' precisely because $\Sym 5$ is an
insoluble group.


\section{Normal series}

A \textbf{normal series}\index{normal!--- series} for a group $G$ is
a sequence $(G_n\colon n\in\upomega)$ of subgroups, where $G_{n+1}\nsubgp
G_n$ in  each case; the situation can be depicted by
\begin{equation*}
  G=G_0\nsupgp G_1\nsupgp G_2\nsupgp\dotsb
\end{equation*}
(If one wants to distinguish, one may call this a \textbf{subnormal
  series}\index{subnormal series}\index{series!subnormal ---}, reserving
  \emph{normal} for the case where each $G_i$ is
  normal in $G$.)  The \textbf{factors}\index{factor} of 
the normal series are the quotients $G_i/G_{i+1}$.  If $G_n=\gpgen{\gid}$
for some $n$, then the series is called
\begin{compactenum}
  \item
a \textbf{composition series,}\index{composition series}\index{series!composition ---} if the
factors are simple; 
\item
a \textbf{soluble series,}\index{soluble series}\index{soluble!---
  series}\index{series!soluble ---} if the factors are abelian.
\end{compactenum}

For example,
  if $G$ is nilpotent, then the series
  \begin{equation*}
    \gpgen{\gid}\nsubgp \centr G\nsubgp \cseries 2G\nsubgp\dotsb \nsubgp G 
  \end{equation*}
is a soluble series.  


\begin{theorem}
A group is soluble if and only if it has a soluble series.
\end{theorem}

\begin{proof}
  If the series
  \begin{equation*}
    G\nsupgp G_1\nsupgp G_2\nsupgp\dotsb\nsupgp G_n=\gpgen{\gid}
  \end{equation*}
is soluble, then, by Theorem~\ref{thm:G'}, we have
\begin{align*}
  G'&<G_1,&
G''&<G_1{}'<G_2,&
G'''&<G_1{}''<G_2'<G_3,&
\dsubgp nG&\subgp G_n=\gpgen{\gid},
\end{align*}
so $G$ is soluble.
Conversely, if $G$ is soluble, then the series
\begin{equation*}
  G\nsupgp G'\nsupgp\dsubgp 2G\nsupgp\dotsb\nsupgp\gpgen{\gid}
\end{equation*}
is a soluble series.
\end{proof}

So not every group has a soluble series.  However:

\begin{theorem}\label{thm:comp}
  Every finite group has a composition series.
\end{theorem}

\begin{proof}
  A nontrivial finite group $G$ has a maximal proper normal subgroup $N$.  Then
  $G/N$ is simple.  Indeed, every normal subgroup of $G/N$ is $H/N$ for
  some normal subgroup $H$ of $G$ such that $N\subgp H$, and therefore
  $H$ is either $N$ or $G$.  

So we can form
  $G=G_0\nsupgp G_1\nsupgp\cdots$, where each $G_{n+1}$ is a maximal
  proper normal subgroup of $G_n$.  The factors are simple, and, since
  $G$ is finite, the series must terminate.
\end{proof}

If, from a normal series, another can be got by deleting some terms,
then the former is a \textbf{refinement}\index{refinement} of the latter.
As a normal series, a composition series is maximal in that it has no
nontrivial refinement, that is, no refinement without trivial factors.

A soluble series for a finite group has a refinement in which the
nontrivial factors are cyclic of prime order.

Any normal series is \textbf{equivalent}\index{equivalent} to the series that results
when all repeated terms are deleted (so that all trivial factors are
removed).  Then two normal series
\begin{equation*}
  G_i(0)\nsupgp G_i(1)\nsupgp G_i(2)\nsupgp\dotsb\nsupgp G_i(n)
\end{equation*}
(where $i<2$) with no trivial factors are \textbf{equivalent}\index{equivalent} if there
is $\sigma$ in $\Sym n$ such that 
\begin{equation*}
  G_0(i)/G_0(i+1)\cong G_1(\sigma(i))/G_1(\sigma(i+1))
\end{equation*}
for each $i$ in $n$.  We now aim to prove Theorem~\ref{thm:JH} below.

\begin{lemma}[Zassenhaus or Butterfly]\index{Zassenhaus
    Lemma}\index{Butterfly Lemma}\index{theorem!Zassenhaus Lemma}
  \index{theorem!Butterfly Lemma}\index{lemma|see{theorem}}
  Suppose $N_i\nsubgp H_i\subgp G$ for each $i$ in $2$.  Let
  $H=H_0\cap H_1$.  Then:
  \begin{compactenum}
    \item
$N_i(H_i\cap N_{1-i})\nsubgp N_iH$ for each $i$;
\item
the two groups $N_iH/N_i(H_i\cap N_{1-i})$ are isomorphic.
  \end{compactenum}
\end{lemma}

\begin{proof}
We have $H_i\cap N_{1-i}\nsubgp H$.
Let 
\begin{equation*}
K=(H_0\cap N_1)(H_1\cap N_0); 
\end{equation*}
then
$K\nsubgp H$.  The groups we have to work with form the commutative
diagram below, arrows being inclusions.
\begin{equation*}
  \xymatrix{
   &            H_0        &               & H_1                   &\\
   &            N_0H\ar[u] &               & N_1H\ar[u]            &\\
   &                       &H\ar[ul]\ar[ur]&                       &\\
   &N_0(H_0\cap N_1)\ar[uu]&               &N_1(H_1\cap N_0)\ar[uu]&\\
N_0\ar[ur]&&K\ar[ul]\ar[ur]\ar[uu]&&N_1\ar[ul]\\
&H_1\cap N_0\ar[ul]\ar[ur]&&H_0\cap N_1\ar[ul]\ar[ur]&
}
\end{equation*}
We exhibit an epimorphism from $N_iH$ onto $H/K$
whose kernel is $N_i(H_i\cap N_{1-i})$.  Now, if $n,n'\in N_i$ and
$h,h'\in H$ and $nh'=n'h$, then
\begin{equation*}
  h'h\inv=n\inv n'\in N_i\cap H\subgp K,
\end{equation*}
so that $Kh=Kh'$.  Hence there is a well-defined homomorphism $f$ from
 $N_iH$ into $H/K$ such that, if $n\in N_i$ and $h\in H$, then
\begin{equation*}
  f(nh)=Kh.
\end{equation*}
That $f$ is surjective is clear.
Moreover, the following are equivalent conditions on such $n$ and $h$:
\begin{compactenum}
  \item
$nh\in \Ker f$;
\item
$h\in K$;
\item\label{item:h=n_0n_1}
$h=n_0n_1=n_1n_0$ for some $n_i$ in $H_{1-i}\cap N_i$.
\end{compactenum}
Also,~\eqref{item:h=n_0n_1} implies that $nh=nn_in_{1-i}$, which is in
$N_i(H_i\cap N_{1-i})$; thus
\begin{compactenum}\setcounter{enumi}{3}
\item\label{item:nh-in}
 $nh\in N_i(H_i\cap N_{1-i})$.  
\end{compactenum}
Conversely,
suppose~\eqref{item:nh-in} holds.  Then also $h=n\inv nh$, which is
also in $N_i(H_i\cap
N_{1-i})$, so $h=n'h'$ for some $n'$ in $N_i$ and $h'$ in $N_{1-i}\cap
H_i$.  Then $n'=h(h')$, which is in $\in H_{1-i}$, so $n'\in
N_i\cap H_{1-i}$, 
and therefore $h\in K$.
\end{proof}

\begin{theorem}[Schreier]\index{Schreier Theorem}
  \index{theorem!Schreier Th---}
  Any two normal series have equivalent refinements.
\end{theorem}

\begin{proof}
  Suppose that
  \begin{equation*}
    G=G_i(0)\nsupgp G_i(1)\nsupgp\dotsb\nsupgp G_i(n_i)=\gpgen{\gid},
  \end{equation*}
where $i<2$, are normal series for $G$.  
In particular,
\begin{equation*}
  G_i(j+1)\nsubgp G_i(j)\subgp G.
\end{equation*}
Define
\begin{equation*}
  G_i(j,k)=G_i(j+1)(G_i(j)\cap G_{1-i}(k)),
\end{equation*}
where $(j,k)\in n_i\times n_{1-i}$.  Then
\begin{multline*}
  G_i(j)=G_i(j,0)\nsupgp G_i(j,1)\nsupgp\dotsb\nsupgp G_i(j,n_{1-i}-1)\\
  \nsupgp G_i(j,n_{1-i})=G_i(j+1),
\end{multline*}
giving us normal series that are refinements of the original ones; but also
\begin{equation*}
  G_0(j,k)/G_0(j,k+1)\cong G_1(k,j)/G_1(k,j+1)
\end{equation*}
by the Butterfly Lemma.
\end{proof}

\begin{theorem}[Jordan--H\"older]\label{thm:JH}\index{Jordan--H\"older
    Theorem}\index{theorem!Jordan--H\"older Th---}
  Any two composition series of a group are equivalent.
\end{theorem}

Combining this with Theorem~\ref{thm:comp}, we have that every finite
group has a uniquely determined set of simple ``factors''.  Hence the
interest in the classification of the finite simple groups.


\part{Rings}

\chapter{Rings in the most general sense}

\section{Not-necessarily-associative rings}\label{sect:nna-rings}

Rings were introduced in \S \ref{sect:rings}.  A more general
definition is possible.  If $E$ is an abelian
group (written additively), then a \textbf{multiplication}\index{multiplication} on $E$ is a
binary operation that distributes in both senses over addition.  In
the most general sense then, a \textbf{ring}\index{ring} is an abelian group with
a multiplication.  The ring is \textbf{associative}\index{associative} if the
multiplication is associative.  



Associative rings are not the only rings of interest.  For example,
the associative ring 
$\Ham$ defined in \S \ref{sect:new} has the automorphism
$z+w\mj\mapsto\bar z-w\mj$; then the same 
construction that creates $\Ham$ out of $\C$
can be applied to $\Ham$ 
itself, yielding the ring $\Oct$ of \textbf{octonions;}\index{octonion}
but this ring is not associative.  Also, if $(E,\cdot)$ is a ring,
then there is another multiplication on $E$, namely $\bracket$ or
$(x,y)\mapsto[x,y]$, where 
\begin{equation*}
[x,y]=x\cdot y-y\cdot x; 
\end{equation*}
this multiplication makes
$E$ into a \textbf{Lie ring,}\index{Lie ring} namely a ring that 
respects the identity
\begin{equation*}
  [x,x]=0
\end{equation*}
along with the \textbf{Jacobi identity,}\index{Jacobi identity}
\begin{equation*}
[[x,y],z]=[x,[y,z]]-[y,[x,z]].
\end{equation*}
For example, from the associative ring $(\End E,\circ)$, we obtain the
Lie ring $(\End E,\bracket)$.  Then $\End E$ has a subgroup $\Der{E,\cdot}$,
which is closed under $\bracket$, but not generally under~$\circ$.
Specifically, $\Der{E,\cdot}$ consists of the
\textbf{derivations}\index{derivation} of 
$(E,\cdot)$, which are the endomorphisms $D$ of $E$ respecting the
\textbf{Leibniz rule,}
\begin{equation*}
  D(x\cdot y)=Dx\cdot y+x\cdot Dy.
\end{equation*}
In particular, `taking the derivative' on the field of meromorphic
functions on $\C$ is a derivation.   Derivations will be used in \S \ref{sect:fact-pol}. 
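As a concrete illustration of the Leibniz rule (an informal check only;
the coefficient-list representation is chosen for convenience), formal
differentiation of polynomials with integer coefficients is a
derivation, and the Python sketch below spot-checks the rule on random
examples.
\begin{verbatim}
# Formal differentiation on integer polynomials (coefficient lists,
# lowest degree first) is a derivation: D(f*g) = D(f)*g + f*D(g).
import random

def add(f, g):
    n = max(len(f), len(g))
    return [(f[i] if i < len(f) else 0) + (g[i] if i < len(g) else 0)
            for i in range(n)]

def mul(f, g):
    h = [0] * (len(f) + len(g) - 1)
    for i, a in enumerate(f):
        for j, b in enumerate(g):
            h[i + j] += a * b
    return h

def D(f):                       # the formal derivative
    return [i * a for i, a in enumerate(f)][1:] or [0]

def trim(f):                    # drop trailing zero coefficients
    while len(f) > 1 and f[-1] == 0:
        f = f[:-1]
    return f

for _ in range(100):            # spot-check the Leibniz rule
    f = [random.randint(-5, 5) for _ in range(4)]
    g = [random.randint(-5, 5) for _ in range(4)]
    assert trim(D(mul(f, g))) == trim(add(mul(D(f), g), mul(f, D(g))))
\end{verbatim}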

\begin{theorem}
Every ring respects the identities
\begin{align*}
  (x-y)\cdot z&=x\cdot z-y\cdot z,&
x\cdot(y-z)&=x\cdot y-x\cdot z.
\end{align*}
Hence, in particular,
\begin{gather}\label{eqn:0.x=0}
    0\cdot x=0=x\cdot 0,\\\notag
(-x)\cdot y=-(x\cdot y)=x\cdot(-y).
\end{gather}
\end{theorem}

A ring is \textbf{unital} if it has
a multiplicative identity, generally denoted by $1$.
The result of Theorem~\ref{thm:exp-in-groups} can be strengthened when
the scope of the theorem is restricted to abelian groups:

\begin{theorem}\label{thm:Z-action}
  Let $E$ be an abelian group.  Then $n\mapsto (x\mapsto nx)$ is a
  homomorphism of unital rings from $(\Z,\cdot,1)$ to $(\End
  E,\circ,\id_E)$.
\end{theorem}

In a word, we can say that, as a unital ring, $\Z$ \textbf{acts} on
the endomorphism group of every abelian group.  Compare the notion of
action defined in \S \ref{sect:actions}.  
In the notation of Theorem~\ref{thm:Z-action},
\begin{gather}\label{eqn:0x=0}
  0x=0,\\\notag
1x=x,\\\label{eqn:-x}
(-1)x=-x;
\end{gather}
here~\eqref{eqn:0x=0} is~\eqref{eqn:a^0} written additively; combining
it with~\eqref{eqn:0.x=0}, we have
\begin{equation*}
  0\cdot x=0x,
\end{equation*}
where the zeros come from the ring and from $\Z$ respectively.
More generally, we have

\begin{theorem}
For every integer $n$, every ring respects the identity
  \begin{equation*}
(nx)\cdot y=n(x\cdot y)=x\cdot ny.
  \end{equation*}
\end{theorem}

\begin{proof}
  Induction and~\eqref{eqn:-x}.
\end{proof}

\section{Associative, not-necessarily-unital rings}

Henceforth the word \emph{ring} means associative ring.
By
Theorem~\ref{thm:x-lambda_x}, a unital ring also acts on the
endomorphism group of the underlying abelian group.  We have in particular
\begin{equation*}
  1\cdot x=1x.
\end{equation*}

Again a ring is
\textbf{commutative}\index{commutative} if the multiplication is
commutative.
As examples of commutative rings with identity, we have $\Z$ and $\Zmod n$ (by Theorem~\ref{thm:Z-mod-n}); and if $R$ is a
commutative ring with identity, then $\MatR$ is a ring with
identity, by Theorem~\ref{thm:M}.
  The continuous functions on $\R$ with compact
  support compose a ring with respect to the operations induced from
  $\R$: this ring has no identity. 

The \textbf{characteristic}\index{characteristic} of a ring $(E,\cdot)$ is the non-negative
integer $n$ such that $\gpgen n$ is the kernel of the homomorphism
$m\mapsto(y\mapsto my)$ from $\Z$ to $\End E$.  This kernel is the kernel
of $m\mapsto m1$, if $(E,\cdot)$ has an identity. 
For example,
if $0\leq n$, then  $\Zmod n$ has characteristic $n$.

\begin{theorem}
  Every ring embeds in a ring with identity having the same
  characteristic, and in a ring with identity having characteristic $0$.
\end{theorem}

\begin{proof}
 Suppose $R$ is a ring of characteristic $n$.  Let $A$ be $\Z$ or
 $\Zmod n$, and give $A\oplus R$ the multiplication defined by
 \begin{equation*}
   (m,x)(m',y)=(mm',my+m'x+xy);
 \end{equation*}
then $(1,0)$ is an identity, and $x\mapsto(0,x)$ is an embedding.
\end{proof}
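An informal check of this construction (the choices $R=2\Z$ and $A=\Z$
are made here only for illustration): $2\Z$ is a ring without identity
of characteristic $0$, and the Python sketch below verifies on random
samples that $(1,0)$ is an identity for the stated multiplication and
that associativity and distributivity hold.
\begin{verbatim}
# Adjoining an identity: R = 2Z (no identity), A = Z, and on the
# direct sum define (m,x)(m',y) = (mm', my + m'x + xy).
import random

def mult(a, b):
    (m, x), (mp, y) = a, b
    return (m * mp, m * y + mp * x + x * y)

def add(a, b):
    return (a[0] + b[0], a[1] + b[1])

sample = [(random.randint(-9, 9), 2 * random.randint(-9, 9))
          for _ in range(30)]

one = (1, 0)
for a in sample:
    assert mult(one, a) == a == mult(a, one)          # identity
for a in sample[:10]:
    for b in sample[:10]:
        for c in sample[:10]:
            assert mult(mult(a, b), c) == mult(a, mult(b, c))
            assert mult(a, add(b, c)) == add(mult(a, b), mult(a, c))
\end{verbatim}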

\section{Unital associative rings}

\emph{Henceforth the word \emph{ring} means ring with
  identity,} as it did in \S \ref{sect:rings}.  
We know from
Theorem~\ref{thm:units} that a ring $R$ has a group of units, $\unit
R$.  The example in 
\S \ref{sect:prod-sum} shows that some ring elements can have right
inverses without being units.  However, if $a$ has both a left and a
right inverse, then they are the same, since if $ab=1=ca$, then
\begin{equation*}
c=c1=c(ab)=(ca)b=1b=b.
\end{equation*}
A \textbf{zero-divisor}\index{zero-divisor}\index{divisor!zero ---} of
$R$ is an element
$b$ distinct from $0$ such
that the equations $bx=0$ and $yb=0$ have non-zero solutions in $R$.  So
zero-divisors are not units. 
For example, if $m>1$ and $n>1$, then $m+\gpgen {mn}$ and $n+\gpgen{mn}$ are
  zero-divisors in $\Zmod {mn}$.  
The unique element of the trivial ring $\Zmod 1$ is a unit, but not a
zero-divisor.

A commutative ring is an
\textbf{integral domain}\index{integral domain}\index{domain!integral ---}%
\index{ring|seealso{domain}} if it has no zero-divisors and $1\neq0$.
So fields are integral domains.
But $\Z$ is an integral domain that is not a field.
If $p$ is prime, then $\Zmod
  p$ is a field, denoted by $\F_p$. 

An arbitrary ring $R$ such that $R\setminus\unit R=\{0\}$ is a
\textbf{division ring.}\index{division ring}  So fields are division rings; but $\Ham$ is a
non-commutative division ring.

If $R$ is a ring, and $G$ is a group, we can form the direct sum
$\sum_{g\in G}R$, which is, first of all, an abelian group; we can
give it a multiplication as follows.  We write an element $(r_g\colon
g\in G)$ of the direct sum as
\begin{equation*}
  \sum_{g\in G}r_gg;
\end{equation*}
this is a \textbf{formal finite $R$-linear combination}\index{linear
  combination}  of the elements of
$G$.  Then multiplication is defined as one expects: if $r,s\in R$ and
$g,h\in G$, then
\begin{equation*}
  (rg)(sh)=(rs)(gh),
\end{equation*}
and the definition extends to all of $\sum_{g\in G}R$ by
distributivity.  The resulting ring can be denoted by
\begin{equation*}
  R(G);
\end{equation*}
it is the \textbf{group ring}\index{group ring} of $G$ over $R$.

We can do the same construction with monoids, rather than
groups.  For example, if we start with the free monoid generated by a
symbol $X$, we get a \textbf{polynomial ring}\index{polynomial ring}
in one variable, denoted by 
\begin{equation*}
  R[X];
\end{equation*}
this is the ring of formal $R$-linear combinations
\begin{equation*}
  \sum_{k=0}^na_kX^k,
\end{equation*}
where $n\in\upomega$ and $a_k\in R$.  We could use a second variable,
getting for example $R[X,Y]$.  Usually $R$ here is commutative; often
it is in fact a field.

\section{Ideals}

If $A$ is a sub-ring of $R$, then we can form the abelian group
$R/A$.  We could try to define a multiplication on this by
\begin{equation*}
  (x+A)(y+A)=xy+A.
\end{equation*}
However, if $x-x'\in A$, and $y-y'\in A$, we need not have $xy-x'y'\in
A$. 

A \textbf{left ideal}\index{left!--- ideal}\index{ideal!left ---}
of $R$ is a sub-ring $I$ such that
\begin{equation*}
  RI\included I,
\end{equation*}
that is, $rx\in I$ whenever $r\in R$ and $x\in I$.  Likewise,
\textbf{right}\index{right!--- ideal}\index{ideal!right ---} and
\textbf{two-sided}
\index{two-sided ideal}\index{ideal!two-sided ---}ideal. 
For example,
the set of matrices
\begin{equation*}
  \begin{bmatrix}
    * & 0 & \dots & 0\\
\vdots & \vdots & & \vdots\\
* & 0 & \dots & 0
  \end{bmatrix}
\end{equation*}
is a left ideal of $\MatR$, but not a right ideal unless $n=1$.
Also,  $Rx$ is a left ideal of $R$, while $RxR$ is a two-sided ideal.


\begin{theorem}
  If $I$ is a two-sided ideal of $R$, then $R/I$ is a well-defined
  ring.  The kernel of a ring-homomorphism is a two-sided ideal.
\end{theorem}

Suppose $(A_i\setcolon i\in I)$ is an indexed family of left ideals of
a ring $R$.  Let the abelian subgroup of $R$ generated by
$\bigcup_{i\in I}A_i$ be denoted by 
\begin{equation*}
  \sum_{i\in I}A_i;
\end{equation*}
this is the \textbf{sum}\index{sum} of the left ideals $A_i$.  This must not be
confused with the \emph{direct sums} defined in \S \ref{sect:prod-sum}.
If in particular $I=n$,
let the abelian subgroup of $R$ generated by
\begin{equation*}
  \{a_0\dotsm a_{n-1}\setcolon a_i\in A_i\}
\end{equation*}
be denoted by
\begin{equation*}
  A_0\dotsm A_{n-1};
\end{equation*}
this is the \textbf{product}\index{product} of the left ideals $A_i$.


\begin{theorem}
Sums and finite products of left ideals are left ideals; sums and
products of two-sided ideals are two-sided ideals.  Addition and
multiplication of ideals are associative; addition is commutative;
multiplication distributes over addition.
\end{theorem}

\begin{theorem}
  If $A$ and $B$ are left ideals of a ring, then so is $A\cap B$, and
  $AB\included A\cap B$.
\end{theorem}

Usually $AB$ does not include $A\cap B$, since for example $A^2$ might
not include $A$; such is the case when $A=2\Z$, since then $A^2=4\Z$.

\begin{theorem}
  If $f\colon R\to S$ is a homomorphism of rings, and $I$ is a two-sided
  ideal of $R$
  included in $\Ker f$, then there is a unique homomorphism $\tilde
  f$ from $R/I$ to $S$ such that $f=\tilde f\circ\uppi$.
\end{theorem}

Hence the isomorphism theorems, as for groups.

\chapter{Commutative rings}

\section{Commutative rings}\label{sect:comm}

Henceforth, let all rings be commutative, so all ideals are two-sided.
A subset $A$ of a ring $R$ determines the ideal denoted by
\begin{equation*}
  (A),
\end{equation*}
namely the smallest ideal including $A$.  This consists of the
\textbf{$R$-linear combinations}\index{linear combination} of elements
of $A$, namely the well-defined sums
\begin{equation*}
\sum_{a\in
  A}r_aa, 
\end{equation*}
where $r_a\in R$; in particular, $r_a=0$ for all but finitely many
  $a$. 

If $A=\{a\}$, then $(A)$ is denoted by
\begin{equation*}
  (a)
\end{equation*}
or $Ra$
and is called a \textbf{principal
  ideal.}\index{principal!--- ideal}\index{ideal!principal ---}  A
\textbf{principal ideal domain}\index{principal!--- ideal
  domain}\index{domain!principal ideal ---} or
\pid\ is an integral domain whose 
every ideal 
is principal.
For example,
  $\Z$ is a \pid\ by Theorem~\ref{thm:Z-subg}.
But in the polynomial ring $\R[X,Y]$, the ideal $(X,Y)$ is not
  principal. 


An ideal is proper if and only if it does not contain a unit.
A \emph{proper} ideal $P$ is \textbf{prime}\index{prime} if
\begin{equation}\label{eqn:p-ideal}
  ab\in P\implies a\in P\lor b\in P.
\end{equation}
So a ring in which $1\neq0$ is an integral domain if and only if $(0)$
is a prime ideal. 
Compare the definition of prime ideal with the following: a positive
integer $p$ is prime if and only if
\begin{equation*}
  p\divides ab\implies p\divides a\lor p\divides b.
\end{equation*}
We shall address the relation between prime integers and prime ideals
in \S \ref{sect:factor}.  Meanwhile, an 
equivalent formulation of prime ideals is given by the following.

\begin{theorem}
  A proper ideal $P$ of a ring is prime if and only if, for all ideals $I$
  and $J$ of the ring,
  \begin{equation}\label{eqn:IJ}
    IJ\included P\iff I\included P\lor J\included P.
  \end{equation}
\end{theorem}

\begin{proof}
  The given condition has~\eqref{eqn:p-ideal} as a special case, since
  the latter can be written as
  \begin{equation*}
    (a)(b)\included P\implies(a)\included P\lor (b)\included P.
  \end{equation*}
Also, if~\eqref{eqn:IJ} fails, so that $IJ\included P$, but
$I\setminus P$ contains some $a$, and $J\setminus P$ contains some
$b$, then $ab\in P$, so~\eqref{eqn:p-ideal} fails.
\end{proof}


\begin{theorem}
  A proper ideal $P$ of a ring $R$ is prime if and only if $R/P$ is an
  integral domain.
\end{theorem}

\begin{proof}
  That $P$ is prime means~\eqref{eqn:p-ideal}, which can be written as
\begin{equation*}
  (a+P)(b+P)=P\implies a+P=P\lor b+P=P;
\end{equation*}
but this means $R/P$ is integral.
\end{proof}

An ideal is called \textbf{maximal}\index{maximal} if it is maximal as
a proper ideal. 
A ring is a field if and only if $(0)$ is a maximal ideal.  (Note that
$(0)$ is in fact the ideal with \emph{no} generators, so it could be
written as $(\ )$; but it usually is not.)

\begin{theorem}
  A proper ideal $I$ of a ring $R$ is maximal if and only if $R/I$ is
  a field.
\end{theorem}

\begin{proof}
That $R/I$ is a field means that, if $a\in R\setminus I$, then for
some $b$,
\begin{equation*}
  ab\in 1+I.
\end{equation*}
That $I$ is maximal means that, if $a\in R\setminus I$, then
\begin{equation*}
  I+(a)=R,
\end{equation*}
equivalently,
$1\in I+(a)$, which means that, for some $b$,
$ba-1\in I$.
\end{proof}

\begin{corollary}
  Maximal ideals are prime.
\end{corollary}

The converse fails easily, since
 the prime ideals of $\Z$ are the ideals $(0)$ and $(p)$, where $p$ is
 prime, and the latter are maximal, but $(0)$ is not.  However, it is
 not even the case that prime ideals other than $(0)$ are always maximal.
For example, $\R[X,Y]$ has the prime ideal $(X)$, which
is not maximal.


A ring is \textbf{Boolean}\index{Boolean} if it respects the identity
  \begin{equation*}
    x^2=x.
  \end{equation*}
For example, if $\Omega$ is a set, then $\pow{\Omega}$\label{pow} is a Boolean
ring, where multiplication is
intersection, and addition is the taking of \textbf{symmetric differences,}\index{symmetric difference} where the symmetric difference of $x$ and $y$ is $(x\setminus
y)\cup(y\setminus x)$, denoted by $x\symdiff y$.    
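For a quick computational check (illustrative only), the power set of a
two-element set can be realized in Python with \texttt{frozenset}s,
symmetric difference serving as addition and intersection as
multiplication; the Boolean identity, the identity $2x=0$, and the
distributive law are then verified by brute force.
\begin{verbatim}
# The power set of {0, 1} as a Boolean ring: addition is symmetric
# difference, multiplication is intersection.
from itertools import chain, combinations

omega = {0, 1}
P = [frozenset(s) for s in chain.from_iterable(
        combinations(omega, r) for r in range(len(omega) + 1))]

add = lambda x, y: x ^ y        # symmetric difference
mul = lambda x, y: x & y        # intersection

for x in P:
    assert mul(x, x) == x                    # Boolean identity
    assert add(x, x) == frozenset()          # characteristic 2
for x in P:
    for y in P:
        for z in P:                          # distributivity
            assert mul(x, add(y, z)) == add(mul(x, y), mul(x, z))
\end{verbatim}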

  \begin{theorem}\label{thm:Boole}
In Boolean rings, all prime ideals are maximal.
  \end{theorem}

  \begin{proof}
    In a Boolean ring, we have $2x=(2x)^2=4x^2=4x$,
%$x+x=(x+x)^2=x^2+2x+x^2=x+2x+x$,
so 
\begin{equation*}
2x=0.  
\end{equation*}
(Thus nontrivial Boolean rings have characteristic $2$.)
Hence
\begin{equation*}
x(1+x)=x+x^2=x+x=0,
\end{equation*}
so $x$ is a zero-divisor unless it or $1+x$ is $0$, that is, unless
$x$ is $0$ or $1$.
Therefore there are no Boolean integral domains besides $\F_2$, which
is a field.
  \end{proof}

In $\Z$, the ideal $(a,b)$ is the principal ideal generated by
$\gcd(a,b)$.  So $a$ and $b$ are coprime if $(a,b)=\Z$.  This
condition can be written as $(a)+(b)=\Z$.  Then the following
generalizes Theorem~\ref{thm:CRT}.

\begin{theorem}[Chinese Remainder]\label{thm:CRT-R}\index{Chinese
    Remainder Theorem} 
  \index{theorem!Chinese Remainder Th---}
  Suppose $R$ has an indexed family $(I_i\colon i<n)$ of ideals such
  that $I_i+I_j=R$ whenever $i\neq j$.  Let $I=\bigcap_{i<n}I_i$.  Then the
  monomorphism 
  \begin{equation}\label{eqn:xxx}
  x+I\mapsto(x+I_0,\dots,x+I_{n-1})
  \end{equation}
  from $R/I$ to $\sum_{i<n}R/I_i$
  is an isomorphism.
\end{theorem}

\begin{proof}
We proceed by induction.  The claim is trivially true when $n=1$.
Proving the inductive step reduces to proving the claim when $n=2$,
since $\bigcap_{i<n-1}I_i$ and $I_{n-1}$ are themselves coprime:
multiplying together equations $a_i+b_i=1$, where $a_i\in I_i$ and
$b_i\in I_{n-1}$, shows that $1\in\bigcap_{i<n-1}I_i+I_{n-1}$.
In that case, we have $a_0+a_1=1$ for some
$a_0$ in $I_0$ and $a_1$ in $I_1$.  Then
\begin{align*}
  a_0&\equiv 1\pmod{I_1},&a_0&\equiv0\pmod{I_0},
\end{align*}
and similarly for $a_1$.  Therefore
\begin{align*}
  a_1x_0+a_0x_1&\equiv x_0\pmod{I_0},&
  a_1x_0+a_0x_1&\equiv x_1\pmod{I_1}.
\end{align*}
Thus $(x_0+I_0,x_1+I_1)$ is in the image of the map in~\eqref{eqn:xxx}.
\end{proof}
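In $\Z$, with $I_0=(3)$ and $I_1=(5)$, the proof is quite concrete: from
$6+(-5)=1$, where $6\in I_0$ and $-5\in I_1$, the element $a_1x_0+a_0x_1$
has the required residues.  A short Python check (illustrative only):
\begin{verbatim}
# Chinese Remainder in Z with I0 = (3), I1 = (5): from 6 + (-5) = 1
# (6 in I0, -5 in I1), a1*x0 + a0*x1 hits (x0 mod 3, x1 mod 5).
a0, a1 = 6, -5
assert a0 % 3 == 0 and a1 % 5 == 0 and a0 + a1 == 1

for x0 in range(3):
    for x1 in range(5):
        x = a1 * x0 + a0 * x1
        assert x % 3 == x0 and x % 5 == x1   # the map is onto
\end{verbatim}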

\section{Factorization}\label{sect:factor}

(Recall that all rings are now commutative with identity.)  In a ring 
$R$, an element $a$ is a \textbf{divisor}\index{divisor} of $b$, or
$a$ \textbf{divides}\index{divides} 
$b$, and we write
\begin{equation*}
  a\divides b,
\end{equation*}
if $ax=b$ for some $x$ in $R$.  Two elements that divide each other
are \textbf{associates.}\index{associates}

\begin{theorem}
  In any ring:
  \begin{compactenum}
    \item
$a\divides b \iff (b)\included (a)$;
\item
$a$ and $b$ are associates if and only if $(a)=(b)$.
  \end{compactenum}
Suppose $a=bx$.
\begin{compactenum}\setcounter{enumi}{2}
\item
If $x$ is a unit, then $a$ and $b$ are associates.
\item
If $b$ is a zero-divisor or $0$, then so is $a$.
\item
If $a$ is a unit, then so is $b$.
  \end{compactenum}
\end{theorem}

For example, in $\Zmod 6$, the elements $1$ and $5$ are units; the
other non-zero elements are zero-divisors.  Of these, $2$ and $4$ are
associates, since 
\begin{align}\label{eqn:2.2=4}
2\cdot 2&\equiv4,&4\cdot 2&\equiv 2\pmod 6; 
\end{align}
but $3$
is not an associate of these.  

In $\Z$, a \textbf{prime number} can be defined as a positive number
$p$ with either of two properties: 
\begin{compactenum}
\item\label{item:p=ab}
if $p=ab$, then one of $a$ and $b$ is $\pm 1$;
\item\label{item:p|ab}
if $p\divides ab$, then $p\divides a$ or $p\divides b$.
\end{compactenum}
Easily~\eqref{item:p|ab} implies~\eqref{item:p=ab}, since if $p=ab$, then
$p\divides ab$, so $p$ divides one of $a$ and $b$, say $b$; but $b\divides
p$, so $b=\pm p$, and therefore $a=\pm 1$.
Conversely,~\eqref{item:p=ab} implies~\eqref{item:p|ab}, with more
difficulty.  Indeed,
property~\eqref{item:p=ab} implies that, if $p\ndivides a$, then
$\gcd(p,a)=1$, so $px+ay=1$ for some $x$ and $y$.  If also
$p\divides ab$, but $p\ndivides a$, then, since $b=pbx+aby$, we have
$p\divides b$.

We let~\eqref{item:p|ab} be the defining property of \emph{primes;}
and~\eqref{item:p=ab}, \emph{irreducibles.}
More precisely,
an element of a ring is \textbf{irreducible}\index{irreducible} if
it is not a unit or $0$, and 
its only divisors are associates and units.  So the element is
irreducible just in case the ideal it generates is maximal amongst the
proper principal ideals.

For example,
  in $\R[X,Y]$, the element $X$ is irreducible, although $(X)$ is not
  a maximal ideal.  However, if $(X)\included(f(X,Y))\pincluded
  \R[X,Y]$, then $f(X,Y)$ must be constant in $Y$, and then it must
  have degree $1$ in $X$, and then its constant term must be $0$; so
  $f(X,Y)$ is just $aX$ for some $a$ in $\unit{\R}$.

An element of a ring is \textbf{prime}\index{prime} if it is not $0$
and the ideal that it generates is prime in the sense of
\S \ref{sect:comm}.

For example:
\begin{asparaenum}[1.]
\item
The primes of $\Z$ are the integers $\pm p$, where
$p$ is a prime 
  natural number, and these are just the irreducibles of $\Z$. 
\item
In $\Z/6\Z$, the element $2$ is prime.  Indeed, the multiples of $2$
are $0$, $2$, and $4$, so the non-multiples are $1$, $3$, and $5$, and
the product of no two of these is a multiple of $2$.  Similarly, $4$
is prime.  However, $2$ and $4$ are not irreducible, by~\eqref{eqn:2.2=4}. 
\item
In $\C$ we have
\begin{equation}\label{eqn:236}
2\cdot 3=(1+\sqrt{-5})(1-\sqrt{-5}),
\end{equation}
so, because the factors $2$, $3$, and $1\pm\sqrt{-5}$ are all irreducible in the smallest sub-ring of $\C$ that contains $\sqrt{-5}$, those factors cannot be prime in that ring.  Details are worked out in the next section.
\end{asparaenum}

\section{Some algebraic number theory}\label{sect:ant}

Suppose $d$ is a \textbf{squarefree} integer, that is, an integer
different from $1$ that is not
divisible by the square of a prime number.   
The subset $\{x+y\sqrt d\colon x,y\in\Q\}$ of $\C$ is a field, denoted by
\begin{equation*}
\Q(\sqrt d).
\end{equation*}
Define
\begin{equation*}
\tau_d=\begin{cases}
\sqrt d,&\text{if }d\not\equiv 1\pmod 4,\\
\displaystyle\frac{1+\sqrt d}2,&\text{ if }d\equiv 1\pmod 4.
\end{cases}
\end{equation*}
The abelian subgroup $\gpgen{1,\tau_d}$ of $\Q(\sqrt d)$ is a sub-ring, denoted by
\begin{equation*}
\Z[\tau_d].
\end{equation*}
\begin{theorem}
The elements of $\Z[\tau_d]$ are precisely the solutions in
$\Q(\sqrt d)$ of an equation  
\begin{equation}\label{eqn:x^2}
x^2+bx+c=0, 
\end{equation}
where $b$ and $c$ are in $\Z$.
\end{theorem}

\begin{proof}
From school, the solutions of~\eqref{eqn:x^2} are
\begin{equation*}
x=\frac{-b\pm\oldsqrt{b^2-4c}}2.
\end{equation*}
Suppose one of these is in $\Q(\sqrt d)$.  Then $b^2-4c=a^2d$ for some $a$ in $\Z$, so that
\begin{equation*}
x=\frac{-b\pm a\sqrt d}2.
\end{equation*}
If $b$ is odd, then $b^2-4c\equiv1\pmod 4$, so $a$ must be odd and $d\equiv1\pmod 4$.  If $b$ is even, then $b^2-4c\equiv0\pmod 4$, so $a$ is even.
This establishes $x\in\Z[\tau_d]$ in all cases.

Conversely, suppose $x=k+n\tau_d$ for some $k$ and $n$ in $\Z$.  If $d\equiv1\pmod 4$, then 
\begin{gather*}
2x-2k-n=n\sqrt d,\\
4x^2-4(2k+n)x+(2k+n)^2=n^2d,\\
x^2-(2k+n)x+k^2+kn+n^2\frac{1-d}4=0,
\end{gather*}
while if $d\not\equiv1\pmod 4$, then
\begin{equation*}
x^2-2kx+k^2-n^2d=0.
\end{equation*}
In either case, $x$ satisfies an equation of the required form.
\end{proof}

The elements of $\Z[\tau_d]$ are therefore called the \textbf{integers} of $\Q(\sqrt d)$.  Since $\Z[\tau_d]\cap\Q=\Z$, we may refer to the elements of $\Z$ as \textbf{rational integers.}
We have for example~\eqref{eqn:236} in $\Z[\tau_{-5}]$;
to show that $2$, $3$ and $1\pm\tau_{-5}$ are
irreducible in this ring, we define, in the general case, the operation $z\mapsto z'$ on
$\Q(\sqrt d)$ by 
\begin{equation*}
(x+y\sqrt d)'=x-y\sqrt d.
\end{equation*}
This is an \emph{automorphism} of $\Q(\sqrt d)$.  (It is the
restriction of complex conjugation, if $d<0$.)
Then we define a \textbf{norm} function $N$ from $\Q(\sqrt d)$ to $\Q$ by
\begin{equation*}
N(z)=zz'.
\end{equation*}
Then $N$ is multiplicative, that is, 
\begin{equation*}
N(\alpha\beta)=N(\alpha)N(\beta).  
\end{equation*}
Also,
\begin{equation*}
N(x+\tau_d y)=\begin{cases}
x^2-dy^2,&\text{ if }d\not\equiv1\pmod 4,\\
x^2+xy+\displaystyle\frac{1-d}4y^2,&\text{ if }d\equiv 1\pmod 4,
\end{cases}
\end{equation*}
so $N$ maps $\Z[\tau_d]$ into $\Z$.  If $d<0$, then it maps $\Z[\tau_d]$ into $\N$.
Let us restrict our attention to this case.
Here, $\alpha$ is a unit
in $\Z[\tau_d]$ if and only if $N(\alpha)=1$.  Therefore a non-zero non-unit $\alpha$ of $\Z[\tau_d]$ is
irreducible if 
and only if it has no divisor $\beta$ such that
$1<N(\beta)<N(\alpha)$.  In case $d=-5$ we have
\begin{equation}\label{eqn:N23}
  \begin{array}{c||c|c|c}
    x&2&3&1\pm\tau_{-5}\\\hline
N(x)&4&9&6
  \end{array}.
\end{equation}
Since the equation $x^2+5y^2=n$ has no solution in rational integers
when $n$ is $2$ or $3$, no elements of $\Z[\tau_{-5}]$ have norm $2$
or $3$, and so the elements $2$, $3$, and $1\pm\tau_{-5}$ are
irreducible. 

But they are not prime.  Indeed, if $\alpha\divides\beta$, then
$N(\alpha)\divides N(\beta)$; since no norm in~\eqref{eqn:N23} divides
another, $2$, for example, divides the product $(1+\tau_{-5})(1-\tau_{-5})$ of~\eqref{eqn:236} without dividing either factor.  This is where \emph{ideals} come up.  There are 
factorizations of the relevant ideals:
\begin{equation}\label{eqn:factors}
\begin{gathered}
  (2)=(2,1+\tau_{-5})^2,\\
(3)=(3,1+\tau_{-5})(3,1-\tau_{-5}),\\
  (1+\tau_{-5})=(2,1+\tau_{-5})(3,1+\tau_{-5}),\\
  (1-\tau_{-5})=(2,1+\tau_{-5})(3,1-\tau_{-5}).
\end{gathered}
\end{equation}
For example,
\begin{equation*}
(2,1+\tau_{-5})(2,1+\tau_{-5})=(2,1+\tau_{-5})(2,1-\tau_{-5})=(4,2+2\tau_{-5},6)=(2).
\end{equation*}
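The second equation of~\eqref{eqn:factors} can be checked in the same way:
\begin{equation*}
(3,1+\tau_{-5})(3,1-\tau_{-5})=(9,3+3\tau_{-5},3-3\tau_{-5},6)=(3),
\end{equation*}
since $3=9-6$, while every generator is a multiple of $3$.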
The right-hand members of~\eqref{eqn:factors} are in fact prime factorizations.  To see this, we first note that, being a subgroup of $\gpgen{1,\tau_d}$ on more than one generator, a non-zero ideal $I$ of $\Z[\tau_d]$ can be written as
$\gpgen{a+b\tau_d,c+e\tau_d}$, where 
\begin{equation*}
\begin{pmatrix}
a&b\\
c&e
\end{pmatrix}
\in\MatZ[2]\cap\GL[2]{\Q}.
\end{equation*}
Multiplication on the left by a matrix in $\GL[2]{\Z}$ does not change the ideal.  Hence we can define
  \begin{equation*}
    N(I)=\abs{\det
    \begin{pmatrix}
      a&b\\
c&e
    \end{pmatrix}},
  \end{equation*}
  which is in $\N$.
In case $d<0$, this agrees with the function $N$ defined above in the sense that
$N((\alpha))=N(\alpha)$, because, at least when $d\not\equiv1\pmod4$,
\begin{equation*}
(a+b\tau_d)\gpgen{1,\tau_d}=\gpgen{a+b\tau_d,db+a\tau_d},
\end{equation*}
and the corresponding matrix has determinant $a^2-db^2$, which is $N(a+b\tau_d)$; the case $d\equiv1\pmod 4$ is similar.
Moreover, if
 $I\pincluded J\pincluded\Z[\tau_d]$, then $N(J)\divides N(I)$ and $N(I)>N(J)>1$.  
In case $d=-5$, we compute
\begin{gather*}
  (2,1+\tau_{-5})=\gpgen{2,2\tau_{-5},1+\tau_{-5},\tau_{-5}-5}
=\gpgen{2,1+\tau_{-5}},\\
(3,1\pm\tau_{-5})=\gpgen{3,3\tau_{-5},1\pm\tau_{-5},\tau_{-5}\mp 5}
=\gpgen{3,1\pm\tau_{-5}},
\end{gather*}
hence
\begin{equation*}
  \begin{array}{c||c|c}
    I&(2,1+\tau_{-5})&(3,1\pm\tau_{-5})\\\hline
N(I)&2&3
  \end{array}.
\end{equation*}
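Here, for example, the generators $2$ and $1+\tau_{-5}$ of the first ideal give the matrix $\begin{pmatrix}2&0\\1&1\end{pmatrix}$, whose determinant is $2$.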
So these ideals are maximal, hence prime.
Ideals of the rings $\Z[\tau_d]$ were originally called \textbf{ideal numbers.}






\section{Integral domains}\label{sect:int-dom}

\begin{theorem}
In an integral domain, if $a$ and $b$ are non-zero associates, and
  $a=bx$, then 
  $x$ is a unit.
\end{theorem}

\begin{proof}
  We have also $b=ay$ for some $y$, so $b=bxy$ and $b(1-xy)=0$; since $b\neq0$ and we
  are in an integral domain, $xy=1$. 
\end{proof}

\begin{corollary}
  In an integral domain, prime elements are irreducible.
\end{corollary}

\begin{proof}
  If $p$ is prime, and $p=ab$, then $p$ divides $a$ or $b$; since that factor also
  divides $p$, it is an associate of $p$, and then by the theorem the other factor is a unit.
\end{proof}

A \textbf{unique factorization domain}\index{unique factorization
  domain}\index{domain!unique 
  factorization ---} or UFD is an integral domain whose every
non-zero, non-unit element is `unique\-ly' a product of irreducibles.  This means that, if
\begin{equation*}
\prod_{i<n}\pi_i=\prod_{i<n'}\pi_i',
\end{equation*}
where the $\pi_i$ and $\pi_i'$ are irreducible, then $n=n'$, and (perhaps after re-indexing) $\pi_i$ and $\pi_i'$ are associates.  Hence:

\begin{theorem}
In a UFD, irreducibles are prime.  \hfill\qedsymbol
\end{theorem}

In any ring, a \textbf{greatest common divisor}\index{greatest common
  divisor}\index{divisor!greatest common ---} of elements $a$ and $b$ is an element of the set of all divisors of $a$ and $b$ that is a maximum with respect to dividing: that is, it is some $c$ such that $c\divides a$ and $c\divides b$, and for all $x$, if $x\divides a$ and $x\divides b$, then $x\divides c$.  There can be more than one greatest common divisor, but they are all associates.  Every element is a greatest common divisor of itself and $0$.

\begin{theorem}
In a UFD, any two elements have a greatest common divisor.
\end{theorem}

\begin{proof}
If they are nonzero, we can write the elements as 
\begin{align*}
  &u\prod_{i<n}\pi_i{}^{a(i)},&&v\prod_{i<n}\pi_i{}^{b(i)},
\end{align*}
where $u$ and $v$ are units and the $\pi_i$ are irreducibles; a greatest common divisor is then
\begin{equation*}
  \prod_{i<n}\pi_i{}^{\min(a(i),b(i))}.\qedhere
\end{equation*}
\end{proof}

In a PID, more is true:

\begin{theorem}
In a PID, any two elements have a greatest common divisor, which is some linear combination of those elements.
\end{theorem}

\begin{proof}
If $(a,b)=(c)$, then $c$ is a greatest common divisor of $a$ and $b$, and $c=ax+by$ for some $x$ and $y$ in the ring.
\end{proof}

\begin{lemma}
  In a PID, irreducibles are prime.
\end{lemma}

\begin{proof}
  Suppose the irreducible $\pi$ divides $ab$ but not $a$.  Then a
  greatest common divisor of $\pi$ and $a$ is $1$; hence $\pi x+ay=1$ for some $x$
  and $y$ in the ring.  Then $b=\pi xb+ aby$, and $\pi$ divides each summand, so
  $\pi\divides b$.
\end{proof}

\begin{lemma}
  In a PID, irreducible factorizations are unique.
\end{lemma}

A ring is \textbf{Noetherian}\index{Noetherian ring} if every strictly ascending
chain of ideals is finite.

\begin{theorem}
  PIDs are Noetherian.
\end{theorem}

\begin{proof}
  If $I_0\included I_1\included\dotsb$, then $\bigcup_{i\in\upomega}I_i$
  is an ideal $(a)$; then $a\in I_n$ for some $n$, so the chain cannot grow beyond $I_n$. 
\end{proof}

\begin{lemma}
  In a PID, every non-zero element that is not a unit is a product of irreducibles.
\end{lemma}

\begin{proof}
  A tree of factorizations has no infinite branches.  More precisely, let $a$ be a non-zero element of a PID that is not a unit.  For certain finite binary sequences $\sigma$, we define $a_{\sigma}$ thus: $a_{()}=a$, and if $a_{(e(0),\dots,e(n-1))}$ can be factorized as $bc$, where neither $b$ nor $c$ is a unit, then let
  $a_{(e(0),\dots,e(n-1),0)}=b$ and
  $a_{(e(0),\dots,e(n-1),1)}=c$; otherwise these are undefined.  Then every branch of the tree corresponds to a chain
  \begin{equation*}
(a_{()})\pincluded (a_{(e(0))})\pincluded (a_{(e(0),e(1))})\pincluded (a_{(e(0),e(1),e(2))})\pincluded\cdots,
\end{equation*}
so it must be finite.  Therefore the whole tree is finite, and $a$ is the product of the irreducibles found at the end of each branch.
\end{proof}

\begin{theorem}\label{thm:PID-->UFD}
A PID is a UFD.\hfill\qedsymbol
\end{theorem}

Recall how the Euclidean algorithm for finding greatest common divisors works.  To find $\gcd(201,87)$, compute:
\begin{align*}
201&=87\cdot 2+27,\\
87&=27\cdot3+6,\\
27&=6\cdot 4+3,\\
6&=3\cdot 2.
\end{align*}
So $\gcd(201,87)=3$.  In general, if $a_0\geq a_1>0$, then $\gcd(a_0,a_1)=a_n$, where $(a_0,\dots,a_n)$ is a sequence of positive integers, strictly decreasing from $a_1$ onwards, such that $a_k=a_{k+1}\cdot b_k+a_{k+2}$ for some $b_k$ whenever $k\leq n-2$, and $a_n\divides a_{n-1}$.
A \textbf{Euclidean domain}\index{Euclidean domain}\index{domain!Euclidean ---} is then an integral domain in which the Euclidean algorithm works.  More precisely, a Euclidean domain is a domain $R$ equipped with a map $\phi$ from $R\setminus\{0\}$ to $\upomega$ such that,
for all $a$ and $b$ in $R\setminus\{0\}$, one of the following holds:
\begin{itemize}
\item
there exist $q$ in $R$ and $r$ in $R\setminus\{0\}$ such that $a=qb+r$ and $\phi(r)<\phi(b)$, or
\item
$b\divides a$ and $\phi(b)\leq\phi(a)$.
\end{itemize}

For example:
\begin{asparaenum}
\item
$\Z$ is Euclidean with respect to $x\mapsto\abs x$;
\item
a field, $x\mapsto 0$;
\item
a polynomial-ring $K[X]$ over a field $K$, $f\mapsto\deg f$ (see \S \ref{sect:fact-pol}).
\end{asparaenum}
  
  The \textbf{Gaussian integers}\index{Gaussian integer} are the elements of $\Z[\tau_{-1}]$, where $\tau_{-1}=\sqrt{-1}=\mi$ as in \S \ref{sect:ant}.
 This domain is
  Euclidean with respect to the norm function, namely $z\mapsto\abs z^2$, where $\abs{x+y\mi}^2=x^2+y^2$.  Indeed, if $a$ and $b$ are nonzero Gaussian integers, then there is a Gaussian integer $q$ such that $\abs{a/b-q}\leq\sqrt 2/2$.  Let $r=a-bq$; then $\abs r^2=\abs b^2\cdot\abs{a/b-q}^2\leq\abs b^2/2$.
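
To make the algorithm concrete, here is a minimal sketch, in Python, of division with remainder and of the resulting Euclidean algorithm in the Gaussian integers, following the rounding argument just given.  The representation of $x+y\mi$ as the pair $(x,y)$, and the names of the functions, are of course only illustrative.
\begin{verbatim}
from fractions import Fraction

def gauss_divmod(a, b):
    # a, b are pairs (x, y) standing for x + y*i, with b nonzero.
    # Returns (q, r) with a = q*b + r and N(r) <= N(b)/2.
    (ax, ay), (bx, by) = a, b
    n = bx*bx + by*by                    # N(b)
    # exact quotient a/b = a * conj(b) / N(b)
    qx = Fraction(ax*bx + ay*by, n)
    qy = Fraction(ay*bx - ax*by, n)
    # round each coordinate to a nearest integer, so |a/b - q| <= sqrt(2)/2
    q = (round(qx), round(qy))
    r = (ax - (q[0]*bx - q[1]*by), ay - (q[0]*by + q[1]*bx))
    return q, r

def gauss_gcd(a, b):
    # greatest common divisor in the Gaussian integers, up to a unit
    while b != (0, 0):
        a, b = b, gauss_divmod(a, b)[1]
    return a

print(gauss_gcd((4, 2), (5, 3)))         # (-1, -1), an associate of 1 + i
\end{verbatim}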

\begin{theorem}
Euclidean domains are \pid s.
\end{theorem}

\begin{proof}
  A non-zero ideal of a Euclidean domain is generated by any of its non-zero elements
  $x$ for which $\phi(x)$ is minimal. 
\end{proof}

\section{Localization}\label{sect:loc}

A subset of a ring is \textbf{multiplicative}\index{multiplicative} if it is closed
under multiplication.  For example,
  the complement of a prime ideal is multiplicative.

\begin{lemma}
If $S$ is a multiplicative subset of a ring $R$, then on $R\times S$
there is an equivalence-relation $\sim$ given by
\begin{equation}\label{eqn:q}
  (a,b)\sim (c,d)\iff (ad-bc)\cdot e=0\text{ for some $e$ in }S.
\end{equation}
\end{lemma}

\begin{proof}
Reflexivity and symmetry are obvious.  For transitivity, note that, if $(a,b)\sim(c,d)$ and $(c,d)\sim(e,f)$, so that, for some $g$ and $h$ in $S$,
\begin{align*}
0&=(ad-bc)g=adg-bcg,&0&=(cf-de)h=cfh-deh,
\end{align*}
then 
\begin{align*}
(af-be)dgh
&=afdgh-bedgh\\
&=adg\cdot fh-deh\cdot bg
=bcg\cdot fh-cfh\cdot bg=0,
\end{align*}
and $dgh\in S$, so $(a,b)\sim(e,f)$.
\end{proof}

In the notation of the lemma, the equivalence-class of $(a,b)$ is denoted by
\begin{equation*}
  \frac ab,
\end{equation*}
and the quotient $R\times S\modsim$ is denoted by
\begin{equation*}
  S\inv R.
\end{equation*}
If $R$ is an integral domain, and $0\notin S$, then~\eqref{eqn:q} can be simply
\begin{equation*}
  (a,b)\sim (c,d)\iff ad-bc=0.
\end{equation*}
If $0\in S$, then $S\inv R$ has a unique element.  An instance where $R$ is not an integral domain will be considered in the next section.
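
Meanwhile, for example, if $R=\Z$ and $S$ is the set of powers of $2$, then $S\inv R$ can be identified with the set of rational numbers whose denominators are powers of $2$; if instead $S=\Z\setminus(p)$ for some prime number $p$, then $S\inv R$ consists of the rationals whose denominators are not divisible by $p$.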

\begin{theorem}\label{thm:loc}
  Suppose $R$ is a ring with multiplicative subset $S$.
  \begin{compactenum}
  \item
  In $S\inv R$, if $c\in S$,
  \begin{equation*}
\frac ab=\frac{ac}{bc}.
\end{equation*}
  \item
  $S\inv R$ is a ring
in which the operations are given by
\begin{align*}
  \frac ab\cdot\frac cd&=\frac{ac}{bd},&
  \frac ab\pm\frac cd&=\frac{ad\pm bc}{bd}.  
\end{align*}
  \item
  There is a ring-homomorphism $\phi$ from $R$ to $S\inv R$ given by
  \begin{equation*}
\phi(x)=\frac{xa}a,
\end{equation*}
where $a$ is any element of $S$; the fraction is independent of the choice of $a$.
\newcounter{local}
\setcounter{local}{\value{enumi}}
 \end{compactenum}
   Suppose in particular $R$ is an integral
domain and $0\notin S$.  
\begin{compactenum}\setcounter{enumi}{\value{local}}
\item
$S\inv R$ is an integral domain, and the homomorphism $\phi$ is an embedding.
\item
If $S=R\setminus\{0\}$, then $S\inv R$ is a field, and
if $\psi$ is an embedding of $R$ in a field $K$, then there is an embedding $\tilde{\psi}$ of $S\inv R$ in $K$ such that $\tilde{\psi}\circ\phi=\psi$.
\end{compactenum}
\end{theorem}

In the most important case, $S$ is the complement of a prime ideal
$\primei$, and then $S\inv R$ is called the \textbf{localization}\index{local!---ization} of $R$ at $\primei$,  denoted by
\begin{equation*}
  R_{\primei}.
\end{equation*}
If $R$ is an integral domain, so that $(0)$ is prime, then $R_{(0)}$ (which is a field by the theorem)
is the
\textbf{quotient-field}%
\index{quotient!--- field}\index{field!quotient ---} of $R$. 
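For example, the quotient-field of $\Z$ is (up to isomorphism) $\Q$, and the quotient-field of $\Z[\tau_d]$ is $\Q(\sqrt d)$.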
A \textbf{local ring}\index{ring!local ---}\index{local!--- ring}
is a ring with a unique 
maximal ideal.  The connection between localizations and local rings is made by the theorem below.

\begin{lemma}
An ideal $\maxi$ of a ring $R$ is the unique maximal ideal of $R$
if and only if $\unit R=R\setminus\maxi$.
\end{lemma}

\begin{theorem}
  The localization of a ring at a prime ideal is a local ring.
\end{theorem}

\begin{proof}
  The ideal generated by the image of $\primei$ in $R_{\primei}$
  consists of those $a/b$ such that $a\in\primei$.  In this case, if
  $c/d=a/b$, then 
  $(cb-da)e=0$ for some $e$ in $R\setminus\primei$, so $cbe=dae\in\primei$; since $b$ and $e$ are not in $\primei$, and $\primei$ is prime, $c\in\primei$.  Hence
  the following are equivalent:
  \begin{compactenum}
    \item
$x/y\notin R_{\primei}\primei$;
\item
$x\notin\primei$;
\item
$x/y$ has an inverse, namely $y/x$.
  \end{compactenum}
By the lemma, we are done.
\end{proof}

\section{Ultraproducts of fields}

Suppose $\family K$ is an indexed family $(K_i\colon i\in A)$ of fields.
If $a\in\prod\family K$, there is an element $a^*$ of $\prod\family K$ given by
\begin{equation*}
\pi_i(a^*)=\begin{cases}
\pi_i(a)\inv,&\text{ if }\pi_i(a)\neq0,\\
0,&\text{ if }\pi_i(a)=0.
\end{cases}
\end{equation*}
Then
\begin{equation*}
aa^*a=a.
\end{equation*}
Because of this, $\prod\family K$ is an example of a \textbf{regular ring} (in the sense of von Neumann).\footnote{In general, a regular ring need not be commutative; see \cite[IX.3, ex.~5, p.~442]{MR600654}.}

\begin{theorem}
In a regular ring, all prime ideals are maximal.
\end{theorem}

\begin{proof}
If $\primei$ is a prime ideal of a regular ring, then the quotient by $\primei$ is an integral domain, and it is still regular, the image of $a^*$ serving for the image of $a$; so it is enough to show that a regular integral domain is a field.  Let $R$ be such a domain.  If $a\in R\setminus\{0\}$, then, since
\begin{equation*}
0=aa^*a-a=a(a^*a-1),
\end{equation*}
we have $a^*a=1$.  Thus $R$ is a field.
\end{proof}

\begin{theorem}
If $\primei$ is a prime ideal of a regular ring $R$, then
\begin{equation*}
R/\primei\cong R_{\primei},
\end{equation*}
the isomorphism being $x+\primei\mapsto x/1$.
\end{theorem}

\begin{proof}
If $a\in R$ and $b\in R\setminus\primei$, then $a/b=ab^*/1$ since
\begin{equation*}
(a-bab^*)b=ab-abb^*b=ab-ab=0.
\end{equation*}
Thus the homomorphism $x\mapsto x/1$ guaranteed by Theorem~\ref{thm:loc} is surjective.
We also have $a/1=0/1$ if and only if $ab=0$ for some $b$ in $R\setminus\primei$; but the latter implies $ab\in\primei$, so $a\in \primei$ since the ideal is prime.  Conversely, if $a\in\primei$, then $a^*a\in\primei$, so $a^*a-1\notin\primei$ since the ideal is proper; but $a(a^*a-1)=0$, so $a/1=0/1$.  Therefore the kernel of the homomorphism is $\primei$.
\end{proof}

With $\family K$ as above,
there is a one-to-one correspondence between ideals of $\prod\family K$ and ideals of the Boolean ring $\pow A$.  To define this correspondence, we first define the \textbf{support} of an element $a$ of $\prod\family K$ to be the set of those $i$ in $A$ such that $\pi_i(a)\neq0$.  We may denote this set by $\supp a$.  Then
\begin{align*}
\supp{ab}&=\supp a\cap\supp b,&
\supp{a+b}&\included\supp a\cup\supp b.
\end{align*}
So $x\mapsto\supp x$ is not quite a ring-homomorphism from $\prod\family K$ to $\pow A$.
However,
if $I$ is an ideal of $\prod\family K$, then $\Supp I$ is an ideal of $\pow A$.
Indeed, for every subset $B$ of $A$, there is an element $e_B$ of $\prod\family K$ given by
\begin{equation*}
\pi_i(e_B)=\begin{cases}
1,&\text{ if }i\in B,\\
0,&\text{ if }i\notin B.
\end{cases}
\end{equation*}
Then $\supp{e_B}=B$.
If $a\in\prod\family K$, and $B=\supp a$, then $e_B=aa^*$.  If, further, $a\in I$,
and $C\included B$, then $e_C=e_Caa^*$, so this is in $I$ and therefore $C\in\Supp I$.  Also,
if $B$ and $C$ are in $\Supp I$, then $B\symdiff C=\supp{e_B-e_C}$, which is in $\Supp I$.  So $\Supp I$ is indeed an ideal of $\pow A$.  If $J$ is an ideal of $\pow A$, then $J=\Supp I$, where $I$ is the ideal of $\prod\family K$ generated by those $e_B$ such that $B\in J$. Since every ideal $I$ is generated by those $e_B$ such that $B\in\Supp I$,
we conclude that $I\mapsto\Supp I$ is the claimed one-to-one correspondence.

Let $\primei$ be a prime ideal of $\prod\family K$.  Then the quotient $\prod\family K/\primei$ is a field, called an \textbf{ultraproduct} of $\family K$.
Now, $\primei$ could be principal, in which case $\Supp\primei$ would be principal; but since it is also maximal, it would have a set $A\setminus\{i\}$ as a generator.  In this case $\prod\family K/\primei\cong K_i$.

However, $\pow A$ has the ideal $I$ consisting of the finite subsets of $A$.  If $A$ itself is infinite, then $I$ is a proper ideal.  In this case, if $I\included\Supp{\primei}$, then $\primei$ is not principal, and the field $\prod\family K/\primei$ is called a \textbf{nonprincipal ultraproduct} of $\family K$.  This is a sort of `average' of the $K_i$.  In particular, we have
\begin{align*}
a\equiv b\pmod{\primei}
&\iff a-b\in\primei\\
&\iff \supp{a-b}\in\Supp{\primei}\\
&\iff \{i\in A\colon \pi_i(a)\neq\pi_i(b)\}\in\Supp{\primei}.
\end{align*}
We may think of the elements of $\Supp{\primei}$ as `small' sets; their complements are `large'.  (Then every subset of $A$ is small or large.)  So all finite subsets of $A$ are small, and all cofinite subsets of $A$ are large.  Then elements of $\prod\family K$ represent the same element in the ultraproduct if they agree on a large set.

Say for example $A$ is the set of prime numbers in $\upomega$, along with $0$, and each $K_p$ has characteristic $p$.  Then $\prod\family K/\primei$ has characteristic $0$, since for each prime $p$, the element $p1$ of $\prod\family K$ disagrees with $0$ on a large set.

The proof that nonprincipal ultraproducts exist uses the Axiom of Choice.\label{ac-up}

\section{Factorization of polynomials}\label{sect:fact-pol}

\begin{theorem}
If $R$ is a ring, then $R[X_0,\dots,X_{n-1}]$ is the unique ring-extension $A$ of $R$ such
  that, for all rings $S$, and all homomorphisms $\phi$ from $R$ to
  $S$, and all $\vec a$ in 
  $S^n$, there is a unique homomorphism $\tilde{\phi}$ from $A$ to $S$
  such that $\tilde{\phi}|_R=\phi$ and, in each case, $\tilde{\phi}(X_i)$ is the $i$th entry of
  $\vec a$. 
\end{theorem}
An arbitrary element of $R[X]$ can be written
\begin{equation*}
  \sum_{i\leq n}a_iX^i;
\end{equation*}
the \textbf{degree}\index{degree} of this is $n$, if $a_n\neq0$; then
$a_n$ is the 
\textbf{leading coefficient}\index{leading coefficient} of the polynomial. 

We said in \S \ref{sect:int-dom} that $K[X]$ is a Euclidean domain when equipped with
$\deg$.  More generally:

\begin{lemma}
If $f$ and $g$ are polynomials over $R$, then:
\begin{itemize}
  \item
$\deg (f+g)\leq\max(\deg f, \deg g)$;
\item
$\deg (f\cdot g)\leq \deg f+\deg g$, with equality if the product of
  the leading coefficients is not $0$.
\end{itemize}
In particular,
if $R$ is an integral domain, then so is $R[X]$.
\end{lemma}

\begin{proof}
  If the product of the leading coefficients is not $0$, then it is the leading
  coefficient of the product; in particular, over an integral domain, the product of two non-zero polynomials is not $0$. 
\end{proof}
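
For example, in $(\Z/6\Z)[X]$ we have $(2X+1)(3X+1)=5X+1$, the product of the leading coefficients being $0$; so the degree of a product can be strictly less than the sum of the degrees.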

\begin{lemma}[Division Algorithm]\index{Division Algorithm}
  \index{theorem!Division Algorithm} \index{algorithm!Division A---}
  If $f$ and $g$ are polynomials in $X$ over $R$, and the leading coefficient
  of $g$ is $1$, then
  \begin{equation*}
    f=qg+r
  \end{equation*}
for some unique $q$ and $r$ in $R[X]$ such that $\deg r<\deg g$.
\end{lemma}

\begin{proof}
  If $\deg g\leq \deg f$, and $a$ is the leading coefficient of $f$,
  then
  \begin{equation*}
    f=aX^{\deg f-\deg g}\cdot g + (f-aX^{\deg f-\deg g}\cdot g), 
  \end{equation*}
the second term having degree less than that of $f$.  Continue as necessary.  (If $\deg f<\deg g$, take $q=0$ and $r=f$.)
\end{proof}
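
For example, dividing $X^3+1$ by $X^2+X+1$ over $\Z$, the first step leaves $X^3+1-X\cdot(X^2+X+1)=-X^2-X+1$, and the second leaves $-X^2-X+1+(X^2+X+1)=2$; thus $q=X-1$ and $r=2$, and indeed $X^3+1=(X-1)(X^2+X+1)+2$.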

\begin{lemma}[Remainder Theorem]\index{Remainder Theorem}
  \index{theorem!Remainder Th---}
If $c\in R$, then any $f$ in $R[X]$ can be written uniquely as
$q(X)\cdot (X-c)+f(c)$.  
\end{lemma}

\begin{proof}
By the Division Algorithm,
  $f=q(X)\cdot (X-c)+d$ for some $d$ in $R$; letting $X$ be $c$ yields
  the claim.
\end{proof}

\begin{theorem}
A ring-element  $c$ is a zero of a polynomial $f$ if and only if
$(X-c)\divides f$.  If $f$ is over an integral domain, then the number
of its distinct zeros is at most $\deg f$.
\end{theorem}

\begin{proof}
By the Remainder Theorem, $c$ is a zero of $f$ if and only if $f=q(X)\cdot(X-c)$ for some $q$.  In this case, if the ring is an integral domain, and $d$ is another zero of $f$, then, since $d-c\neq0$, we must have that $d$ is a zero of $q$.  Hence,
if $f$ has the distinct zeros $r_0$, \dots, $r_{m-1}$, then repeated application of this argument yields
\begin{equation*}
f=(X-r_0)\dotsm(X-r_{m-1})\cdot g
\end{equation*}
for some $g$ in $R[X]$; thus $m\leq\deg(f)$, and the number of distinct zeros of $f$ is at most $\deg f$.
\end{proof}

Recall however from the proof of Theorem~\ref{thm:Boole} that every element of a Boolean ring is a zero of $X(1+X)$, that is, $X+X^2$; but some Boolean rings have more than two elements.  In $\Zmod 6$, the same polynomial has the zeros $0$, $2$, $3$, and $5$.

\begin{theorem}
  If $K$ is a field, then $K[X]$ is a Euclidean domain whose units are
  precisely the non-zero elements of $K$.
\end{theorem}

\begin{proof}
Over a field, the Division Algorithm does not require the leading coefficient of the divisor to be $1$.  Since degrees add when non-zero polynomials over a field are multiplied, the units are the polynomials of degree $0$.
\end{proof}

A zero $c$ of a polynomial over an integral domain has \textbf{multiplicity} $m$ if the polynomial can be written as $g(X)\cdot(X-c)^m$, where $c$ is not a zero of $g$.  A zero with multiplicity greater than $1$ is \textbf{multiple.}
Derivations were defined in \S \ref{sect:nna-rings}; they will be useful for recognizing the existence of multiple zeros.

\begin{lemma}
  If $\delta$ is a derivation of a ring $R$, then for all $x$ in $R$ and $n$ in $\upomega$,
  \begin{equation*}  
   \delta(x^n)=nx^{n-1}\delta(x).
   \end{equation*}
\end{lemma}

\begin{proof}
Since $\delta(1)=\delta(1\cdot1)=\delta(1)\cdot 1+1\cdot\delta(1)=2\cdot\delta(1)$, we have $\delta(1)=0$, so the claim holds when $n=0$.  If it holds when $n=k$, then
\begin{equation*}
\delta(x^{k+1})=\delta(x)x^k+x\delta(x^k)=\delta(x)x^k+kx^k\delta(x)=(k+1)x^k\delta(x),
\end{equation*}
so the claim holds when $n=k+1$.
\end{proof}

\begin{theorem}
  On a polynomial ring $R[X]$, there is a
  unique derivation $f\mapsto
  f'$ such that 
  \begin{compactenum}
  \item
  $X'=1$,
  \item
   $c'=0$ for all $c$ in $R$.
   \end{compactenum}
   This derivation is given by
   \begin{equation}\label{eqn:der}
\Bigl(\sum_{k=0}^na_kX^k\Bigr)'=
\sum_{k=0}^{n-1}(k+1)a_{k+1}X^k.
\end{equation}
\end{theorem}

\begin{proof}
Uniqueness and~\eqref{eqn:der} follow from the lemma and the definition of a derivation.
  Also,~\eqref{eqn:der} does define an endomorphism of the underlying group of $R[X]$ that meets the given conditions.  Since both sides of the Leibniz rule are additive in each argument, and coefficients from $R$ pass through both sides by~\eqref{eqn:der}, it is enough to verify the rule for powers of $X$.
Because
  \begin{align*}
(X^k)'(X^{\ell})+X^k(X^{\ell})'
&=kX^{k-1}X^{\ell}+\ell X^kX^{\ell-1}\\
&=(k+\ell)X^{k+\ell-1}\\
&=(X^{k+\ell})',
\end{align*}
the additive endomorphism $f\mapsto f'$ of $R[X]$ is a derivation.
\end{proof}

In the notation of the theorem, $f'$ is the \textbf{derivative} of $f$.
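For example, if $R$ has a prime characteristic $p$, then $(X^p)'=pX^{p-1}=0$, although $X^p$ is not constant.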

\begin{lemma}
  Say $R$ is an integral domain, $f\in R[X]$ and $f(c)=0$.  Then $c$
  is a multiple zero of $f$ if and only if $f'(c)=0$.
\end{lemma}

\begin{proof}
  Write $f$ as $(X-c)^m\cdot g$, where $g(c)\neq0$.  Then $m\geq1$, so 
  \begin{equation*}
    f'=m(X-c)^{m-1}\cdot g+(X-c)^m\cdot g'.
  \end{equation*}
If $m>1$, then $f'(c)=0$.  If $f'(c)=0$, then $m\cdot 0^{m-1}\cdot
g(c)=0$, so $m>1$.
\end{proof}

If $L$ is a field with subfield $K$, then a polynomial over $K$ may be irreducible over $K$, but not over $L$.  For example, $X^2+1$ is irreducible over $\R$, but not over $\C$.  Likewise, the polynomial may have zeros from $L$, but not $K$.  Hence it makes sense to speak of zeros of an irreducible polynomial.

\begin{theorem}
Suppose $K$ is a field and $f\in K[X]$.  
  \begin{compactenum}
  \item
  If $\gcd(f,f')=1$, then $f$ has
  no multiple zeros.
  \item
  If $f$ is irreducible, then $\gcd(f,f')=1$ unless $f'=0$.
  \item
  If $f'=0$, then $K$ has a positive characteristic $p$, and $f=g(X^p)$ for some polynomial $g$ over $K$.
  \end{compactenum}
\end{theorem}

\begin{proof}
If $\gcd(f,f')=1$, then $1=g\cdot f+h\cdot f'$ for some polynomials $g$ and $h$, so $f$
  and $f'$ can have no common zero.  If $f$ is irreducible and $\gcd(f,f')\neq1$, then $f$ divides $f'$; since $\deg(f')<\deg(f)$ by~\eqref{eqn:der}, this means $f'=0$.  The rest also follows from~\eqref{eqn:der}: if $f'=0$, then $ka_k=0$ whenever $a_k$ is a coefficient of $f$ and $k\geq1$, so $a_k=0$ unless the characteristic is a positive $p$ dividing $k$.
\end{proof}
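
For an example of the last case, let $p$ be a prime number and let $K$ be the quotient-field of $(\Z/p\Z)[t]$.  The polynomial $X^p-t$ over $K$ has derivative $pX^{p-1}$, which is $0$; it is irreducible, by Eisenstein's criterion below (applied in $(\Z/p\Z)[t]$ with $\pi=t$); and in any larger field in which it has a zero $\alpha$, it is $(X-\alpha)^p$, so the zero is multiple.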

A polynomial over a UFD is \textbf{primitive} if $1$ is a greatest common divisor of its coefficients.

\begin{lemma}[Gauss]
The product of primitive polynomials is primitive.
\end{lemma}

\begin{proof}
Let $f=\sum_{k=0}^ma_kX^k$ and $g=\sum_{k=0}^nb_kX^k$.  Then
\begin{equation*}
fg=\sum_{k=0}^{m+n}c_kX^k,
\end{equation*}
where
\begin{equation*}
c_k=\sum_{i+j=k}a_ib_j=a_0b_k+a_1b_{k-1}+\dotsb+a_kb_0.
\end{equation*}
Suppose the $c_k$ have a common prime factor $\pi$, but $f$ is primitive.  There is some $\ell$ such that $\pi\divides a_i$ when $i<\ell$, but $\pi\ndivides a_{\ell}$.  Since $\pi\divides c_{\ell}$, we have $\pi\divides b_0$; then, since $\pi\divides c_{\ell+1}$, we have $\pi\divides b_1$, and so on.  So $g$ is not primitive.
\end{proof}

Henceforth let $R$ be a UFD with quotient field $K$.


\begin{lemma}
Primitive polynomials over $R$ that are associated over $K$ are associated over~$R$.
\end{lemma}

\begin{proof}
If $f$ and $g$ are polynomials over $R$ that are associated over $K$, then $f=cg$ for some non-zero $c$ in $K$, and so $af=bg$ for some non-zero $a$ and $b$ in $R$.  If $f$ and $g$ are primitive, then $a$ and $b$ must be associates, so $b=ua$ for some unit $u$ in $R$, and then $f=ug$, so $f$ and $g$ are associates.
\end{proof}

\begin{lemma}
Primitive polynomials over $R$ are irreducible over $R$ if and only if irreducible over~$K$. 
\end{lemma}

\begin{proof}
Say $f$ and $g$ are non-constant polynomials over $K$ whose product $fg$ is over $R$ and primitive.  Then $af$ and $bg$ are over $R$ and primitive for some non-zero $a$ and $b$ in $K$.  By Gauss's lemma, $(af)(bg)$, which is $ab\cdot fg$, is primitive; but so is $fg$, so $ab$ must be a unit $u$ of $R$.  Hence $fg=(u\inv af)(bg)$, a factorization over $R$ into non-constant polynomials.  Since units of $R[X]$ are units of $K[X]$, it follows that a primitive polynomial that is irreducible over $R$ is still irreducible over $K$.  Conversely, a non-unit factor over $R$ of a primitive polynomial cannot be a constant, so it is not a unit of $K[X]$; thus if the polynomial is reducible over $R$, then it is reducible over $K$.
\end{proof}

Note however that if $f$ is primitive and irreducible over $R$, and $a$ in $R$ is not a unit or $0$, then $af$ is still irreducible over $K$ (since $a$ is a unit in $K$) but not over $R$.

\begin{theorem}
$R[X]$ is a UFD.
\end{theorem}

\begin{proof}
Every non-zero element of $R[X]$ can be written as $af$, where $a\in R$ and $f$ is primitive.  Then $f$ has a prime factorization over $K$ (since $K[X]$ is a Euclidean domain): say $f=f_0\dotsm f_{n-1}$.  There are non-zero $b_k$ in $K$ such that each $b_kf_k$ is a primitive polynomial over $R$.  The product of these is still primitive, and it is $\prod_kb_k\cdot f$; since $f$ too is primitive, $\prod_kb_k$ is a unit of $R$.  By the last lemma, each $b_kf_k$ is irreducible over $R$; factorizing $a$ in $R$, we obtain an irreducible factorization of $af$ over $R$.  Its uniqueness follows from its uniqueness over $K$ and the next-to-last lemma.
\end{proof}

\begin{theorem}[Eisenstein's Criterion]
If $f$ is a polynomial $\sum_{k=0}^na_kX^k$ over $R$, and $\pi$ is an irreducible element of $R$ such that
\begin{align*}
\pi^2&\ndivides a_0,&
\pi&\divides a_0,&
\pi&\divides a_1,&
&\dots,&
\pi&\divides a_{n-1},&
\pi\ndivides a_n,
\end{align*}
then $f$ is irreducible over $K$ and, if primitive, over $R$.
\end{theorem}

\begin{proof}
Suppose $f=gh$, where $g=\sum_{k=0}^nb_kX^k$ and $h=\sum_{k=0}^nc_kX^k$, all coefficients from $R$ (and some being $0$).  We may assume $f$ is primitive, so $g$ and $h$ must be primitive.  We may assume $\pi$ divides $b_0$, but not $c_0$.  Let $\ell$ be such that $\pi\divides b_k$ when $k<\ell$.  If $\ell=n$, then (since $g$ is primitive) we must have $b_n\neq0$, so $\deg g=n$, and $h=c_0$ and is a unit.  If $\ell<n$, then, since $\pi\divides a_{\ell}$, but
\begin{equation*}
a_{\ell}=b_0c_{\ell}+b_1c_{\ell-1}+\dotsb+b_{\ell}c_0,
\end{equation*}
we have $\pi\divides b_{\ell}$.  By induction, $\pi\divides b_k$ whenever $k<n$, so as before $\deg g=n$.
\end{proof}
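
For example, $X^n-2$ is irreducible over $\Q$ for every positive integer $n$, by the criterion with $\pi=2$.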

An application is the following.

\begin{theorem}
If $p$ is prime, then $\sum_{k=0}^{p-1}X^k$ is irreducible over $\Q$.
\end{theorem}

\begin{proof}
Consider
\begin{equation*}
\sum_{k=0}^{p-1}(X+1)^k
=\sum_{k=0}^{p-1}\sum_{j=0}^k\binom kjX^j
=\sum_{j=0}^{p-1}X^j\sum_{k=j}^{p-1}\binom kj
=\sum_{j=0}^{p-1}X^j\binom p{j+1},
\end{equation*}
which meets the Eisenstein Criterion since
\begin{align*}
\binom p1&=p,&
\binom p{j+1}&=\frac{p!}{(p-j-1)!(j+1)!},
\end{align*}
which is divisible by $p$ if and only if $j<p-1$.  Since $X\mapsto X+1$ determines an automorphism of $\Q[X]$, the irreducibility of $\sum_{k=0}^{p-1}(X+1)^k$ gives that of $\sum_{k=0}^{p-1}X^k$.
\end{proof}













%\newpage
%\part{Extras}%{Appendices}

%\part*{Appendices}

\appendix

\chapter{The German script}\label{app:German}

In his encyclopedic \emph{Model Theory} of 1993, Wilfrid Hodges observes \cite[Ch.~1, p.~21]{MR94e:03002}:
\begin{quotation}
  Until about a dozen years ago, most model theorists named structures
  in horrible Fraktur lettering.  Recent writers sometimes adopt a
  notation according to which all structures are named $M$, $M'$,
  $M^*$, $\bar M$, $M_0$, $M_i$ or occasionally $N$.  
I hope I cause no offence by using a more freewheeling notation.
\end{quotation}
For Hodges, \emph{structures} (as defined in \S\ref{sect:structures} above) are denoted by the letters $A$, $B$, $C$, and so forth; he refers to
their universes as
\textbf{domains}\index{domains}
and denotes these by $\operatorname{dom}(A)$ and so forth.  
%\begin{comment}
This practice is
convenient if one is using a typewriter (as in the preparation of
another of Hodges's books \cite{Hodges-Building}, from 1985).
In his \emph{Model Theory:  An Introduction} of 2002, David Marker \cite{MR1924282} uses `calligraphic' letters to denote structures, as distinct from their universes:
so $M$ is the universe of~$\mathcal M$, and $N$ of $\mathcal N$.
%\end{comment}
I still prefer the older practice of using capital Fraktur letters for structures:
%In \AmS\ \LaTeX\ (by which these notes are typeset) these letters are:
\begin{equation*}
\begin{array}{*{13}{c}}
\mathfrak A&\mathfrak B&\mathfrak C&\mathfrak D&\mathfrak E&\mathfrak F&\mathfrak G&\mathfrak H&\mathfrak I&\mathfrak J&\mathfrak K&\mathfrak L&\mathfrak M\\\mathfrak N&\mathfrak O&\mathfrak P&\mathfrak Q&\mathfrak R&\mathfrak S&\mathfrak T&\mathfrak U&\mathfrak V&\mathfrak W&\mathfrak X&\mathfrak Y&\mathfrak Z
  \end{array}
\end{equation*}
For the record, here are the minuscule Fraktur letters, which are also occasionally useful:
\begin{equation*}
\begin{array}{*{13}{c}}
\mathfrak a&\mathfrak b&\mathfrak c&\mathfrak d&\mathfrak e&\mathfrak f&\mathfrak g&\mathfrak h&\mathfrak i&\mathfrak j&\mathfrak k&\mathfrak l&\mathfrak m\\\mathfrak n&\mathfrak o&\mathfrak p&\mathfrak q&\mathfrak r&\mathfrak s&\mathfrak t&\mathfrak u&\mathfrak v&\mathfrak w&\mathfrak x&\mathfrak y&\mathfrak z
  \end{array}
\end{equation*}
A way to write these letters by hand is seen in a textbook
on the German language from 1931 \cite{German}:
%\vfill

\begin{figure}[p]
  \begin{sideways}
\centering
%\includegraphics[width=417pt,height=292pt]{german-script-cropped.eps}
\includegraphics%[width=1\textwidth]%[width=350pt]%
{../german-script-cropped.eps}
  \end{sideways}
%\caption{Alman el yaz\i s\i}%\label{fig:German}
\end{figure}

\chapter{Group-actions}\label{App:ga}

This chapter is a suggested reference from page~\pageref{app-ref}.
The chapter is partially inspired by an expository article
\cite{MR1997347} by Serre.
Suppose a group $G$ acts on a set $\setactedon$ by $(g,x)\mapsto gx$.
Just as, for an
element $a$ of $\setactedon$, we define
\begin{equation*}
  G_a=\{g\in G\colon ga=a\},
\end{equation*}
so, for an element $g$ of $G$, we may define
\begin{equation*}
  \setactedon^g=\{x\in \setactedon\colon gx=x\}:
\end{equation*}
this is the set of \textbf{fixed points}\index{fixed point} of $g$.
The orbit of $a$ under the action of $G$ is defined by
\begin{equation*}
  Ga=\{ga\setcolon g\in G\}.
\end{equation*}
Then $ga=ha\iff gG_a=hG_a$, and therefore
\begin{equation*}
  \size{Ga}=[G:G_a],
\end{equation*}
and the sets $Ga$ partition ${\setactedon}$.  We may define
\begin{equation*}
  {\setactedon}/G=\{Gx\setcolon x\in {\setactedon}\}.
\end{equation*}
Assume $G$ is finite.
For any function $\phi$ from $G$ to $\R$ and subset $X$ of $G$, we
define 
\begin{align*}
  \int_X\phi&=\sum_{g\in X}\frac{\phi(g)}{\size
  G},&
\int\phi&=\int_G\phi. 
\end{align*}
Assume ${\setactedon}$ is also finite, and
let $\chi$ be the function
\begin{equation*}
  g\mapsto\size{{\setactedon}^g}
\end{equation*}
from $G$ to $\upomega$.

\begin{lemma}[Burnside]\index{Burnside Lemma} \index{theorem!Burnside
    Lemma} 
  $\size{{\setactedon}/G}=\int\chi$.
\end{lemma}

\begin{proof}
Letting $R=\{(g,x)\in G\times\setactedon\colon gx=x\}$, we define $\pi_G$ as
$(g,x)\mapsto g$ from $R$ to $G$, and $\pi_{\setactedon}$ as
$(g,x)\mapsto x$ from $R$ to $\setactedon$.
Then
\begin{equation*}
  \size R=\sum_{g\in G}\size{\pi_G{}\inv(g)}=\sum_{g\in G}\chi(g),
\end{equation*}
but also
\begin{equation*}
\size{R}
=\sum_{x\in {\setactedon}}\size{G_x}
= \sum_{C\in {\setactedon}/G}\sum_{x\in C}\size{G_x}.
\end{equation*}
But if $C\in {\setactedon}/G$ and $a\in C$, then $\size C=[G:G_a]=\size G/\size{G_a}$.  Hence
\begin{equation*}
 \sum_{C\in {\setactedon}/G}\sum_{x\in C}\size{G_x}
=  \sum_{C\in {\setactedon}/G}\sum_{x\in C}\frac{\size G}{\size C}
=  \sum_{C\in {\setactedon}/G}\size G
=\size{{\setactedon}/G}\cdot\size G.\qedhere
\end{equation*}
\end{proof}
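
For example, let $G$ be the group of the four rotations of a square, acting on the set ${\setactedon}$ of the $2^4$ colorings of the vertices of the square with two colors.  The identity fixes all $16$ colorings, each quarter-turn fixes $2$, and the half-turn fixes $4$; so by the Lemma the number of colorings up to rotation is $(16+2+2+4)/4$, that is, $6$.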
Now define
\begin{equation*}
  G_0=\{g\in G\setcolon {\setactedon}^g=\emptyset\},
\end{equation*}
the set of elements of $G$ with no fixed points.

\begin{theorem}[Jordan]\index{Jordan Theorem} \index{theorem!Jordan Th---}
  If $\size{{\setactedon}/G}=1$ and $\size {\setactedon}\geq 2$, then 
  \begin{equation*}
      G_0\neq\emptyset.
  \end{equation*}
\end{theorem}

\begin{proof}
  By the Burnside Lemma, the average size of ${\setactedon}^g$ is $1$.  Since
  ${\setactedon}^1={\setactedon}$, and $\size {\setactedon}\geq 2$, we must have $\size {\setactedon}^g<1$ for some
  $g$ in $G$.
\end{proof}

A stronger result is the following:

\begin{theorem}[Cameron--Cohen]\index{Cameron--Cohen Theorem}
  If $\size{{\setactedon}/G}=1$ and $\size {\setactedon}\geq 2$, then 
  \begin{equation*}
    \size{G_0}\cdot\size
  {\setactedon}\geq\size G.
  \end{equation*}
\end{theorem}

\begin{proof}
  The action of $G$ on ${\setactedon}$ induces an action on ${\setactedon}\times {\setactedon}$, and
  $\size{({\setactedon}\times {\setactedon})^g}=\chi(g)^2$.  Now, $({\setactedon}\times {\setactedon})/G$ contains the
  diagonal $\{(x,x)\setcolon x\in{\setactedon}\}$, which is a single orbit since $\size{{\setactedon}/G}=1$, and at least one other element, so
  \begin{equation*}
    \int\chi^2\geq 2
  \end{equation*}
by Burnside's Lemma.  Let $n=\size {\setactedon}$.  Then for all $g$
in $G\setminus G_0$, we have $1\leq\chi(g)\leq n$ and therefore
\begin{equation*}
(\chi(g)-1)(\chi(g)-n)\leq0;
\end{equation*}
but $(\chi(g)-1)(\chi(g)-n)=n$ when $g\in G_0$.
Consequently,
\begin{align*}
  \frac{\size{G_0}\cdot\size {\setactedon}}{\size G}=n\int_{G_0}1
  &=     \int_{G_0}(\chi-1)(\chi-n) \\
  &\geq  \int_G(\chi-1)(\chi-n)=
  \int_G(\chi^2-1)\geq1.\qedhere 
\end{align*}
\end{proof}

Serre's article gives applications to topology and number-theory.

%\bibliographystyle{plain}
%\bibliography{../../references}
%\bibliography{../references}

\begin{thebibliography}{10}

\bibitem{MR0103812}
Chen~Chung Chang.
\newblock On unions of chains of models.
\newblock {\em Proc. Amer. Math. Soc.}, 10:120--127, 1959.

\bibitem{MR0159773}
Richard Dedekind.
\newblock {\em Essays on the theory of numbers. {I}: {C}ontinuity and
  irrational numbers. {II}: {T}he nature and meaning of numbers}.
\newblock authorized translation by Wooster Woodruff Beman. Dover Publications
  Inc., New York, 1963.

\bibitem{MR1487370}
Joel~David Hamkins.
\newblock Every group has a terminating transfinite automorphism tower.
\newblock {\em Proc. Amer. Math. Soc.}, 126(11):3223--3226, 1998.

\bibitem{German}
Roe-Merrill~S. Heffner.
\newblock {\em Brief {G}erman Grammar}.
\newblock D. C. Heath and Company, Boston, 1931.

\bibitem{MR0120156}
Leon Henkin.
\newblock On mathematical induction.
\newblock {\em Amer. Math. Monthly}, 67:323--338, 1960.

\bibitem{MR94e:03002}
Wilfrid Hodges.
\newblock {\em Model theory}, volume~42 of {\em Encyclopedia of Mathematics and
  its Applications}.
\newblock Cambridge University Press, Cambridge, 1993.

\bibitem{Hodges-Building}
Wilfrid Hodges.
\newblock {\em Building models by games}.
\newblock Dover Publications, Mineola, New York, 2006.
\newblock original publication, 1985.

\bibitem{MR600654}
Thomas~W. Hungerford.
\newblock {\em Algebra}, volume~73 of {\em Graduate Texts in Mathematics}.
\newblock Springer-Verlag, New York, 1980.
\newblock Reprint of the 1974 original.

\bibitem{MR0472307}
Morris Kline.
\newblock {\em Mathematical thought from ancient to modern times}.
\newblock Oxford University Press, New York, 1972.

\bibitem{MR12:397m}
Edmund Landau.
\newblock {\em Foundations of Analysis. {T}he Arithmetic of Whole, Rational,
  Irrational and Complex Numbers}.
\newblock Chelsea Publishing Company, New York, N.Y., third edition, 1966.
\newblock translated by F. Steinhardt; first edition 1951; first German
  publication, 1929.

\bibitem{MR0089813}
Jerzy {\L}o{\'s} and Roman Suszko.
\newblock On the extending of models ({IV}): {I}nfinite sums of models.
\newblock {\em Fund. Math.}, 44:52--60, 1957.

\bibitem{MR1924282}
David Marker.
\newblock {\em Model theory: an introduction}, volume 217 of {\em Graduate
  Texts in Mathematics}.
\newblock Springer-Verlag, New York, 2002.

\bibitem{MR0098777}
James~H. McKay.
\newblock Another proof of {C}auchy's group theorem.
\newblock {\em Amer. Math. Monthly}, 66:119, 1959.

\bibitem{Peano}
Giuseppe Peano.
\newblock The principles of arithmetic, presented by a new method (1889).
\newblock In van Heijenoort \cite{MR1890980}, pages 83--97.

\bibitem{Russell-letter}
Bertrand Russell.
\newblock Letter to {F}rege (1902).
\newblock In van Heijenoort \cite{MR1890980}, pages 124--5.

\bibitem{MR1997347}
Jean-Pierre Serre.
\newblock On a theorem of {J}ordan.
\newblock {\em Bull. Amer. Math. Soc. (N.S.)}, 40(4):429--440 (electronic),
  2003.

\bibitem{Skolem-some-remarks}
Thoralf Skolem.
\newblock Some remarks on axiomatized set theory (1922).
\newblock In van Heijenoort \cite{MR1890980}, pages 290--301.

\bibitem{MR801316}
Simon Thomas.
\newblock The automorphism tower problem.
\newblock {\em Proc. Amer. Math. Soc.}, 95(2):166--168, 1985.

\bibitem{MR1890980}
Jean van Heijenoort, editor.
\newblock {\em From {F}rege to {G}\"odel: {A} source book in mathematical
  logic, 1879--1931}.
\newblock Harvard University Press, Cambridge, MA, 2002.

\bibitem{von-Neumann-ax}
John von Neumann.
\newblock An axiomatization of set theory (1925).
\newblock In van Heijenoort \cite{MR1890980}, pages 393--413.

\bibitem{von-Neumann}
John von Neumann.
\newblock On the introduction of transfinite numbers (1923).
\newblock In van Heijenoort \cite{MR1890980}, pages 346--354.

\bibitem{Zermelo-invest}
Ernst Zermelo.
\newblock Investigations in the foundations of set theory {I} (1908a).
\newblock In van Heijenoort \cite{MR1890980}, pages 199--215.

\end{thebibliography}




%\printindex

\end{document}



