\documentclass[%
version=last,%
a5paper,
12pt,%
headings=small,%
bibliography=totoc,%
twoside,%
reqno,%
cleardoublepage=empty,%
open=any,%
%parskip=half,%
draft=true,%
%DIV=classic,%
%DIV=11,%
DIV=12,%
headinclude=false,%
pagesize]
{scrbook}

%\usepackage[notref,notcite]{showkeys}

% Page-style setup: page number in the outer footer, running head in the
% inner footer.  scrlayer-scrpage supersedes the obsolete scrpage2 package
% (scrpage2 has been removed from current KOMA-Script releases); the
% commands below are compatible, except that \clearscrheadings is now
% spelled \clearpairofpagestyles.
\usepackage{scrlayer-scrpage}
\pagestyle{scrheadings}
\clearpairofpagestyles
\ofoot{\pagemark}
\ifoot{\headmark}
%\cehead{Ultraproducts}

%\addtocounter{tocdepth}{-1}

%\usepackage[turkish]{babel}
%\usepackage[utf8]{inputenc}
%\usepackage[latin5]{inputenc}

\usepackage{hfoldsty}
\usepackage[neverdecrease]{paralist}
\usepackage{cclicenses}

\usepackage{relsize} % Here \smaller scales by 1/1.2; \relscale{X} scales by X

% Redefine the quote environment: quotations are set at 90% of the text
% size (via \relscale from the relsize package), indented symmetrically by
% 5% of \textwidth on each side; the empty \item[] starts the quoted text
% without a label.
\renewenvironment{quote}{\begin{list}{}
{\relscale{.90}\setlength{\leftmargin}{0.05\textwidth}
\setlength{\rightmargin}{\leftmargin}}
\item[]}
{\end{list}}



\usepackage{verbatim}


%\begin{comment}
  
\usepackage{float}
\floatstyle{boxed}
\restylefloat{figure}
% This makes ``Figure x.y.'' bold and adds a colon
%\end{comment}

\renewcommand{\captionformat}{\ }  % doesn't work with the previous.

\newcommand{\enquote}[1]{``#1''}

\usepackage{amssymb, amsmath,
  amsthm,url,upgreek,bm,mathrsfs,stmaryrd,pstricks,pst-node,pst-tree,pst-plot}
\allowdisplaybreaks
\usepackage[all]{xy}

\newcommand{\sgn}{\operatorname{sgn}}
\newcommand{\Sym}[1]{\operatorname{Sym}(#1)}  % symmetric group
\newcommand{\Alt}[1]{\operatorname{Alt}(#1)}       % alternating group
\newcommand{\id}[1]{\operatorname{id}_{#1}}
\newcommand{\gid}{\operatorname e}  % identity of group
\newcommand{\modsim}{/\mathord{\sim}}  % modulo the eq-ren \sim
\newcommand{\simcl}{^{\sim}}  % \sim-class
\newcommand{\eqc}[1]{[#1]}             % equivalence class
\newcommand{\trivgp}{\{\gid\}}  % trivial group
\newcommand{\subgp}{<}              % subgroup
\newcommand{\nsubgp}{\vartriangleleft}  % normal subgroup
\newcommand{\Ker}[1]{\ker(#1)}
\newcommand{\im}[1]{\operatorname{im}(#1)}
%\newcommand{\pid}{\textsc{pid}}

\newcommand{\End}[1]{\operatorname{End}(#1)}
\newcommand{\Z}{\mathbb Z}
\newcommand{\N}{\mathbb N}
\newcommand{\R}{\mathbb R}
\newcommand{\Q}{\mathbb Q}
\newcommand{\C}{\mathbb C}
\newcommand{\F}{\mathbb F}
\newcommand{\Qp}{\Q^+}         % positive rationals
\newcommand{\rc}[1]{#1^{\mathrm{rc}}}         % real closure
\newcommand{\Rp}{\R^+}         % positive real numbers

\newcommand{\zfc}{\mathrm{ZFC}}
\newcommand{\zf}{\mathrm{ZF}}
\newcommand{\ac}{\marginline{\textbf{AC}}}
\newcommand{\PI}{\marginline{\textbf{PI}}}
\newcommand{\win}{\mathrel{\text{in}}}
\newcommand{\It}{\mathbf{I}}
\newcommand{\on}{\mathbf{ON}}       % ordinals
\newcommand{\cn}{\mathbf{CN}}       % cardinals

\newcommand{\var}[1]{\operatorname Z(#1)}

\newcommand{\U}[1]{\operatorname U_{#1}}
\DeclareMathOperator{\Zlop}{Z}
\newcommand{\V}[2][L]{\Zlop_{#1}(#2)}
\newcommand{\I}[2][K]{\operatorname I_{#1}(#2)}
\newcommand{\zk}[2][K]{\overline{#2}^{#1}}
%\newcommand{\tp}[1]{\operatorname{tp}(#1)}
\newcommand{\Exists}[1]{\exists#1\;}
\newcommand{\Forall}[1]{\forall#1\;}
%\newcommand{\Or}{\lor}
\newcommand{\Or}{\DOTSB\;\mathbin{\textsc{or}}\;}
\renewcommand{\And}{\DOTSB\;\mathbin{\&}\;}
\newcommand{\lto}{\Rightarrow}
\newcommand{\liff}{\Leftrightarrow}
\newcommand{\vrbl}[1]{\operatorname{var}(#1)}
\newcommand{\fv}[1]{\operatorname{fv}(#1)}

\newcommand{\card}[1]{\lvert#1\rvert}  % cardinality
%\newcommand{\abs}[1]{\lvert#1\rvert}
\newcommand{\Abs}[1]{\operatorname{Abs}(#1)}  % absolute numbers
\newcommand{\primei}{\mathfrak{p}}      % a prime ideal
\newcommand{\maxi}{\mathfrak{m}}        % a maximal ideal
\newcommand{\supp}[1]{\operatorname{supp}(#1)}
\newcommand{\Supp}[1]{\operatorname{supp}[#1]}

\newcommand{\sig}[1][S]{\mathscr{#1}}

\newcommand{\Str}[1][\sig]{\mathbf{Str}_{#1}}
\newcommand{\Mod}[1]{\mathbf{Mod}(#1)}
\DeclareMathOperator{\sentences}{Sen}
\newcommand{\Sn}[1][\sig]{\sentences(#1)}
%\newcommand{\qfsn}[1][\sig]{\sentences^0(#1)} % quantifier-free sentences
\DeclareMathOperator{\formulas}{Fm}
\newcommand{\Fm}[2][n]{\formulas_{#1}(#2)}
\newcommand{\Th}[1]{\operatorname{Th}(#1)}
\newcommand{\bv}[2][\mathscr A]{\lVert#2\rVert_{#1}} % Boolean value

\newcommand{\mts}{\mathbf{Mod}/\mathord{\equiv}}
\newcommand{\SnC}{\operatorname{Sn}}
\newcommand{\ThC}[1][\mathcal K]{\operatorname{Th}(#1)}
\newcommand{\StC}[1][\SnC]{\operatorname S(#1)}


\newcommand{\Def}[1][n]{\operatorname{Def}^{#1}_B(\str A)}
\newcommand{\proves}{\vdash}
\newcommand{\nproves}{\nvdash}
\newcommand{\divides}{\mathrel{\mid}}
\newcommand{\ndivides}{\mathrel{\nmid}}

%\newcommand{\indexset}{\Omega}
\newcommand{\ideal}{I}
%\newcommand{\LT}[2]{\operatorname{Lin}_{#1}(#2)}
\newcommand{\Lin}[2][0]{\operatorname{Lin}_{#1}(#2)}
\newcommand{\St}[2][]{\operatorname S_{#1}(#2)} % S_0: complete theories

\newcommand{\included}{\subseteq}
\newcommand{\nincluded}{\nsubseteq}
\newcommand{\includes}{\supseteq}
\newcommand{\pincluded}{\subset}
\newcommand{\pincludes}{\supset}
\newcommand{\fsub}{\included_{\mathrm{fin}}}

\newcommand{\units}[1]{{#1}^{\times}}
\newcommand{\Zmod}[1]{\Z_{#1}}
\newcommand{\pow}[1]{\mathscr P(#1)}
\newcommand{\powf}[1]{\mathscr P_{\upomega}(#1)}
\newcommand{\symdiff}{\vartriangle}
\newcommand{\comp}{^{\mathrm c}}
\newcommand{\alg}[1]{{#1}^{\mathrm{alg}}}
\newcommand{\sep}{^{\mathrm{sep}}}
\newcommand{\str}[1]{\mathfrak{#1}}
\newcommand{\inv}{^{-1}}
%\newcommand{\rad}{\sqrt{\vphantom{I}}}
%\newcommand{\rad}{\surd}
\newcommand{\spec}[1][R]{\operatorname{Spec}(#1)}
\newcommand{\Stone}[1]{\operatorname{Sto}(#1)}  % Stone space of #1

\newcommand{\Oplain}{\mathscr O}
\newcommand{\Oh}[1]{\Oplain(#1)}
\newcommand{\Gal}[1]{\operatorname{Gal}(#1)}
\newcommand{\Fix}[1]{\operatorname{Fix}(#1)}
\newcommand{\pc}[1][R]{#1^{+}}
\newcommand{\st}[1]{{}^*\!#1}
\newcommand{\Br}[1]{\operatorname B(#1)}
\newcommand{\Aut}[1]{\operatorname{Aut}(#1)}
\newcommand{\Frob}{\operatorname{Frob}}
\newcommand{\gpgen}[1]{\langle#1\rangle}
\newcommand{\Char}[1]{\operatorname{char}(#1)}
\newcommand{\diag}[1]{\operatorname{diag}(#1)}
\newcommand{\lcm}{\operatorname{lcm}}

\newcommand{\nmodels}{\nvDash}
\renewcommand{\models}{\vDash}

\renewcommand{\phi}{\varphi}
\renewcommand{\theta}{\vartheta}
\renewcommand{\epsilon}{\varepsilon}
\let\oldvec\vec
\renewcommand{\vec}[1]{\bm{#1}}
\renewcommand{\setminus}{\smallsetminus}
\renewcommand{\emptyset}{\varnothing}
\let\oldleq\leq
\renewcommand{\leq}{\leqslant}
\renewcommand{\nleq}{\nleqslant}
\renewcommand{\geq}{\geqslant}

%\renewcommand{\theequation}{\fnsymbol{equation}}

%\newtheorem*{specialthm}{}
\newtheorem*{zornslemma}{Zorn's Lemma}
\newtheorem*{mith}{Maximal Ideal Theorem}
\newtheorem*{pith}{Prime Ideal Theorem}
\newtheorem{axiom}{Axiom}
\newtheorem{lemma}{Lemma}%[section]
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}{Corollary}
\numberwithin{corollary}{theorem}

\newtheorem{porism}{Porism}
\numberwithin{porism}{theorem}

\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{fact}[theorem]{Fact}
\newtheorem{question}{Question}
%\newtheorem{problem}{Problem}
\theoremstyle{definition}
\newtheorem*{definition}{Definition}

\begin{document}
\title{Ultraproducts}
\author{David Pierce}
\date{September 15, 2014\\
reformatted July 23, 2015\\
and again November 11, 2016\\
and again (slightly) January 10, 2018}
\publishers{Mimar Sinan G\"uzel Sanatlar \"Universitesi\\
Matematik B\"ol\"um\"u\\
\url{http://mat.msgsu.edu.tr/~dpierce/}}
\uppertitleback{\centering
\emph{Ultraproducts}\\
\mbox{}\\
This work is licensed under the\\
 Creative Commons Attribution--Noncommercial--Share-Alike
License.\\
 To view a copy of this license, visit\\
  \url{http://creativecommons.org/licenses/by-nc-sa/3.0/}\\
\mbox{}\\
\cc \ccby David Pierce \ccnc \ccsa\\
\mbox{}\\
Mathematics Department\\
Mimar Sinan Fine Arts University\\
Istanbul, Turkey\\
\url{http://mat.msgsu.edu.tr/~dpierce/}\\
\url{dpierce@msgsu.edu.tr}
}
%\frontmatter
\maketitle

%\mainmatter

\addchap{Preface}

In preparing the first edition of this text,
I tried to write down everything that I might talk about
in an upcoming course on ultraproducts.  
I had no clear plan for a coherent whole.
From my records,
here is a summary of the six days of the course 
(August 13--19, 2012, Monday to Sunday, with Thursday off, 
8--10 o'clock in the morning):
\begin{compactenum}
  \item
$\R^{\upomega}/M$.
\item
The ordering of $\R^{\upomega}/M$; bad statement of \L o\'s's Theorem.
\item
Better statement of \L o\'s's Theorem; proof.
\item
Compactness.
\item
Voting (Arrow's Theorem).
\item
The ultraproduct scheme.
\end{compactenum}

Later I edited the text
for my own use in a two-week course, in July, 2014.
For the sake of completeness, at least,
I incorporated more background.
In the fall of 2013, I had taught a graduate course on groups and rings,
and I thoroughly edited my notes for \emph{that} course;
then I took sections from those notes to add to the present ones.

I added and rearranged a lot.
I worked out quite generally
the notion of a Galois correspondence
and its relation to topology.
I also investigated the Axiom of Choice
and distinguished the results that need it
from those that need only the Prime Ideal Theorem.
Some of this work would be relevant to
a talk on the Compactness Theorem of logic
given at the Caucasian Mathematics Conference,
Tbilisi, September 5--6, 2014,
and then again at a tutorial on the Compactness Theorem
given June 20--1, 2015,
at the 5th World Congress and School on Universal Logic,
Istanbul.

I have not properly revisited the text since 2014.
It is still quite rough.
It uses more field theory than it actually develops.
It grew so long that to read it straight through,
checking for coherence, would be difficult.
I have not done this.
I \emph{did} try to add many cross-references.

\addchap{Preface to the first edition}

These notes are for a course called Ultraproducts
and Their Consequences, to be given at the Nesin Mathematics Village in
\c Sirince,
Sel\c cuk, \.Izmir, Turkey, in August, 2012.  The notes are mainly for my use;
they do not constitute a textbook, although parts of them
may have been written in textbook style.  The notes have not been
thoroughly checked for correctness; writing the notes has been my own
way of learning some topics.

The notes have grown like a
balloon, at all points: I have added things here and there as I have
seen that they
are needed or useful.  I have also rearranged sections.  There is too
much material here for a week-long course.  Some of the material is
background necessary for thorough consideration of some topics; this
background may be covered in a simultaneous course in \c Sirince.   

The catalogue listing for the
course\footnote{From
  \url{http://matematikkoyu.org/etkinlikler/2012-tmd-lisans-lisansustu/ultra_pierce.pdf},
  to which there is a link on
  \url{http://matematikkoyu.org/etkinlikler/2012-tmd-lisans-lisansustu/}
  as of August 6, 2012.}(with abstract as submitted by me on January
27, 2012) is as follows. 
  \begin{compactdesc}\relscale{0.9}
    \item[Title of course:] Ultraproducts and their consequences

\item[Instructor:] Assoc.\ Prof.\ David Pierce

\item[Institution:] Mimar Sinan GS\"U

\item[Dates:] 13--19 A\u gustos 2012

\item[Prerequisites:] Some knowledge of algebra, including the theorem
  that a quotient of a ring by an ideal is a field if and only if the
  ideal is maximal. 

\item[Level:] Advanced undergraduate and graduate

\item[Abstract:] An ultraproduct is a kind of average of infinitely many
structures. The construction is usually traced to a 1955 paper of
Jerzy Los; however, the idea of an ultraproduct can be found in Kurt
Goedel's 1930 proof (from his doctoral dissertation) of the
Completeness Theorem for first-order logic.  Non-standard analysis,
developed in the 1960s by Abraham Robinson, can be seen as taking
place in an ultraproduct of the ordered field of real numbers: more
precisely, in an ultrapower.  Indeed, for each integer, the `average'
real number is greater than that integer; therefore an ultrapower of
the ordered field of real numbers is an ordered field with infinite
elements and therefore infinitesimal elements.  Perhaps the first
textbook of model theory is Bell and Slomson's \emph{Models and
Ultraproducts} of 1969: the title suggests the usefulness of
ultraproducts in the development of various model-theoretic ideas.  Our
course will investigate ultraproducts, starting from one of the
simplest interesting examples: the quotient of the cartesian product
of an infinite collection of fields by a maximal ideal that has
nontrivial projection onto each coordinate.  No particular knowledge
of logic is assumed. 
  \end{compactdesc}

Such was the abstract that I submitted in January.  I have written the
following notes since then, by way of working out for myself some of
the ideas that might be presented in the course.  I have tried to
emphasize examples.  In some cases, I may have sacrificed generality
for concreteness.  A theorem that I might have covered, but have not,
is the theorem of Keisler and Shelah that elementary equivalence is
the same thing as isomorphism of ultrapowers. 


\tableofcontents

\listoffigures

%\setcounter{chapter}{-1}


\chapter{Introduction}\label{ch:intro}

In this text,
the \textbf{natural numbers} begin with $0$ and compose the set
$\upomega$.  Thus,%%%%% 
\footnote{The letter $\upomega$\label{fn:omega} 
is not the minuscule English letter called \emph{double u,} 
but the minuscule Greek \emph{omega,} 
which is probably in origin a double o.  
Obtained with the control sequence \url{\upomega} 
from the \url{upgreek} package
for \LaTeX, 
the $\upomega$ used here is upright, 
unlike the standard slanted $\omega$ (obtained with \url{\omega}).  
The latter $\omega$ might be used as a variable.
We shall similarly distinguish between the constant $\uppi$ 
(used for the ratio of the circumference to the diameter of a circle,
as well as for the \emph{coordinate projections} 
defined on page~\pageref{coord-proj}) 
and the variable $\pi$.
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{equation*}
\upomega=\{0,1,2,\dots\}.
\end{equation*}
We shall use this set in two ways:
\begin{compactenum}[1)]
\item
 as an index-set for countably infinite sequences $(a_k\colon k\in\upomega)$;
 \item
 as the cardinal number of each countably infinite set.
 \end{compactenum}  
We shall also make use of the following feature of the elements of $\upomega$:
each of them is a set whose cardinal number is itself.
That is, each $n$ in $\upomega$ is an $n$-element set.
More precisely,
\begin{equation*}
n=\{0,\dots,n-1\},
\end{equation*}
so that
\begin{align*}
0&=\emptyset,&1&=\{0\},&2&=\{0,1\},&3&=\{0,1,2\},
\end{align*}
and so on.
If $k$ and $n$ are in $\upomega$, then
\begin{equation*}
  k\in n\iff k\pincluded n;
\end{equation*}
in this case we may write simply
\begin{equation*}
  k<n.
\end{equation*}

If $A$ and $B$ are sets, then a \textbf{function} from $B$ to $A$
is just a subset $f$ of $B\times A$ such that,
for every $x$ in $B$, 
there is exactly one $y$ in $A$ such that $(x,y)\in f$.
In this case we write
\begin{equation*}
  y=f(x).
\end{equation*}
Then the function $f$ is the set
\begin{equation*}
  \{(x,f(x))\colon x\in B\}.
\end{equation*}
We may abbreviate this as
\begin{equation*}
  x\mapsto f(x);
\end{equation*}
this notation is useful 
when we do not actually have a single letter for $f$ itself,
but have an expression for $f(x)$.
When we do have a letter like $f$,
then, in place of $f(x)$, we may use one of the notations
\begin{align*}
  &f_x,&&f^x
\end{align*}
(see below).
The set of all functions from $B$ to $A$ will be denoted by
\begin{equation*}
A^B.%=\{\text{functions from $B$ to $A$}\}.
\end{equation*}
If $f\in A^B$, then $B$ is the \textbf{domain} of $f$,
while the \textbf{range} of $f$ is the subset
\begin{equation*}
  \{f(x)\colon x\in B\}
\end{equation*}
of $A$.  
One may say that $A$ is a \textbf{codomain} of $f$, 
but in this case, if $A\included C$, then $C$ is also a codomain of $f$.
In the expression for the range of $f$,
if we replace the braces with round brackets (parentheses), we obtain%
\label{indexed-first}
\begin{equation*}
  (f(x)\colon x\in B),
\end{equation*}
which we shall understand as yet another notation for the function $f$ itself
(strictly, we may understand it as an \emph{indexed set}:
see page \pageref{indexed-set}).

As a special case of the foregoing notation, if $n\in\upomega$, we have
\begin{equation*}
A^n=\{\text{functions from $n$ to $A$}\}.
\end{equation*}
Instead of $(b_k\colon k<n)$ or $(b^k\colon k<n)$,
an element of $A^n$ may be written as one of
\begin{align*}
&(b_0,\dots,b_{n-1}),&&(b^0,\dots,b^{n-1}).
\end{align*}
In a slight departure from the foregoing notation,
we may abbreviate this element of $A^n$ by
\begin{equation*}
\vec b,
\end{equation*}
in boldface: it is an \textbf{$n$-tuple} of elements of $A$.  
We shall occasionally use both upper and lower indices at the same time, 
as for example in consideration of sequences $(\vec b_k\colon k\in\upomega)$, 
where $\vec b_k\in A^n$, so that
\begin{equation*}
\vec b_k=(b_k^0,\dots,b_k^{n-1}).
\end{equation*}
Note that
\begin{equation*}
A^0=\{0\}=1.
\end{equation*}

According to what seems to be all but universal usage today,
the ring of (rational) integers is 
\begin{equation*}
  \Z;
\end{equation*}
this is a sub-ring of the field
\begin{equation*}
  \Q
\end{equation*}
of rational numbers, which in turn is a subfield of the field
\begin{equation*}
  \R
\end{equation*}
of real numbers.  

We shall use $\N$ to denote the set of \emph{positive} integers,
so that
\begin{equation*}
\N=\{1,2,3,\dots\}.
\end{equation*}
Literally then $\upomega$ is the set $\{0\}\cup\N$
of \emph{non-negative} integers.
However, when we consider an element $n$ of $\upomega$ as an integer
and hence as a rational number,
we are not interested in the internal structure of $n$ as a set.
This is a reason why it may be useful to introduce the notation $\N$.
It is useful \emph{not} to put $0$ in $\N$,
because then we can describe the set $\Qp$ of positive rational numbers
as $\{x/y\colon(x,y)\in\N\times\N\}$ (see page \pageref{Qp}).







\chapter{Mathematical foundations}\label{ch:N}%\label{part:N}

\section{Sets as collections}

Most objects of mathematical study can be understood as \emph{sets.}
A set is a special kind of \emph{collection.}
A \textbf{collection} is many things, considered as one thing.  
Those many things are the 
\textbf{members}%
\index{member}
or 
\textbf{elements}%
\index{element}
of the collection.  
The members \textbf{compose} the collection, 
and the collection \textbf{comprises} them.%%%%%
\footnote{Thus the relations 
named by the verbs \enquote{compose} and \enquote{comprise} 
are converses of one another; 
but native English speakers often confuse these two verbs.}  
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Each member \textbf{belongs} to the collection 
and is \textbf{in} the collection, 
and the collection \textbf{contains} the member.  

We shall designate certain collections as \textbf{sets.}
We shall not define the collection of all sets;
rather, we shall identify some rules for obtaining sets
that will allow us to do the mathematics that we want.
These rules will be expressed by \emph{axioms.} 
We shall use versions of the so-called Zermelo--Fraenkel Axioms
with the Axiom of Choice.  
The collection of these axioms is denoted by $\zfc$.
Most of these axioms were described by Zermelo in 1908 \cite{Zermelo-invest}.

We study sets axiomatically, 
because a na\"\i ve approach can lead to contradictions.
For example, one might think na\"\i vely 
that there was a collection of all collections.
But there can be no such collection,
because if there were, then there would be 
a collection of all collections that did not contain themselves,
and \emph{this} collection would contain itself if and only if it did not.
This result is the \textbf{Russell Paradox,}\label{Russell}
described in a letter \cite{Russell-letter} from Russell to Frege in 1902.

The elements of every set will be sets themselves.
This is a conceptual and notational convenience
that will turn out to be adequate for our purposes,
even though, in ordinary life,
the members of a collection are not usually collections themselves.

By the definition to be given officially on page \pageref{equal-off}, 
two sets will be
\emph{equal}\label{equal}%
\index{equal}
if they have the same elements.%%%%%
\footnote{This definition of equality
is usually an axiom, rather than a definition.
That is because equality is confused with \emph{identity,}
and the identity of two objects 
is considered to be an inherent property of the objects themselves,
rather than a property that we assign to them.
By this way of thinking, 
we say $1/2=2/4$ because the expressions $1/2$ and $2/4$ 
are names of the same equivalence-class $\{(x,y)\in\N\times\N\colon 2x=y\}$.
But we can just as well say that,
if $a$, $b$, $c$, and $d$ are positive integers, 
then, \emph{by definition,} the expression $a/b=c/d$ 
means that the products $ad$ and $bc$ are the same.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
There will be an \emph{empty set,}\label{empty} denoted by
\begin{equation*}
\emptyset;
\end{equation*}
this will have no elements.
If $a$ is a set, then there will be a set denoted by
\begin{equation*}
\{a\},
\end{equation*}
with the unique element $a$.  If $b$ is also a set, then there will be a set denoted by
\begin{equation*}
a\cup b,
\end{equation*}
whose members are precisely the members of $a$ and the members of $b$.  
Thus there will be sets $a\cup\{b\}$ and $\{a\}\cup\{b\}$; 
the latter is usually written as
\begin{equation*}
\{a,b\}.
\end{equation*}
If $c$ is another set, we can form the set $\{a,b\}\cup\{c\}$, 
which we write as
\begin{equation*}
\{a,b,c\},
\end{equation*}
and so forth.  
This will allow us to build up the following infinite sequence:
\begin{align*}
&\emptyset,&
&\{\emptyset\},&
&\bigl\{\emptyset,\{\emptyset\}\bigr\},&
&\Bigl\{\emptyset,\{\emptyset\},\bigl\{\emptyset,\{\emptyset\}\bigr\}\Bigr\},&
&\dots
\end{align*}
By definition,\label{nat} these sets will be the natural numbers $0$, $1$, $2$, $3$, \dots
To be more precise, they are the \textbf{von Neumann natural numbers} \cite{von-Neumann}.

\section{Set theory}\label{sect:sets}

\subsection{Notation}\label{subsect:notation}

Our formal axioms for set theory will be written in a certain \emph{logic,}
whose symbols are:
\begin{compactenum}[1)]
\item
\textbf{variables,} as $x$, $y$, and $z$;
\item
\textbf{constants,}
as $a$, $b$, and $c$, or $A$, $B$, and $C$;
\item
the symbol $\in$ denoting the membership relation;
\item
the \textbf{Boolean connectives} of propositional logic:
\begin{compactenum}
\item
the singulary connective $\lnot$ (\enquote{not}), and
 \item
the binary connectives
\begin{compactenum}[i)]
\item 
$\lor$ (\enquote{or}), 
\item
$\land$ (\enquote{and}), 
\item
$\lto$ (\enquote{implies}), and 
\item
$\liff$ (\enquote{if and only if});
\end{compactenum}
\end{compactenum}
\item
parentheses;
\item
the \textbf{quantification symbols}
\begin{compactenum}
\item 
$\exists$ (\enquote{there exists}) and 
\item
$\forall$ (\enquote{for all}).
\end{compactenum}
\end{compactenum}
We could do without constants as distinct from variables;
but they seem to be useful.
The distinction between constants and variables
can be traced back 
at least as far as Descartes's \emph{Geometry} \cite{Descartes-Geometry} 
of 1637,
where letters like $a$, $b$, and $c$ are used for known lengths,
and $z$, $y$, and $x$, for unknown lengths.

A variable or a constant is called a \textbf{term.}  
If $t$ and $u$ are terms, then the expression
\begin{equation*}
t\in u
\end{equation*}
is called an \textbf{atomic formula.}  
It means $t$ is a member of $u$.
From atomic formulas,\label{formula}
 other formulas are built up \emph{recursively} 
by use of the symbols above, according to certain rules, as follows:
\begin{compactenum}
\item
If $\phi$ is a formula, then so is its \textbf{negation} $\lnot\phi$.
\item
If $\phi$ and $\psi$ are formulas, then so are
\begin{compactenum}
\item 
the \textbf{disjunction} $(\phi\lor\psi)$,
\item
the \textbf{conjunction} $(\phi\land\psi)$,
\item
the \textbf{implication} $(\phi\lto\psi)$, and
\item
the \textbf{equivalence} $(\phi\liff\psi)$.
\end{compactenum}
\item
If $\phi$ is a formula and $x$ is variable, 
then
\begin{compactenum}
\item 
the \textbf{instantiation} $\Exists x\phi$ and
\item
the \textbf{generalization} $\Forall x\phi$
\end{compactenum}
are both formulas.
\end{compactenum}
The expressions $\exists x$ and $\forall x$ 
are called \textbf{quantifiers.}\label{quantifier}
The negation of the formula $t\in u$ is usually written as
\begin{equation*}
t\notin u
\end{equation*}
rather than $\lnot\;t\in u$;
it says $t$ is \emph{not} a member of $u$.  
The expression
\begin{equation*}
  \Forall z(z\in x\lto z\in y)
\end{equation*}
is the formula saying that every element of $x$ is an element of $y$.  
Another way to say this is that $x$ is a 
\textbf{subset}%
\index{subset}
of $y$,
or $x$ is \textbf{included} in $y$,
or $y$ 
\textbf{includes}%
\index{include}
$x$.  We abbreviate the formula by%%%%%
\footnote{The relation $\included$ of \emph{being included in}
is completely different from the relation $\in$ of \emph{being contained in.}
However, many mathematicians confuse these relations in words, 
using \enquote{contained} to describe both.}
\begin{equation*}
x\included y.
\end{equation*}
Then the expression
\begin{equation*}
  (x\included y\land y\included x)
\end{equation*}
stands for the formula
saying that $x$ and $y$ have the same members, 
so that they are \textbf{equal}\label{equal-off} 
by the definition foretold above 
(page~\pageref{equal}); in this case we use the abbreviation
\begin{equation*}
x=y.
\end{equation*}
The negation of this is usually written as
\begin{equation*}
  x\neq y.
\end{equation*}
Another abbreviation\label{precedence} that we use
is to eliminate the outer parentheses from a formula (when they are present)
and to eliminate internal parentheses
when they can be resupplied according to the following rules:\label{abbrev}
\begin{compactenum}
\item 
The binary connectives $\land$ and $\lor$
have priority over $\lto$ and $\liff$,
so that, for example, 
$\phi\land\psi\lto\chi$ means $(\phi\land\psi)\lto\chi$.
\item
When two connectives $\lto$ appear without an intervening parenthesis,
the arrow on the right has priority,
so $\phi\lto\psi\lto\chi$ means $\phi\lto(\psi\lto\chi)$.
\end{compactenum}

\subsection{Truth and falsity}

The same variable may have several \textbf{occurrences} 
in a particular formula.
All occurrences of the variable $x$ 
in the formulas $\Exists x\phi$ and $\Forall x\phi$ 
are said to be \textbf{bound,}%%%%%
\footnote{The word \enquote{bound} here 
is the past participle of the verb \enquote{to bind,}
meaning \emph{tie up or restrain.}
There is another verb, \enquote{to bound,}
meaning \emph{put a bound or limit on}:
this is also used in mathematics, 
but its past participle is \enquote{bounded.}
Although they have similar meanings,
the two verbs \enquote{to bind} and \enquote{to bound} 
have different origins.
The verb \enquote{to bind} has been part of English
for as long as that language is recognized to have existed: 
since the eighth century.
That is, the precursor of \enquote{to bind} is found in Old English.
The verb \enquote{to bound} is based on the noun \enquote{bound,}
which entered Middle English in the 12th century from the Old French noun 
that became the modern French \emph{borne.}}  
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
and they remain bound when other formulas are built up from these formulas.  
Occurrences of a variable that are not bound are \textbf{free.}  
The same variable can have both bound and free occurrences in the same formula,
although this can always be avoided.
For example, in the formula $x\in y\lto\Forall yx\in y$,
the first occurrence of $y$ is free, 
but the other two occurrences are bound;
nonetheless, 
the formula will have the same meaning as $x\in y\lto\Forall zx\in z$,
in which the only occurrence of $y$ is free.

If a variable has free occurrences in a formula,
then the variable is said to be a \textbf{free variable} of the formula,
even though the variable might also have bound occurrences in the formula.
A \textbf{sentence} is a formula like $\Forall x\Exists yx\in y$ 
or $\Forall xx\notin a$,
with no free variables.
A \textbf{singulary}%%%%%
\footnote{In place of \enquote{singulary,} 
the word \textbf{unary}\index{unary} is more common, 
but less etymologically correct.}
  %%%%%
formula is a formula with only one free variable.
If $\phi$ is a singulary formula, and its free variable is $x$, 
then we may write $\phi$ as
\begin{equation*}
\phi(x).
\end{equation*}
By replacing every free occurrence of $x$ in $\phi$ with a constant $a$, 
we obtain the formula
\begin{equation*}
\phi(a),
\end{equation*}
which is a sentence.

An arbitrary sentence has a \textbf{truth-value,}\label{truth-sets}
which is either \textbf{true} or \textbf{false,} but not both.
However, the truth-value of a sentence in which constants occur
may depend on which sets are named by those constants.
Like the definition of formulas in the first place,
the definition of the truth-value
of sentences is \emph{recursive,} as follows.
\begin{compactenum}
\item 
The atomic sentence $a\in b$ is true if and only if 
the set $a$ is an element of the set $b$.
\item
If $\sigma$ and $\tau$ are sentences, and $*$ is a binary Boolean connective,
then the truth-value of the sentence $(\sigma*\tau)$
depends on the truth-value of $\sigma$ and $\tau$
according to the usual rules of propositional logic:
\begin{compactenum}
\item 
$(\sigma\lor\tau)$ is true if and only if 
at least one of $\sigma$ and $\tau$ is true.
\item
$(\sigma\land\tau)$ is true if and only if
both $\sigma$ and $\tau$ are true.
\item
$(\sigma\lto\tau)$ is true if and only if
$(\lnot\sigma\lor\tau)$ is true.
\item
$(\sigma\liff\tau)$ is true if and only if
both $(\sigma\lto\tau)$ and $(\tau\lto\sigma)$ are true.
\end{compactenum}
\item
Suppose $\phi(x)$ is a singulary formula.
\begin{compactenum}
\item
The instantiation $\Exists x\phi(x)$ is true
if and only if $\phi(a)$ is true for \emph{some} set $a$.
\item
The generalization $\Forall x\phi(x)$ is true
if and only if $\phi(a)$ is true for \emph{all} sets $a$.
\end{compactenum}
\end{compactenum}
The validity of this definition relies on:

\begin{theorem}[Unique Readability]\label{thm:ur}
A given formula can be built up from atomic formulas in only one way.  
\end{theorem}

This means two things:
\begin{compactenum}
  \item
Each formula is of exactly one of the eight kinds 
named in the previous subsection:
\begin{inparaenum}[(i)]
  \item
an atomic formula, 
\item
a negation, 
\item
a disjunction, 
\item
a conjunction, 
\item
an implication, 
\item
an equivalence, 
\item
an instantiation, or 
\item
a generalization.
\end{inparaenum}
\item
Each formula is of one of these kinds in only one way.
\end{compactenum}
These two conclusions are obvious for atomic formulas, negations, 
generalizations, and instantiations.
For disjunctions, conjunctions, implications, and equivalences,
the theorem is a consequence of the following.

\begin{lemma}\label{lem:init-seg}
  No proper initial segment of a formula is a formula.
\end{lemma}

\begin{proof}
  We prove by induction that every formula
neither \emph{is} a proper initial segment of another formula,
nor \emph{has} a proper initial segment that is a formula.
This is obviously true for atomic formulas.
Suppose this is true for the formulas $\phi$ and $\psi$.
Then it is obviously true for the three formulas 
that can be obtained in one step from $\phi$,
as well as for the four formulas 
that can be obtained in one step from $\phi$ and $\psi$.
Therefore the claim is true for all formulas.
\end{proof}

Another difficulty with the definition of truth and falsity is as follows.
The definition assigns truth-values, 
not to arbitrary formulas, but to sentences only.
However, sentences as such are not defined recursively.
Strictly, the recursive definition of truth-value determines, 
for each formula $\phi$,
an assignment of a truth-value to each sentence
that results from $\phi$ 
by replacing each free occurrence of a variable with a constant.

Note that $\Forall x\phi(x)$ and \enquote{For all $a$, $\phi(a)$} 
are two ways of saying the same thing.
The former expression is a sentence of our logic;
the latter expression is a sentence of English
that incorporates the constant $a$ and the sentence $\phi(a)$ of our logic.
In particular, in English, the constant $a$ plays the role of a variable.
In place of \enquote{For all $a$, $\phi(a)$,}
we may say simply $\phi(a)$, 
if it is clear that $a$ is an \emph{arbitrary} set.

\subsection{Logical truth}

The truth-value of a sentence 
is determined by the truth-values of all atomic sentences.
However, some sentences are true,
regardless of the truth-values of atomic sentences.
Such sentences are \textbf{logically true.}
For example, the sentences
\begin{align*}
(\sigma\lto\tau)&\liff\lnot\sigma\lor\tau,&
\Forall x\phi(x)&\liff\lnot\Exists x\lnot\phi(x)
\end{align*}
are logically true.
A \emph{formula} is logically true 
if every generalization of it that is a sentence is logically true.
Then two formulas $\phi$ and $\psi$ 
are \textbf{logically equivalent} to one another
if the equivalence $\phi\liff\psi$ is logically true.
For example, 
$\phi\lto\psi$ and $\lnot\phi\lor\psi$ are logically equivalent to one another.
So are the formulas
\begin{align*}
  \psi&\lto\Forall x\phi(x),&
\Forall x\bigl(\psi&\lto\phi(x)\bigr);
\end{align*}
and so are the formulas
\begin{align*}
  \Forall x\phi(x)&\lto\psi,&
\Exists x\bigl(\phi(x)&\lto\psi\bigr).
\end{align*}
We shall use these logical equivalences
in examining equality below.

\subsection{Classes and equality}


If $\phi$ is a singulary formula $\phi(x)$,
and the sentence $\phi(a)$ is true, 
then $a$ can be said to \textbf{satisfy} $\phi$.  
There is a collection of all sets that satisfy $\phi$,
and we denote this collection by
\begin{equation*}
\{x\colon\phi(x)\}.
\end{equation*}
Such a collection is called a 
\textbf{class.}%
\index{class}\label{class}
In particular, it is the class \textbf{defined} by the formula $\phi$.
We may give this class a name like $\bm C$, written in boldface: 
in this case the expression
\begin{equation*}
x\in\bm C
\end{equation*}
means just $\phi(x)$.

A formula in which only two variables occur freely is \textbf{binary.}
If $\psi$ is such a formula, with free variables $x$ and $y$,
then we may write $\psi$ as
\begin{equation*}
\psi(x,y).
\end{equation*}
We shall want this notation for proving Theorem~\ref{thm:=} below.
If needed, we can talk about ternary formulas $\chi(x,y,z)$, and so on.

By definition of equality, the sentences
\begin{gather}
\Forall x\Forall y\Forall z\bigl(x=y\lto(z\in x\liff z\in y)\bigr),\notag\\
\Forall x\Forall y\Exists z\bigl((z\in x\liff z\in y)\lto x=y\bigr)
\label{eqn:>=}
\end{gather}
are logically true.
We can write the former as
\begin{equation}
  \Forall x\Forall y\bigl(x=y\lto(a\in x\liff a\in y)\bigr).\label{eqn:=}
\end{equation}


\begin{axiom}[Equality]
Equal sets belong to the same sets:
\begin{equation}\label{eqn:=2}
\Forall x\Forall y\bigl(x=y\lto(x\in a\liff y\in a)\bigr).
\end{equation}
\end{axiom}

\begin{theorem}\label{thm:=}
Equal sets satisfy the same formulas:
\begin{equation}\label{eqn:=thm}
\Forall x\Forall y\Bigl(x=y\lto\bigl(\phi(x)\liff\phi(y)\bigr)\Bigr).
\end{equation}
\end{theorem}

\begin{proof}
Suppose $a=b$.
By symmetry, it is enough to show
\begin{equation}\label{eqn:ab}
\phi(a)\lto\phi(b)
\end{equation}
for all singulary formulas $\phi(x)$.
We use \textbf{induction;}
this is possible because formulas are defined recursively.  
See \S\ref{sect:N} below (page~\pageref{sect:N}).


By \eqref{eqn:=} and \eqref{eqn:=2},
\eqref{eqn:ab} holds when $\phi(x)$
is an atomic formula
$x\in c$ or $c\in x$.
There is another form of singulary atomic formula, namely $x\in x$.
If $a\in a$,
then $a$ satisfies $x\in a$, and therefore so does $b$;
thus $b\in a$, so $a$ satisfies $b\in x$, and therefore so does $b$.
Thus $a\in a\lto b\in b$.
So we have \eqref{eqn:ab} when $\phi$ is any singulary atomic formula.

If we have \eqref{eqn:ab} when $\phi$ is $\psi$, 
then we have it when $\phi$ is $\lnot\psi$.
If we have \eqref{eqn:ab} when $\phi$ is $\psi$ or $\chi$, 
then we have it when $\phi$ is $(\psi*\chi)$,
where $*$ is one of the binary connectives.
If, for some binary formula $\psi(x,y)$,
we have \eqref{eqn:ab} whenever $\phi(x)$ is $\psi(x,c)$
for some set $c$,
then we have \eqref{eqn:ab} 
when $\phi(x)$ is $\Forall y\psi(x,y)$ or $\Exists y\psi(x,y)$.
Therefore we do have \eqref{eqn:ab} in all cases.
\end{proof}

For many writers,
equality is a logical concept,
and the sentence \eqref{eqn:=thm} is taken as logically true. 
Then \eqref{eqn:=} and \eqref{eqn:=2} are special cases of this, 
but \eqref{eqn:>=} is not logically true. 
In this case,
\eqref{eqn:>=} must also be taken as an axiom, 
which is called the \textbf{Extension Axiom.}  
No matter which approach one takes,
all of the sentences \eqref{eqn:>=}, \eqref{eqn:=}, \eqref{eqn:=2}, 
and \eqref{eqn:=thm} end up being true.  
They tell us that equal sets are precisely those sets 
that are logically indistinguishable.
%We shall henceforth treat logical indistinguishability as \emph{identity}: 
%so equal sets will be the \emph{same} set. 

As with sets, so with classes, 
one of them \textbf{includes} another
if every element of the latter belongs to the former.
Hence if formulas $\phi(x)$ and $\psi(y)$ 
define classes $\bm C$ and $\bm D$ respectively,
and if
\begin{equation*}
  \Forall x\bigl(\phi(x)\lto\psi(x)\bigr),
\end{equation*}
this means $\bm D$ includes $\bm C$, and we write
\begin{equation*}
  \bm C\included\bm D.
\end{equation*}
If also $\bm C$ includes $\bm D$,
then the two classes are \textbf{equal,}
and we write
\begin{equation*}
  \bm C=\bm D;
\end{equation*}
this means $\Forall x\bigl(\phi(x)\liff\psi(x)\bigr)$.
Likewise a set and a class can be considered as \textbf{equal} 
if they have the same members.
Thus if again $\bm C$ is defined by $\phi(x)$,
then the expression
\begin{equation*}
a=\bm C
\end{equation*}
means $\Forall x\bigl(x\in a\liff\phi(x)\bigr)$. 

\begin{theorem}
Every set is equal to a class.
\end{theorem}

\begin{proof}
$a=\{x\colon x\in a\}$.
\end{proof}

However, there is no reason to expect the converse to be true.

\begin{theorem}\label{thm:RP}
Not every class is equal to a set.
\end{theorem} 

\begin{proof}
There are formulas $\phi(x)$ such that
\begin{equation}\label{eqn:Russell}
\Forall y\lnot\Forall x\bigl(x\in y\liff\phi(x)\bigr);
\end{equation}
for example, $\phi(x)$ could be $x\notin x$, so that
$\Forall y\lnot\bigl(y\in y\liff\phi(y)\bigr)$.
In any case, if \eqref{eqn:Russell} holds,
then no set can be equal to the class $\{x\colon\phi(x)\}$.
\end{proof}

More informally, the argument is that the class $\{x\colon x\notin x\}$ is not a set,
because if it were a set $a$, then $a\in a\liff a\notin a$,
which is a contradiction.  
This is what was given above as the Russell Paradox (page~\pageref{Russell}). 
Another example of a class that is not a set 
is given by the \emph{Burali-Forti Paradox} on page~\pageref{BF} below.

\subsection{Construction of sets}

We have established what it means for sets to be equal.
We have established that sets are examples, 
but not the only examples,
of the collections called classes.
However, we have not officially exhibited any sets.
We do this now.

\begin{axiom}[Empty Set]
The empty class is a set:
  \begin{equation*}
\Exists x\Forall yy\notin x.
\end{equation*}
\end{axiom}

As noted above (page~\pageref{empty}), 
the set whose existence is asserted by this axiom is denoted by $\emptyset$.
This set is the class $\{x\colon x\neq x\}$.

We now obtain the sequence $0$, $1$, $2$, \dots, described above (page~\pageref{nat}). 
We use the Empty Set Axiom to start the sequence.
We continue by means of:

\begin{axiom}[Adjunction]
  If $a$ and $b$ are sets, then there is a set denoted by $a\cup\{b\}$:
\begin{equation*}
\Forall x\Forall y\Exists z\Forall w(w\in z\liff w\in x\lor w=y).
\end{equation*}
\end{axiom}

In writing the axiom formally,
we have followed the abbreviative conventions on page \pageref{abbrev}.
We can understand the Adjunction Axiom as saying that, for all sets $a$ and $b$,
the class $\{x\colon x\in a\lor x=b\}$ is actually a set.
Adjunction is not one of Zermelo's original axioms of 1908;
but the following is Zermelo's \textbf{Pairing Axiom:}

\begin{theorem}
For any two sets $a$ and $b$, the set $\{a,b\}$ exists:
\begin{equation*}
\Forall x\Forall y\Exists z\Forall w(w\in z\liff w=x\lor w=y).
\end{equation*}
\end{theorem}

\begin{proof}
By Empty Set and Adjunction, $\emptyset\cup\{a\}$ exists, but this is just $\{a\}$.
Then $\{a\}\cup\{b\}$ exists by Adjunction again.
\end{proof}

The theorem is that the class $\{x\colon x=a\lor x=b\}$ is always a set.
Actually Zermelo does not have a Pairing Axiom as such,
but he has an \textbf{Elementary Sets Axiom,} 
which consists of what we have called the Empty Set Axiom and the Pairing Axiom.%%%%%
\footnote{Zermelo also requires that for every set $a$ there be a set $\{a\}$; 
but this can be understood as a special case of pairing.}




Every class $\bm C$ has a \textbf{union,} 
which is the class
\begin{equation*}
 \{x\colon\Exists y(x\in y\land y\in\bm C)\}. 
\end{equation*}
This class is denoted by
\begin{equation*}
\bigcup\bm C.
\end{equation*}
This notation is related as follows
to the notation for the classes involved in the Adjunction Axiom:

\begin{theorem}
For all sets $a$ and $b$, $a\cup\{b\}=\bigcup\bigl\{a,\{b\}\bigr\}$.
\end{theorem}

We can now use the more general notation
\begin{equation*}
a\cup b=\bigcup\{a,b\}.
\end{equation*}

\begin{axiom}[Union]
  The union of a \emph{set} is always a set:
\begin{equation*}
\Forall x\Exists yy=\bigcup x.
\end{equation*}
\end{axiom}

The Adjunction Axiom is 
a consequence of the Empty-Set, Pairing, and Union Axioms.  
This is why Zermelo did not need Adjunction as an axiom.
We state it as an axiom,
because we can do a lot of mathematics with it
that does not require the full force of the Union Axiom.

Suppose $A$ is a set and $\bm C$ is the class $\{x\colon\phi(x)\}$.  
Then we can form the class
\begin{equation*}
A\cap\bm C,
\end{equation*}
which is defined by the formula $x\in A\land\phi(x)$.  
Standard notation for this class is%%%%%
\footnote{This notation is unfortunate.
Normally the formula $x\in A$ is read as a sentence of ordinary language, 
namely \enquote{$x$ belongs to $A$} or \enquote{$x$ is in $A$.}
However, the expression in \eqref{eqn:xinA} is read 
as \enquote{the set of $x$ in $A$ such that $\phi$ holds of $x$};
in particular, $x\in A$ here is read as the noun phrase \enquote{$x$ in $A$}
(or \enquote{$x$ belonging to $A$,} or \enquote{$x$ that are in $A$,}
or something like that).
Thus a more precise way to write the expression in \eqref{eqn:xinA}
would be something like
$\{x\win A\colon\phi(x)\}$.
Ambiguity of expressions like $x\in A$ (is it a noun or a sentence?)
is common in mathematical writing, 
as for example in the abbreviation of 
$\Forall{\varepsilon}(\varepsilon>0\lto\phi)$ as $(\forall{\varepsilon>0})\;\phi$.
Nonetheless, such ambiguity is avoided in this text.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{equation}\label{eqn:xinA}
\{x\in A\colon\phi(x)\}.
\end{equation}

\begin{axiom}[Separation]
  Every class $\{x\in A\colon\phi(x)\}$ is a set.  
\end{axiom}

The Separation Axiom is really a \emph{scheme} of axioms, 
one for each singulary formula $\phi$:
\begin{equation*}
\Forall x\Exists y\Forall z\bigl(z\in y\liff z\in x\land\phi(z)\bigr).
\end{equation*}

In most of mathematics, and in particular in the other sections of this text, 
one need not worry too much about the distinction between sets and classes.  
But it is logically important.  
It turns out that the objects of interest in mathematics 
can be understood as sets.  
Indeed, we have already defined natural numbers as sets.  
We can talk about sets by means of formulas.  
Formulas define classes of sets, as we have said.  
Some of these classes turn out to be sets themselves; 
but again, there is no reason to expect all of them to be sets,  
and indeed by Theorem~\ref{thm:RP} (page~\pageref{thm:RP}) 
some of them are not sets.  
\emph{Sub-classes} of sets are sets, by the Separation Axiom; 
but some classes are too big to be sets.  
The class $\{x\colon x=x\}$ of all sets is not a set, 
since if it were, then the sub-class $\{x\colon x\notin x\}$ would be a set, and it is not.

Every set $a$ has a \emph{power class,} 
namely the class $\{x\colon x\included a\}$ of all subsets of $a$.  
This class is denoted by
\begin{equation*}
\pow a.
\end{equation*}

\begin{axiom}[Power Set]
Every power class is a set:
\begin{equation*}
\Forall x\Exists yy=\pow x.
\end{equation*}
\end{axiom}

Then $\pow a$ can be called the \textbf{power set} of $a$.
The Power Set Axiom is of fundamental importance
for allowing us to prove Theorem~\ref{thm:prod-set} 
on page~\pageref{thm:prod-set} below.

We want the collection $\{0,1,2,\dots\}$ of natural numbers 
as defined on page~\pageref{nat} to be a set.  
Now, it is not obvious how to formulate this as a sentence of our logic.  
However, the indicated collection contains $0$, 
which by definition is the empty set;
also, for each of its elements $n$,
the collection contains also $n\cup\{n\}$.
Let $\It$ be the class of all \emph{sets} with these properties: thus
\begin{equation*}
\It=\bigl\{x\colon0\in x\land\Forall y(y\in x\lto y\cup\{y\}\in x)\bigr\}.
\end{equation*}
If it exists, the set of natural numbers will belong to $\It$.
Furthermore, the set of natural numbers 
will be the \emph{smallest} element of $\It$.
But we still must make this precise.
For an arbitrary class $\bm C$, we define
\begin{equation*}
\bigcap\bm C=\{x\colon\Forall y(y\in\bm C\lto x\in y)\}.
\end{equation*}
This class is the \textbf{intersection} of $\bm C$. 

\begin{theorem}\label{thm:int}
If $a$ and $b$ are two sets, then
\begin{equation*}
a\cap b=\bigcap\{a,b\}.
\end{equation*}
If $a\in\bm C$, then
\begin{equation*}
\bigcap\bm C\included a,
\end{equation*}
so in particular $\bigcap\bm C$ is a set.
However, $\bigcap\emptyset$ is the class of all sets, which is not a set.%%%%%
\footnote{Some writers define $\bigcap\bm C$ 
only when $\bm C$ is a nonempty set.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{theorem} 

\begin{axiom}[Infinity]
  $\It\neq\emptyset$:
  \begin{equation*}
\Exists x\bigl(0\in x\land\Forall y(y\in x\lto y\cup\{y\}\in x)\bigr).
  \end{equation*}
\end{axiom}

We can now define%%%%%
\footnote{See note \ref{fn:omega} on page \pageref{fn:omega} 
about the letter $\upomega$.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{equation}\label{eqn:upomega-defn}
\upomega=\bigcap\It,
\end{equation}
knowing that this is a set.%%%%%
\footnote{Every other axiom of this section is of the form,
\enquote{Such-and-such classes are sets.}
We can express the Axiom of Infinity in this form,
as \enquote{$\bigcap\It$ is a set.}
However, it would be preferable to define $\upomega$ as a class,
without using the Axiom of Infinity;
then this Axiom could be simply, \enquote{$\upomega$ is a set.}
We can do this.
In the terminology of \S\ref{sect:count} (page \pageref{sect:count}),
we can define $\upomega$
as the class of all ordinals that neither \emph{contain} limits
nor \emph{are} limits themselves.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{theorem}\label{thm:AI}
$\upomega\in\It$.
\end{theorem}

We shall establish the additional properties of $\upomega$ 
in \S\ref{sect:omega} (p.~\pageref{sect:omega}).

\subsection[ZFC]{The Zermelo--Fraenkel Axioms with Choice}

We state the following for the record;
but we are not going to use it freely,
as we shall use the preceding axioms.

\begin{axiom}[Choice]
  For every set $A$ of nonempty sets,
any two of which are disjoint from one another,
there is a set $b$ such that, for each set $c$ in $A$,
the intersection $b\cap c$ has a unique element.
\end{axiom}

We have now named all of the axioms given by Zermelo in 1908:
\begin{compactenum}[(I)]
\item
Extension,
\item
Elementary Sets,
\item
Separation,
\item
Power Set,
\item
Union,
\item
Choice, and
\item
Infinity.
\end{compactenum}
Zermelo assumes that equality is identity: 
but his assumption is our Theorem~\ref{thm:=}.  
In fact Zermelo does not use logical formalism as we have.  
We prefer to define equality with \eqref{eqn:>=} and \eqref{eqn:=} 
and then use the Axioms of 
\begin{compactenum}[(i)]
\item
Equality,
\item
the Empty Set,
\item
Adjunction,
\item
Union,
\item
Separation,
\item
Power Set,
\item
Infinity, and
\item
Choice.
\end{compactenum}
But these two collections of definitions and axioms are logically equivalent:
using either collection,
we can prove the axioms in the other collection as theorems.

Apparently Zermelo overlooked an axiom, the \textbf{Replacement Axiom,} 
which was supplied in 1922 by Skolem \cite{Skolem-some-remarks} 
and by Fraenkel.%%%%%
\footnote{I have not been able to consult Fraenkel's original papers.  
However, according to van Heijenoort \cite[p.~291]{MR1890980}, 
Lennes also suggested something like the Replacement Axiom 
at around the same time (1922) as Skolem and Fraenkel; 
but Cantor had suggested such an axiom in 1899.}  
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We shall give this axiom on page \pageref{ax:replacement} in the next section.  

\begin{sloppypar}
An axiom never needed in ordinary mathematics 
is the \textbf{Foundation Axiom.}  
Stated originally by von Neumann \cite{von-Neumann-ax}, 
it ensures that certain pathological situations, 
like a set's containing itself, 
are impossible.  
It does this by declaring that every nonempty set has an element 
that is disjoint from it: 
\begin{equation*}
\Forall x\Exists y(x\neq\emptyset\lto y\in x\land x\cap y=\emptyset).  
\end{equation*}
We shall never use this axiom.
\end{sloppypar}

Zermelo's axioms, 
along with Replacement and Foundation,
compose the collection called
\begin{equation*}
\zfc.
\end{equation*} 
If we leave out Choice, we have what is called
\begin{equation*}
  \zf.
\end{equation*}
\emph{We shall tacitly assume $\zf$ throughout this text.}
When we want to use the Axiom of Choice,
we shall be explicit about it.


\section{Functions and relations}\label{sect:f}

\subsection{Cartesian products}

Given two sets $a$ and $b$, we define
\begin{equation*}
(a,b)=\bigl\{\{a\},\{a,b\}\bigr\}.
\end{equation*}
This set is the \textbf{ordered pair} 
whose first entry is $a$ and whose second entry is $b$.
The purpose of the definition is to make the following theorem true.

\begin{theorem}
Two ordered pairs are equal if and only if 
their first entries are equal and their second entries are equal:
\begin{equation*}\label{eqn:op}
(a,b)=(x,y)\liff a=x\land b=y.
\end{equation*}
\end{theorem}

If $A$ and $B$ are sets, then we define
\begin{equation*}
A\times B=\bigl\{z\colon\Exists x\Exists y\bigl(z=(x,y)
\land x\in A\land y\in B\bigr)\bigr\}.
\end{equation*}
This is the \textbf{cartesian product}\index{cartesian product}
of $A$ and $B$.

\begin{theorem}\label{thm:prod-set}
The cartesian product of two sets is a set.
\end{theorem}

\begin{proof}
If $a\in A$ and $b\in B$, 
then $\{a\}$ and $\{a,b\}$ are elements of $\pow{A\cup B}$,
so $(a,b)\in\pow{\pow{A\cup B}}$, and therefore
\begin{equation*}
A\times B\included\pow{\pow{A\cup B}}.\qedhere
\end{equation*}
\end{proof}

An \textbf{ordered triple}\index{ordered triple} $(x,y,z)$ 
can be defined as $\bigl((x,y),z\bigr)$, and so forth.

\subsection{Functions}

A \textbf{function}\index{function} or \textbf{map}\index{map} 
from $A$ to $B$ 
is a subset $f$ of $A\times B$ such that, 
for each $a$ in $A$, 
there is exactly one $b$ in $B$ such that $(a,b)\in f$.  
Then instead of $(a,b)\in f$, we write 
\begin{equation}\label{eqn:f}
  f(a)=b.
\end{equation}
We have then
\begin{equation*}
A=\{x\colon\Exists yf(x)=y\},
\end{equation*}
that is, $A=\{x\colon\Exists y(x,y)\in f\}$.
The set $A$ is called the \textbf{domain} of $f$.
A function is sometimes said to be a function \textbf{on} its domain.
For example, the function $f$ here is a function on $A$.
The \textbf{range} of $f$ is the subset
\begin{equation*}
\{y\colon\Exists xf(x)=y\}
\end{equation*}
of $B$.  
If this range is actually equal to $B$,
then we say that $f$ is \textbf{surjective onto} $B$,
or simply that $f$ is \textbf{onto} $B$.
Strictly speaking, it would not make sense to say $f$ was surjective or onto, simply.

A function $f$ is
\textbf{injective} or \textbf{one-to-one} if
\begin{equation*}
\Forall x\Forall z(f(x)=f(z)\lto x=z).
\end{equation*}
The expression $f(x)=f(z)$ is an abbreviation of 
$\Exists y(f(x)=y\land f(z)=y)$,
which is another way of writing
$\Exists y\bigl((x,y)\in f\land(z,y)\in f\bigr)$.
An injective function from $A$ \emph{onto} $B$ 
is a \textbf{bijection} from $A$ to $B$.

If it is not convenient to name a function with a single letter like $f$, 
we may write the function as
\begin{equation*}
x\mapsto f(x),
\end{equation*}
where the expression $f(x)$ would be replaced by some particular expression involving $x$.  
As an abbreviation of the statement that $f$ is a function from $A$ to $B$, 
we may write
\begin{equation}\label{eqn:f:B->A}
f\colon A\to B.
\end{equation}
Thus, while the symbol $f$ can be understood as a \emph{noun,} 
the expression $f\colon A\to B$ is a complete \emph{sentence.}  
If we say, \enquote{Let $f\colon A\to B$,} we mean
let $f$ be a function from $A$ to $B$.

If $f\colon A\to B$ and $D\included A$, 
then the subset
\begin{equation*}
 \{y\colon\Exists x(x\in D\land y=f(x))\} 
\end{equation*}
of $B$
can be written as one of%%%%%
\footnote{The notation $f(D)$ is also used, but the ambiguity is dangerous, 
at least in set theory as such.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{align*}
&\{f(x)\colon x\in D\},&
&f[D].
\end{align*}
This set is the \textbf{image} of $D$ under $f$.
Similarly, for the cartesian product $A\times B$,
instead of
\begin{equation*}
  \bigl\{z\colon
\Exists x\Exists y\bigl(z=(x,y)\land x\in A\land y\in B\bigr)\bigr\},
\end{equation*}
we can write
\begin{equation*}
\{(x,y)\colon x\in A\land y\in B\}.
\end{equation*}
Variations on this notation are possible.
If $f\colon A\to B$ and $D\included A$, 
then the \textbf{restriction} of $f$ to $D$ is the set
\begin{equation*}
\{(x,y)\in f\colon x\in D\},
\end{equation*}
which we may denote by
\begin{equation*}
f\restriction D.  
\end{equation*}
Then the following is just an exercise in notation.

\begin{theorem}
If $f\colon A\to B$ and $D\included A$, then 
\begin{equation*}
f\restriction D\colon D\to B
\end{equation*}
and, for all $x$ in $D$, $(f\restriction D)(x)=f(x)$.
\end{theorem}

If $f\colon A\to B$ and $g\colon B\to C$, 
then we can define
\begin{equation*}
g\circ f=\{(x,z)\colon\Exists y(f(x)=y\land g(y)=z)\};
\end{equation*}
this is called the \textbf{composite} of $(g,f)$.

\begin{theorem}\label{thm:composite}
If $f\colon A\to B$ and $g\colon B\to C$, then
\begin{equation*}
g\circ f\colon A\to C.
\end{equation*}
If also $h\colon C\to D$, then
\begin{equation*}
h\circ(g\circ f)=(h\circ g)\circ f.
\end{equation*}
\end{theorem}

We define
\begin{equation*}
\id A=\{(x,x)\colon x\in A\};
\end{equation*}
this is the \textbf{identity} on $A$.

\begin{theorem}\label{thm:id}
$\id A$ is a bijection from $A$ to itself.
If $f\colon A\to B$, then
\begin{align*}
f\circ\id A&=f,&
\id B\circ f&=f.
\end{align*}
\end{theorem}

If $f$ is a bijection from $A$ to $B$, we define
\begin{equation*}
f\inv=\{(y,x)\colon f(x)=y\};
\end{equation*}
this is the \textbf{inverse} of $f$.

\begin{theorem}\label{thm:inverses}
\mbox{}
\begin{compactenum}
\item
The inverse of a bijection from $A$ to $B$ is a bijection from $B$ to $A$.
\item
Suppose $f\colon A\to B$ and $g\colon B\to A$.  Then $f$ is a bijection from $A$ to $B$ whose inverse is $g$ if and only if
\begin{align*}
g\circ f&=\id A,&f\circ g&=\id B.
\end{align*}
\end{compactenum}
\end{theorem}


In the definition of the cartesian product $A\times B$ 
and of functions from $A$ to $B$,
we may replace the sets $A$ and $B$ with classes.
For example, 
we may speak of the function $x\mapsto\{x\}$ on the class of all sets.

\begin{axiom}[Replacement]\label{ax:replacement}
  If $\bm F$ is a function on some class $\bm C$,
and $A$ is a \emph{subset} of $\bm C$,
then the image $\bm F[A]$ is also a set.
\end{axiom}

For example, if we are given a function $n\mapsto G_n$ on $\upomega$, 
then by Replacement the class $\{G_n\colon n\in\upomega\}$ is a set.
Then the union of this class is a set, which we denote by
\begin{equation*}
\bigcup_{n\in\upomega}G_n.
\end{equation*}

A \textbf{singulary operation}\index{singulary} on $A$ is a function
from $A$ to itself; a \textbf{binary operation}\index{binary operation} on $A$
is a function 
from $A\times A$ to $A$.  

\subsection{Relations}

A \textbf{binary relation} on $A$ is a
subset of $A\times A$; if $R$ is such, and $(a,b)\in R$, we often
write
\begin{equation*}\label{mathrel}
  a\mathrel Rb.
\end{equation*}
A singulary operation on $A$ is a particular kind of binary
relation on $A$; for such a relation, we already have the
special notation in~\eqref{eqn:f}.  
The reader will be familiar
with other kinds of binary relations, 
such as \emph{equivalence relations}
and \emph{orderings.}
Equality of sets
is an equivalence relation;
see also pages~\pageref{eq-rel} and \pageref{eq-rel-2}.
We are going to define a particular binary relation on page~\pageref{<} below
and prove that it is a linear ordering.

Meanwhile, if $R\included A\times B$, 
then $R$ is a binary relation on $A\cup B$;
but we may say more precisely 
that $R$ is a relation \textbf{from $A$ to} $B$,\label{rel-from}
or a relation \textbf{between} $A$ and $B$ (in that order).
We consider this situation 
in the proof of Theorem~\ref{thm:rec} (page \pageref{thm:rec}),
and then again in \S\ref{sect:spectra} (page \pageref{sect:spectra}).
The \textbf{domain} of a relation $R$ from $A$ to $B$
is the subset $\{x\in A\colon\Exists y(x\mathrel Ry)\}$ of $A$.

\section{An axiomatic development of the natural numbers}\label{sect:N}

In the preceding sections, we sketched an axiomatic approach to set theory.  
Now we start over with an axiomatic approach to the natural numbers alone.  
In the section after this,
we shall show that the set $\upomega$ 
does actually provide 
a \emph{model} of the axioms for natural numbers 
developed in the present section.

For the moment though, we forget the definition of $\upomega$.  
We forget about starting the natural numbers with $0$.  
Children learn to count starting with $1$, not $0$.  
Let us understand the natural numbers to compose \emph{some} set called $\N$. 
This set has
a distinguished \textbf{initial element,}\index{initial element}
which we call \textbf{one}\index{one} and denote by
\begin{equation*}
1.
\end{equation*}
On the set $\N$ there is also
a distinguished singulary operation of
\textbf{succession,}\index{succession, successor} 
namely the operation
\begin{equation*}
n\mapsto n+1,
\end{equation*}
where $n+1$ is called the \textbf{successor} of $n$. 
Note that some other expression like $S(n)$ might be used for this successor.
For the moment, we have no binary operation called $+$ on $\N$.

I propose to refer to the ordered triple $(\N,1,n\mapsto n+1)$ as an
\emph{iterative structure.}
In general, by an \textbf{iterative structure,}\index{iterative} 
I mean any set that has a distinguished element
and a distinguished singulary operation.  
Here the underlying set can be called 
the \textbf{universe}\index{universe} of the structure.%%%%%
\footnote{For a simple notational distinction 
between a structure and its universe, 
if the universe is $A$, 
the structure itself can be denoted by a fancier version of this letter,
such as the Fraktur version $\str A$.
See Appendix~\ref{app:German} (p.~\pageref{app:German}) for Fraktur versions,
and their handwritten forms, for all of the Latin letters.
However,
we shall not make use of Fraktur letters
until defining structures in general on page \pageref{structure}.}  
%%%%%
 The
iterative structure $(\N,1,n\mapsto n+1)$ is
distinguished among all iterative structures by satisfying the
following axioms.
\begin{compactenum}[I.]
\item\label{ax:0}
$1$ is not a successor: $1\neq n+1$.
\item\label{ax:inj}
Succession is injective: if $m+1=n+1$, then $m=n$.
\item\label{ax:ind}
The structure admits \textbf{proof by induction,}\index{induction} in
the following sense.  
Every subset $A$ of the universe must be the whole universe,
provided $A$ has the following two closure properties.
  \begin{compactenum}[A.]
  \item 
$1\in A$.
\item
For all $n$, if $n\in A$, then $n+1\in A$.
  \end{compactenum}
\end{compactenum}

These axioms were published first 
by Dedekind~\cite[II, VI (71), p.~67]{MR0159773}; 
but they were written down also by Peano~\cite{Peano}, 
and they are often known as the \textbf{Peano axioms.}\index{Peano} 
  
Suppose $(A,b,f)$ is an iterative structure.  
If we successively compute $b$, $f(b)$, $f(f(b))$, $f(f(f(b)))$, and so on, 
either we always get a new element of $A$,
or we reach an element that we have already seen.
In the latter case,
if the first repeated element is $b$,
then the first Peano axiom fails.
If it is not $b$, then the second Peano axiom fails.
The last Peano axiom, the Induction Axiom,
would ensure that every element of $A$ was reached by our computations.  
None of the three axioms implies the others, 
although the Induction Axiom implies 
that exactly one of the other two axioms holds \cite{MR0120156}.

\subsection{Recursion}
  
The following theorem will allow us 
to define all of the usual operations on $\N$.  
The theorem is difficult to prove.  
Not the least difficulty 
is seeing that the theorem \emph{needs} to be proved.%%%%%
\footnote{Peano did not see this need, but Dedekind did.  
Landau discusses the matter \cite[pp.~ix--x]{MR12:397m}.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\emph{Homomorphisms} will be defined generally on page~\pageref{hom},
but meanwhile we need a special case.
A \textbf{homomorphism} from the iterative structure $(\N,1,n\mapsto n+1)$ 
to an arbitrary iterative structure $(A,b,f)$
is a function $h$ from $\N$ to $A$ such that
\begin{compactenum}[1)]
\item 
$h(1)=b$, and
\item
$h(n+1)=f(h(n))$ for all $n$ in $\N$,
\end{compactenum}
that is, the diagram in Figure \ref{fig:comm-diag} \textbf{commutes}
(any two paths from one point to another represent the same function).
\begin{figure}
  \begin{equation*}
\xymatrix@=2cm@!{\{1\}\ar[r]^{\id{\{1\}}}\ar[d]_{h\restriction\{1\}}&
\N\ar[d]_h\ar[r]^{n\mapsto n+1}&\N\ar[d]^h\\
\{b\}\ar[r]_{\id{\{b\}}}&A\ar[r]_f&A}
  \end{equation*}
  \caption{A homomorphism of iterative structures}\label{fig:comm-diag}
  
\end{figure}

\begin{theorem}[Recursion]\label{thm:rec}
For every iterative structure, there is exactly one
homomorphism from
$(\N,1,n\mapsto n+1)$ to this structure.
\end{theorem}

\begin{proof}
Given an iterative structure $(A,b,f)$,
we seek a homomorphism $h$ from $(\N,1,n\mapsto n+1)$ to $(A,b,f)$.
Then $h$ will be a particular subset of $\N\times A$.
Let $\mathscr B$ be the set whose elements are the subsets $C$ of $\N\times
A$ such that, if $(n,y)\in C$, then either 
\begin{compactenum}[1)]
\item 
$(n,y)=(1,b)$ or else
\item $C$ has an element
$(m,x)$ such that $(n,y)=(m+1,f(x))$.
\end{compactenum}
In particular, $\{(1,b)\}\in\mathscr B$.
Also, if $C\in\mathscr B$ and $(m,x)\in C$, then
\begin{equation*}
C\cup\{(m+1,f(x))\}\in\mathscr B.
\end{equation*}
Let $R=\bigcup\mathscr B$; so $R$ is a subset of $\N\times A$,
that is, a relation from $\N$ to $A$
in the sense of page \pageref{rel-from}.  
If $(n,y)\in R$, then (on page~\pageref{mathrel})
we may write also 
\begin{equation*}
n\mathrel Ry.  
\end{equation*}
Since $\{(1,b)\}\in\mathscr B$, we have $1\mathrel Rb$.  
Also, if $m\mathrel Rx$, then $(m,x)\in C$ for some $C$ in $\mathscr B$, 
so $C\cup\{(m+1,f(x))\}\in\mathscr B$, 
and therefore $(m+1)\mathrel R f(x)$.  
Thus $R$ is the desired function $h$,
provided $R$ is actually a \emph{function} from $\N$ to $A$.  
Proving that $R$ is a function from $\N$ to $A$ has two stages.
\begin{asparaenum}[1.]
  \item
Let $D$ be the set of all $n$ in $\N$ 
for which there is $y$ in $A$ such that $n\mathrel Ry$.
Then we have just seen that $1\in D$, and if $n\in D$, then $n+1\in D$.  
By induction, $D=\N$.
Thus if $R$ is a function, its domain is $\N$.
\item
Let $E$ be the set of all $n$ in $\N$ such that,
for all $y$ in $A$,
if $n\mathrel Ry$ and $n\mathrel Rz$, then $y=z$.
Suppose $1\mathrel R y$.  
Then $(1,y)\in C$ for some $C$ in $\mathscr B$.  
Since $1$ is not a successor, 
we must have $y=b$, by definition of $\mathscr B$.  
Therefore $1\in E$.  
Suppose $n\in E$, and $(n+1)\mathrel Ry$.  
Then $(n+1,y)\in C$ for some $C$ in $\mathscr B$.  
Again since $1$ is not a successor, 
we must have
\begin{equation*}
(n+1,y)=(m+1,f(x))
\end{equation*}
for some $(m,x)$ in $C$.  
Since succession is injective, we must have $m=n$.  
Thus, $y=f(x)$ for some $x$ in $A$ such that $n\mathrel Rx$.
Since $n\in E$, we know $x$ is \emph{unique} such that $n\mathrel Rx$.  
Therefore $y$ is unique such that $(n+1)\mathrel Ry$.  
Thus $n+1\in E$.  
By induction, $E=\N$.
\end{asparaenum}

So $R$ is the desired function $h$.
Finally, $h$ is unique by induction.
\end{proof}

Note well that the proof uses all three of the Peano Axioms.
The Recursion Theorem is often used in the following form.

\begin{corollary}\label{cor:rec}
For every set $A$ with a distinguished element $b$, and for every function
$F$ from $\N\times A$ to $A$, there is a unique function $H$ from $\N$ to
$A$ such that
\begin{compactenum}[1)]
\item 
$H(1)=b$, and
\item
$H(n+1)=F(n,H(n))$ for all $n$ in $\N$.
\end{compactenum}
\end{corollary}

\begin{proof}
Let $h$ be the unique homomorphism from $(\N,1,n\mapsto n+1)$ to
$(\N\times A,(1,b),f)$, where $f$ is the operation
$(n,x)\mapsto(n+1,F(n,x))$.  In particular, $h(n)$ is always an
ordered pair.  By induction, the 
first entry of $h(n)$ is always $n$; so there is a function $H$ from
$\N$ to $A$ such that $h(n)=(n,H(n))$.  Then $H$ is as desired.  By
induction, $H$ is unique.
\end{proof}

\subsection{Arithmetic operations}

We can now use recursion to define, on $\N$,
%\begin{compactenum}[1)]
%  \item
the binary operation
\begin{equation*}
(x,y)\mapsto x+y
\end{equation*}
of \textbf{addition,}\index{addition} and 
%\item
the binary operation
\begin{equation*}
(x,y)\mapsto x\cdot y
\end{equation*}
of \textbf{multiplication.}\index{multiplication}
%\end{compactenum}
More precisely, for each $n$ in $\N$,
we recursively define the operations $x\mapsto n+x$ and $x\mapsto n\cdot x$.
The definitions are:
\begin{align}\label{eqn:+.}
&  \begin{gathered}
n+1=n+1,\\
n\cdot1=n,    
  \end{gathered}&
&  \begin{gathered}
    n+(m+1)=(n+m)+1,\\
n\cdot(m+1)=n\cdot m+n.
  \end{gathered}
\end{align}
The definition of addition might also be written as $n+1=S(n)$ and $n+S(m)=S(n+m)$.
In place of $x\cdot y$, we often write $xy$.
 
\begin{lemma}
For all $n$ and $m$ in $\N$,
\begin{align*}
  1+n&=n+1,&(m+1)+n&=(m+n)+1.
\end{align*}
\end{lemma}

\begin{proof}
  Induction.
\end{proof}

\begin{theorem}\label{thm:N-comm}
Addition on $\N$ is
  \begin{compactenum}[1)]
  \item 
\textbf{commutative:}\index{commutative} $n+m=m+n$; and
\item
\textbf{associative:}\index{associative} $n+(m+k)=(n+m)+k$.
  \end{compactenum}
\end{theorem}

\begin{proof}
  Induction and the lemma.
\end{proof}

\begin{theorem}\label{thm:cancel}
  Addition on $\N$ allows \textbf{cancellation:}\index{cancellation}
if $n+x=n+y$, then $x=y$.
\end{theorem}

\begin{proof}
  Induction, and injectivity of succession.
\end{proof}

\begin{sloppypar}
The analogous proposition for multiplication is 
Corollary~\ref{cor:mulcan} below.
\end{sloppypar}

\begin{lemma}
For all $n$ and $m$ in $\N$,
\begin{align*}
  1\cdot n&=n,&(m+1)\cdot n&=m\cdot n+n.
\end{align*}
\end{lemma}

\begin{proof}
  Induction.
\end{proof}

\begin{theorem}\label{thm:mult-comm}
Multiplication on $\N$ is
  \begin{compactenum}[1)]
  \item 
commutative: $nm=mn$;
\item
\textbf{distributive}\index{distributive} over addition: $n(m+k)=nm+nk$; and
\item
associative: $n(mk)=(nm)k$.
  \end{compactenum}
\end{theorem}

\begin{proof}
  Induction and the lemma.
\end{proof}

Landau \cite{MR12:397m} proves \emph{using induction alone} 
that $+$ and $\cdot$ exist 
as given by the recursive definitions above.  
However, Theorem~\ref{thm:cancel} needs more than induction.  
So does the existence of the \textbf{factorial}\label{factorial} function
defined by
\begin{align*}
1!&=1,&(n+1)!&=n!\cdot(n+1).
\end{align*}
So does \textbf{exponentiation,}\index{exponentiation} defined by
\begin{align*}
n^1&=n,&n^{m+1}&=n^m\cdot n.
\end{align*}

\subsection{The linear ordering}

The usual ordering $<$ of $\N$ is defined recursively as follows.
First note that $m\leq n$ means simply $m<n$ or $m=n$.  
Then the definition of $<$\label{<} is:
\begin{compactenum}[1)]
\item 
$m\not<1$ (that is, $\lnot\;m<1$);% for \emph{no} $m$ in $\N$;
\item
$m<n+1$ if and only if $m\leq n$.
\end{compactenum}
In particular, $n<n+1$.
Really, it is the sets $\{x\in\N\colon x<n\}$ that are defined by
recursion:
\begin{align*}
\{x\in\N\colon x<1\}&=\emptyset,\\
\{x\in\N\colon x<n+1\}&=\{x\in\N\colon x<n\}\cup\{n\}\\
&=\{x\in\N\colon x\leq n\}.	
\end{align*}
We now have $<$ as a binary relation on $\N$;
we must \emph{prove} that it is an ordering.

\begin{theorem}\label{thm:<trans}
  The relation $<$ is \textbf{transitive}\index{transitive} on $\N$,
  that is, if $k<m$ 
  and $m<n$, then $k<n$.
\end{theorem}

\begin{proof}
  Induction on $n$.
\end{proof}

\begin{theorem}\label{thm:<irr}
  The relation $<$ is \textbf{irreflexive}\index{irreflexive} on $\N$:
  $m\not<m$. 
\end{theorem}

\begin{proof}
Since every element $k$ of $\N$ is less than some other element (namely $k+1$), 
it is enough to prove
\begin{equation*}
k<n\lto k\not<k.
\end{equation*}
We do this by induction on $n$.
The claim is vacuously true when $n=1$.
Suppose it is true when $n=m$.
If $k<m+1$, then $k<m$ or $k=m$.
If $k<m$, then by inductive hypothesis $k\not<k$.
If $k=m$, but $k<k$, then $k<m$,
so again $k\not<k$.
Thus the claim holds when $n=m+1$.
By induction, it holds for all $n$.
\end{proof}

Because the relation $<$ is transitive and irreflexive on $\N$,
the relation is called an \textbf{ordering}\label{ordering}%%%%%
\footnote{In some sources,
what we are calling an \emph{ordering}
is called merely a \emph{partial ordering.}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 of $\N$,
and $\N$ is \textbf{ordered}\index{order}\index{strict} by $<$. 

\begin{lemma}
  $1\leq m$.
\end{lemma}

\begin{proof}
  Induction.
\end{proof}

\begin{lemma}
If $k<m$, then $k+1\leq m$.
\end{lemma}

\begin{proof}
The claim is vacuously true when $m=1$.  
Suppose it is true when $m=n$.  
Say $k<n+1$.  
Then $k\leq n$.  
If $k=n$, then $k+1=n+1$, so $k+1\leq n+1$.  
If $k<n$, then $k+1\leq n$ by inductive hypothesis,
so $k+1<n+1$ by transitivity (Theorem~\ref{thm:<trans}), 
and therefore $k+1\leq n+1$.  
Thus the claim holds when $m=n+1$.
By induction, the claim holds for all $m$.
\end{proof}

\begin{theorem}\label{thm:<tot}
  The relation $<$ is \textbf{total}\index{total} on $\N$: either
  $k\leq m$ or 
  $m<k$.
\end{theorem}

\begin{proof}
By the last lemma but one, the claim is true when $k=1$.
Suppose it is true when $k=\ell$.
If $m\not<\ell+1$, then $m\nleq\ell$.
In this case, we have both $m\neq\ell$ and $m\not<\ell$.
Also, by the inductive hypothesis, $\ell\leq m$, so $\ell<m$,
and hence $\ell+1\leq m$ by the last lemma.
Thus the claim holds when $k=\ell+1$;
by induction, it holds for all $k$.
\end{proof}

Being a total ordering of $\N$, 
the relation $<$ is also called a \textbf{linear ordering}\label{lo} of $\N$,
and $\N$ is \textbf{linearly ordered}\index{order}\index{strict} by $<$. 

\begin{theorem}\label{thm:m+x=n}
  For all $m$ and $n$ in $\N$, we have $m<n$ if and only if the
  equation
  \begin{equation}\label{eqn:m+x=n}
    m+x=n
  \end{equation}
is soluble in $\N$.
\end{theorem}

\begin{proof}
  By induction on $k$, if $m+k=n$, then $m<n$.  We prove the converse by induction on $n$.  We never have $m<1$.  Suppose for some $r$ that, for all $m$, if $m<r$, then the equation $m+x=r$ is soluble.  Suppose also $m<r+1$.  Then $m<r$ or $m=r$.  In the former case, by inductive hypothesis, the equation $m+x=r$ has a solution $k$, and therefore $m+(k+1)=r+1$.  If $m=r$, then $m+1=r+1$.  Thus the equation $m+x=r+1$ is soluble whenever $m<r+1$.
By
induction, for all $n$ in $\N$, if $m<n$, then~\eqref{eqn:m+x=n}
is soluble in $\N$. 
\end{proof}

\begin{theorem}\label{thm:N<}
If $k<\ell$, then
\begin{align*}
  k+m&<\ell+m,&
km&<\ell m.
\end{align*}
\end{theorem}

Here the first conclusion is a refinement of Theorem~\ref{thm:cancel}; the second yields the following analogue of Theorem~\ref{thm:cancel} for multiplication.

\begin{corollary}\label{cor:mulcan}
  If $km=\ell m$, then $k=\ell$.
\end{corollary}

\begin{theorem}\label{thm:wo}
  $\N$ is well-ordered\index{well ordered} by $<$: every
  nonempty set of natural 
  numbers has a least element.
\end{theorem}

\begin{proof}
  Suppose $A$ is a set of natural numbers with no least element.  Let
  $B$ be the set of natural numbers $n$ such that, if $m\leq n$, then
  $m\notin A$.  Then
  $1\in B$, since otherwise $1$ would be the least
  element of $A$.  Suppose $m\in B$.  Then $m+1\in B$, since otherwise
  $m+1$ would be the least element of $A$.  By induction, $B=\N$, so
  $A=\emptyset$. 
\end{proof}

The members of $\N$ are the \textbf{positive integers;}\label{no-th}
the full set $\Z$ of \emph{integers} will be defined formally 
in \S\ref{sect:ZQ} below, on page~\pageref{Z}.
As presented in Books VII--IX of Euclid's \emph{Elements} 
\cite{MR1932864,MR17:814b},
number theory is a study of the positive integers;
but a consideration of all integers is useful in this study,
and the integers will constitute a motivating example,
first of a group (page~\pageref{Z-as-group}), 
and then of a ring (page~\pageref{Z-as-ring}).

\section{A construction of the natural numbers}\label{sect:omega}

For an arbitrary set $a$, let
\begin{equation*}
a'=a\cup\{a\}.
\end{equation*}
If $A$ belongs to the class $\It$ defined in \eqref{eqn:upomega-defn} on page~\pageref{eqn:upomega-defn},
then $0\in A$, and $A$ is closed under the operation $x\mapsto x'$,
and so $(A,0,{}')$ is an iterative structure.
In particular,
$(\upomega,0,{}')$ is an iterative structure
by Theorem \ref{thm:AI} (page \pageref{thm:AI}).

\begin{theorem}\label{thm:Peano}
The structure $(\upomega,0,{}')$ satisfies the Peano Axioms.
\end{theorem}

\begin{proof}
There are three things to prove.
\begin{asparaenum}
\item
In $(\upomega,0,{}')$, the initial element $0$ is not a successor,
because for all sets $a$, the set $a'$ contains $a$, so it is nonempty.
\item
$(\upomega,0,{}')$ admits induction, because,
if $A\included\upomega$, 
and $A$ contains $0$ and is closed under $x\mapsto x'$,
then $A\in\It$, so $\bigcap\It\included A$ 
by Theorem \ref{thm:int} (page \pageref{thm:int}), 
that is, $\upomega\included A$.
\item
It remains to establish that $x\mapsto x'$ is injective on $\upomega$.
On page~\pageref{<}, we used recursion to define a relation $<$ on $\N$ so that
\begin{align}\label{eqn:mnot1}
m&\not<1,& m<n+1&\liff m<n\lor m=n.
\end{align}
\sloppy
Everything that we proved about this relation required only these properties, and induction.
On $\upomega$, we do not know whether we have recursion,
but we have \eqref{eqn:mnot1} when $<$ is $\in$ and $1$ is $0$: that is, we have
\begin{align*}
m&\notin0,&m\in n'&\liff m\in n\lor m=n.
\end{align*}
Therefore $\in$ must be a linear ordering of $\upomega$, 
by the proofs in the previous section.
Thus, if $m\neq n$, then either $m\in n$ or $n\in m$.
We also have the last lemma in that section for $\in$, 
that is, if $m\in n$, 
then either $m'=n$ or $m'\in n$;
and in either case, $m'\in n'$, so $m'\neq n'$.
Thus, assuming $m\neq n$, we have $m'\neq n'$.
\qedhere
\end{asparaenum}
\end{proof}

Given sets $A$ and $B$, we define
\begin{equation*}
A\setminus B=\{x\in A\colon x\notin B\}.
\end{equation*}
As a corollary of the foregoing theorem, we have that the iterative structure $(\upomega\setminus\{0\},1,{}')$ also satisfies the Peano Axioms.
We may henceforth assume that $(\N,1,x\mapsto x+1)$ is this structure.
In particular,
\begin{equation*}
\N=\upomega\setminus\{0\}.
\end{equation*}
Thus we no longer need the Peano Axioms as axioms;
they are theorems about $(\N,1,x\mapsto x+1)$ and $(\upomega,0,{}')$.

We extend the definitions of addition and multiplication on $\N$ to allow their arguments to be $0$:
\begin{align*}
n+0&=n=0+n,&n\cdot0&=0=0\cdot n.
\end{align*}

\begin{theorem}
Addition and multiplication are commutative and associative on $\upomega$,
and multiplication distributes over addition.
\end{theorem}

In particular, the equations \eqref{eqn:+.} (page \pageref{eqn:+.})
making up the recursive definitions of addition and multiplication on $\N$ 
are still valid on $\upomega$.
The same goes for factorials and exponentiation when we define\label{0factorial}
\begin{align*}
0!&=1,&n^0&=1.
\end{align*}
 

\section{Structures}\label{sect:structures}

For us, the point of using the von-Neumann definition of the natural numbers 
is that, under
this definition, a natural number $n$ 
is a particular set, namely $\{0,\dots,n-1\}$, with $n$ elements.
We denote the set of functions from a set $B$ to a set $A$ by\label{A^B}
\begin{equation*}
  A^B.
\end{equation*}
In particular then, $A^n$ is the set of functions from
$\{0,\dots,n-1\}$ into $A$.  We can denote such a function by one of
\begin{align*}
&(x_0,\dots,x_{n-1}),&
&(x_i\colon i<n),
\end{align*}
so that 
\begin{equation*}
A^n=\{(x_0,\dots,x_{n-1})\colon x_i\in A\}.
\end{equation*}
Thus, $A^2$ can be identified with $A\times A$, and $A^1$ with $A$
itself.  There is exactly one function from $0$ to $A$, namely $0$; so
\begin{equation*}
  A^0=\{0\}=1.
\end{equation*}
An $n$-ary \textbf{relation}\index{relation} on $A$ is a subset of $A^n$;
an \textbf{$n$-ary}\index{n-ary@$n$-ary}
\textbf{operation}\index{operation} on $A$ is a function from $A^n$ to
$A$.  Relations and operations that are $2$-ary, $1$-ary, or $0$-ary
can be called 
\textbf{binary,}\index{binary} \textbf{singulary,}\index{singulary}
or \textbf{nullary,}\index{nullary} respectively; after the appropriate
identifications, this agrees with the terminology used in
\S \ref{sect:f}. 
A nullary operation on $A$ can be identified with an element of $A$.  

Generalizing the terminology 
used at the beginning of \S\ref{sect:N} (page~\pageref{sect:N}),
we define a \textbf{structure}\index{structure}\label{structure}
as a set
together with some distinguished relations and operations on the set;
as before, the set is the \textbf{universe}\index{universe} of the structure.  
If the underlying set of a structure is denoted by a Latin letter, 
as $A$ or $B$,
then the structure itself may be denoted 
by the corresponding Fraktur letter, as $\str A$ or $\str B$.
See Appendix \ref{app:German}, page \pageref{app:German}.

The \textbf{signature}\index{signature}\label{signature} of a structure 
comprises a symbol 
for each distinguished relation and operation of the structure.  
For example, we have so far obtained $\N$ 
as a structure in the signature $\{1,+,\cdot,<\}$.
We may then write out this structure as
\begin{equation*}
(\N,1,+,\cdot,<).
\end{equation*}
In this way of writing the structure,
an expression like $+$ stands not for the \emph{symbol} of addition,
but for the actual operation on $\N$.
In general, if $s$ is a symbol of the signature of $\str A$, then the
corresponding relation or operation on $A$ can, for precision, 
be denoted by\label{interpret}
\begin{equation*}
   s^{\str A}.
\end{equation*}
Then $s^{\str A}$ is the \textbf{interpretation} of $s$ in $\str A$.

The reason why we might distinguish $s^{\str A}$ from $s$
is that two structures can have the same signature.
We must be clear what this means.
Each symbol of a signature carries with it two pieces of information:
\begin{compactenum}[1)]
\item 
whether it symbolizes a relation or an operation, and
\item
for which $n$ in $\upomega$ the relation or operation is $n$-ary.
\end{compactenum}
A relation symbol can be called a \textbf{predicate;}
a nullary operation symbol can be called a \textbf{constant.}
More than one symbol in a signature 
can symbolize an $n$-ary relation or an $n$-ary operation.
But we normally do not consider the sign $=$ of equality 
to belong to a signature.

If $\sig$ is a signature,
we denote the class of all structures with this signature by\label{Mod}
\begin{equation*}
  \Str.
\end{equation*}
Suppose $\str A$ and $\str B$ belong to $\Str$.
If $s\in\sig$, then $s^{\str A}$ is an $n$-ary operation or relation on $A$
if and only if $s^{\str B}$ is an $n$-ary operation or relation on $B$, 
respectively.
A \textbf{homomorphism}\label{hom} 
from $\str A$ to $\str B$
is a function $h$ from $A$ to $B$ 
that \emph{preserves} the relations and operations symbolized in $\sig$: 
this means
\begin{gather}\notag
  h(f^{\str A}(x_0,\dots,x_{n-1}))=f^{\str
    B}(h(x_0),\dots,h(x_{n-1})),\\\label{eqn:hom}
(x_0,\dots,x_{n-1})\in R^{\str A}\lto(h(x_0),\dots,h(x_{n-1}))\in
  R^{\str B},
\end{gather}
for all $n$-ary operation symbols $f$ of $\sig$
and all $n$-ary predicates $R$ of $\sig$, 
for all $n$ in $\upomega$.  
To indicate that $h$ is a homomorphism from $\str A$ to $\str B$, 
we may write
\begin{equation*}
h\colon\str A\to\str B
\end{equation*}
(rather than simply $h\colon A\to B$).
We have already seen a special case of a homomorphism in the Recursion Theorem
(Theorem~\ref{thm:rec}, page~\pageref{thm:rec}).
The following is easily proved.

\begin{theorem}\label{thm:hom-comp}
If $h\colon\str A\to\str B$ and $g\colon\str B\to\str C$, then
\begin{equation*}
g\circ h\colon\str A\to\str C.
\end{equation*}
\end{theorem}

A homomorphism is an
\textbf{embedding}\index{embedding} if it is injective and if the converse
of~\eqref{eqn:hom} also holds.  A surjective embedding is an
\textbf{isomorphism.}\index{isomorphism}

\begin{theorem}\label{thm:isom-inv}
The function $\id A$ is an isomorphism from $\str A$ to itself.
The following are equivalent conditions 
on a bijective homomorphism $h$ from $\str A$ to $\str B$:
\begin{compactenum}[1)]
\item
$h$ is an isomorphism from $\str A$ to $\str B$,
\item
$h\inv$ is a homomorphism from $\str B$ to $\str A$,
\item
$h\inv$ is an isomorphism from $\str B$ to $\str A$.
\end{compactenum}
\end{theorem}

If there is an isomorphism from a structure $\str A$ to a structure $\str B$,
then these two structures are said to be \textbf{isomorphic} to one another,
and we may write
\begin{equation*}
\str A\cong\str B.
\end{equation*}
In this case $\str A$ and $\str B$ are indistinguishable as structures,
and so (out of laziness perhaps) we may \emph{identify}\label{identify} them,
treating them as the \emph{same} structure.
We have already done this, in a sense, 
with $(\N,1,x\mapsto x+1)$ and $(\upomega\setminus\{0\},1,{}')$.
However, we never actually had a set called $\N$,
until we identified it with $\upomega\setminus\{0\}$.
 
A \textbf{substructure}\index{substructure} of a structure $\str B$ 
is a structure $\str A$ of the
same signature such that $A\included B$ 
and the \textbf{inclusion} $x\mapsto x$ of $A$ in
$B$ is an embedding of $\str A$ in $\str B$.
To indicate that $\str A$ is a substructure of $\str B$, we may write
\begin{equation*}
  \str A\included\str B.
\end{equation*}
 
\textbf{Model theory}\label{MTh} studies structures as such.
\textbf{Universal algebra} studies \textbf{algebras,}\label{algebra}
which are sets with distinguished operations,
but no distinguished relations. 
In other words, an algebra 
is a structure in a signature with no symbols for relations.

\begin{sloppypar}
We shall study mainly the algebras called \emph{groups} 
and the algebras called \emph{rings.}
Meanwhile, we have the algebra $(\N,1,+,\cdot)$,
and we shall have more examples in the next section.
\end{sloppypar}

A \textbf{reduct}\label{reduct} of a structure 
is obtained by ignoring some of its operations and relations, 
while the universe remains the same.
The original structure is then an \textbf{expansion} of the reduct.
For example, $(\N,+)$ is a reduct of $(\N,+,\cdot,<)$,
and the latter is an expansion of the former.

Let us finally note that the universe of a structure
is normally considered to be a set, and not just a class.
Thus the \emph{universal class} $\{x\colon x=x\}$ 
is not the universe of a structure with signature $\{\in\}$.
Set theory does study structures in this signature
that have some of the properties of the universal class.
We shall not do this.
However, in order to talk precisely about structures as such,
in Chapter~\ref{ch:MT} (page \pageref{ch:MT}) we shall adapt the logic 
that we developed in \S\ref{sect:sets} (page \pageref{sect:sets})
for talking about sets.


\section{Constructions of the integers and rationals}\label{sect:ZQ}
 
The next theorem is an example of something like \emph{localization,} 
which will be the topic of \S\ref{sect:loc} 
(p.~\pageref{sect:loc}).  
One learns the theorem implicitly in school, 
when one learns about fractions.
Perhaps fractions are our first encounter 
with nontrivial \emph{equivalence classes.}

On page~\pageref{ordering}, we defined an \emph{ordering}
as an irreflexive, transitive relation on a set.
A relation $R$ on a set $A$ is \textbf{reflexive} and \textbf{symmetric} if,
respectively,
\begin{align*}
  a&\mathrel Ra,&a\mathrel Rb&\liff b\mathrel Ra,
\end{align*}
for all $a$ and $b$ in $A$.
A reflexive, symmetric, transitive relation on a set 
is an \textbf{equivalence relation}\label{eq-rel} on that set.
If $E$ is an equivalence relation on $A$,
and $a\in A$, then the set
\begin{equation*}
  \{x\in A\colon a\mathrel Ex\}
\end{equation*}
is the \textbf{equivalence class} of $a$ in $A$ with respect to $E$.
We may denote by
\begin{equation*}
  A/E
\end{equation*}
the set of equivalence classes of elements of $A$ with respect to $E$;
this is the \textbf{quotient} of $A$ by $E$.
Since the equivalence class of an element of $A$ contains that element
and is included in $A$, we have
\begin{equation*}
  A=\bigcup A/E.
\end{equation*}
Moreover, two distinct equivalence classes are disjoint,
and so $A$ is the \textbf{disjoint union} of $A/E$.

Now let $\approx$ be the binary relation on $\N\times\N$ given by%%%%%
\footnote{As a binary relation on $\N\times\N$, 
the relation $\approx$ is a subset of $(\N\times\N)^2$, 
which we identify with $\N^4$.}
\begin{equation}\label{eqn:approx}
  (a,b)\approx(x,y)\liff ay=bx.
\end{equation}

\begin{lemma}\label{lem:approx}
The relation $\approx$ is an equivalence relation on $\N\times\N$.   
\end{lemma}

If $(a,b)\in\N\times\N$,
let its equivalence class with respect to $\approx$
be denoted by either of
\begin{align*}
&a/b,&
  &\frac ab.
\end{align*}
Let the set $(\N\times\N)/\mathord{\approx}$ 
of all such equivalence classes be denoted by\label{Qp}
\begin{equation*}
  \Qp.
\end{equation*}  
This set comprises the \textbf{positive rational numbers.}

\begin{theorem}\label{thm:Qp}
There are \emph{well-defined} binary operations $+$ and $\cdot$ on $\Qp$
given by the rules
\begin{align}\label{eqn:Qp+.}
\frac ab+\frac xy&=\frac{ay+bx}{by},&
  \frac ab\cdot\frac xy&=\frac{ax}{by}.
\end{align}
There is a well-defined singulary operation ${}\inv$ on $\Qp$ given by
\begin{equation}\label{eqn:inv}
\left(\frac xy\right)\inv=\frac yx.  
\end{equation}
There is a well-defined linear ordering $<$ of $\Qp$ given by
\begin{equation}\label{eqn:<}
\frac ab<\frac xy\liff ay<bx.
\end{equation}
The structure $(\N,+,\cdot,<)$ embeds in $(\Qp,+,\cdot,<)$ under the
map
\begin{equation*}
  x\mapsto\frac x1.
\end{equation*}
Addition and multiplication are commutative and associative on $\Qp$, 
and multiplication distributes over addition.
Moreover,
\begin{align}\label{eqn:Qp-group}
  \frac11\cdot\frac xy&=\frac xy,&
  \left(\frac xy\right)\inv\cdot\frac xy&=\frac11.
\end{align}
Finally,
\begin{equation}\label{eqn:Qp-ordered}
  \frac11<\frac ab\land\frac11<\frac xy\lto\frac11<\frac ab\cdot\frac xy.
\end{equation}
\end{theorem}

The operations on $\Qp$ in the theorem are said to be \textbf{well defined}
because it is not immediately obvious that they exist at all.
It is possible that $a/b=c/d$ although $(a,b)\neq(c,d)$.
In this case one must check that (for example) $(ay+bx)/(by)=(cy+dx)/(dy)$.
See page~\pageref{well-defined}.

\begin{sloppypar}
Because multiplication is commutative and associative on $\Qp$,
and \eqref{eqn:Qp-group} holds,
the structure $(\Qp,1/1,{}\inv,\cdot)$ is an \textbf{abelian group.}
Because in addition $\Qp$ is linearly ordered by $<$,
and \eqref{eqn:Qp-ordered} holds,
the structure $(\Qp,1/1,{}\inv,\cdot,<)$ is an \textbf{ordered group.}%%%%%
\footnote{In particular, all of our ordered groups will be abelian.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
The \textbf{positive} elements of this group 
are those elements $a$ such that $a>1/1$,
although we do not usually use this terminology when,
as at present, the ordered group is written \emph{multiplicatively.}
\end{sloppypar}

For the moment, a natural number is \emph{not} a positive rational number.
Therefore, even though we already have a function $(x,y)\mapsto x/y$ 
from $\N\times\N$ to $\Qp$,
we are free to use the same notation to define a binary operation on $\Qp$.
This operation will be given by
\begin{equation}\label{eqn:/}
  \frac xy=x\cdot y\inv.
\end{equation}
We easily have the following.

\begin{theorem}
For all $m$ and $n$ in $\N$,
\begin{equation}\label{eqn:m1n1}
  \frac{m/1}{n/1}=\frac mn.
\end{equation}
The rules \eqref{eqn:Qp+.}, \eqref{eqn:inv}, and \eqref{eqn:<} are correct
when the letters range over $\Qp$.
\end{theorem}

The meaning of \eqref{eqn:m1n1} 
is that the diagram in Figure \ref{fig:m1n1} commutes.
\begin{figure}
  \begin{equation*}
    \xymatrix@!0@R=3.46cm@C=2cm
{\N\times\N\ar[rr]^{(x,y)\mapsto\textstyle\frac xy}
           \ar[dr]_{(x,y)\mapsto\left(\textstyle\frac x1,\frac y1\right)}&&\Qp\\
&\Qp\times\Qp\ar[ur]_{(x,y)\mapsto x\cdot y\inv}&}
  \end{equation*}
  \caption{Division of positive rationals}\label{fig:m1n1}
  
\end{figure}
We may now \emph{identify} $n$ and $n/1$,
treating them as the same thing.
Then $\N\included\Qp$,
and the function $(x,y)\mapsto x/y$ from $\N\times\N$ to $\Qp$
is just the restriction of the binary operation on $\Qp$.

In the definition \eqref{eqn:approx} of $\approx$, 
if we replace multiplication with addition,
then instead of the positive rational numbers,
we obtain the \emph{integers.}
Probably this construction of the integers is not learned in school.
If it were, possibly students would never think 
that $-x$ is automatically a negative number.
In any case, by applying this construction of the integers
to the positive rational numbers, 
we obtain all of the rational numbers as follows.
Let $\sim$ be the binary relation on
$\Qp\times\Qp$ given by
\begin{equation}\label{eqn:sim}
  (a,b)\sim(x,y)\liff a+y=b+x.
\end{equation}

\begin{lemma}
The relation $\sim$ on $\Qp\times\Qp$ is an equivalence relation.    
\end{lemma}

If $(a,b)\in\Qp\times\Qp$, 
let its equivalence class with respect to $\sim$ be denoted by
\begin{equation*}
  a-b.
\end{equation*}
Let the set of such equivalence classes be denoted by
\begin{equation*}
  \Q.
\end{equation*}
This set comprises the \textbf{rational numbers.}
However, for the moment, a positive rational number is not a rational number.
We denote by
\begin{equation*}
  0
\end{equation*}
the rational number $1-1$.
 
\begin{theorem}\label{thm:Q}
There are well-defined operations $-$, $+$, and $\cdot$ on $\Q$ 
given by the rules
\begin{equation}\label{eqn:Qrules}
  \left.\quad
\begin{gathered}
-(x-y)=y-x,\\
(a-b)+(x-y)=(a+x)-(b+y),\\
(a-b)\cdot(x-y)=(ax+by)-(ay+bx).
\end{gathered}\quad
\right\}
\end{equation}
There is a dense linear ordering $<$ of $\Q$ given by
\begin{equation*}
  a-b<x-y\liff a+y<b+x.
\end{equation*}
The structure $(\Qp,+,\cdot,<)$ embeds in $(\Q,+,\cdot,<)$ 
under the map
\begin{equation*}
  x\mapsto(x+1)-1.
\end{equation*}
The structure $(\Q,0,-,+,<)$ is an ordered group,
and its positive elements are just those in the image of $\Qp$.
Multiplication is also commutative and associative on $\Q$,
and it distributes over addition.
\end{theorem}

As before, although we already have a function $(x,y)\mapsto x-y$ 
from $\Qp\times\Qp$ to $\Q$,
we are free to use the same notation for a binary operation on $\Q$
given by
\begin{equation*}
x-y=x+(-y).
\end{equation*}
We easily have the following.

\begin{theorem}
For all $a$ and $b$ in $\Qp$,
\begin{equation}\label{eqn:Qsub}
  \bigl((a+1)-1\bigr)-\bigl((b+1)-1\bigr)=a-b.
\end{equation}
The rules \eqref{eqn:Qrules} are correct
when the letters range over $\Q$.
\end{theorem}

The meaning of \eqref{eqn:Qsub} 
is that the diagram in Figure \ref{fig:Qsub} commutes.
\begin{figure}
  \begin{equation*}
    \xymatrix@!0@R=3.46cm@C=2cm
{\Qp\times\Qp\ar[rr]^{(x,y)\mapsto x-y}
           \ar[dr]_{(x,y)\mapsto\big((x+1)-1,(y+1)-1\big)}&&\Q\\
&\Q\times\Q\ar[ur]_{(x,y)\mapsto x+(-y)}&}
  \end{equation*}
  \caption{Subtraction of rationals}\label{fig:Qsub}
  
\end{figure}
We now identify $\Qp$ with its image in $\Q$,
so that a positive rational number 
is indeed just a rational number that is positive.

\begin{theorem}
$\Qp=\{x\in\Q\colon0<x\}$.
The singulary operation ${}\inv$ on $\Qp$
extends to an operation on $\Q\setminus\{0\}$ when
\begin{equation*}
  x\inv=-(-x)\inv
\end{equation*}
on $\{-x\colon x\in\Qp\}$.
Then $(\Q\setminus\{0\},1,{}\inv,\cdot)$ is an abelian group.
The binary operation $/$ on $\Qp$
is the restriction of the function $/$ 
from $\Q\times(\Q\setminus\{0\})$ to $\Q$ given by \eqref{eqn:/},
and \eqref{eqn:Qp+.}, \eqref{eqn:inv}, and \eqref{eqn:<} hold
when the letters range over $\Q$ (and the expressions are defined).
\end{theorem}

\begin{sloppypar}
Because $(\Q,0,-,+,<)$
and $(\Qp,1,{}\inv,{}\cdot{},<)$ are ordered groups,
where $\Qp=\{x\in\Q\colon x>0\}$,
and multiplication distributes over addition in $\Q$,
the structure $(\Q,0,-,+,1,\cdot,<)$ is an \textbf{ordered field.}
However, the ordering of $\Q$ is not \textbf{complete,} that is,
there are subsets with upper bounds, 
but no \emph{suprema} (least upper bounds).
An example is the set
\begin{equation*}
 \{x\in\Q\colon 0<x\land x^2<2\}. 
\end{equation*}
\end{sloppypar}

  We can now define\label{Z}
\begin{equation*}
  \Z=\{x-y\colon(x,y)\in\N\times\N\};
\end{equation*}
this is the subset of $\Q$ comprising the \textbf{integers.}

\begin{theorem}\label{thm:Z->Q}\mbox{}
  \begin{compactenum}
\item
$(\Z,0,-,+,1,\cdot,<)\included(\Q,0,-,+,1,\cdot,<)$.
\item
In particular,
$(\Z,0,-,+,1,\cdot,<)$ is well defined.
\item
$(\Z,0,-,+,<)$ is an ordered group.
\item
$\Q=\left\{x/y\colon x\in\Z\land y\in\Z\setminus\{0\}\right\}$.
  \end{compactenum}
\end{theorem}

Because of the theorem,
we can also think of $\Q$ as arising from $\Z$ 
by the same construction that gives us $\Qp$ from $\N$.
We shall generalize this construction of $\Q$ 
in \S\ref{sect:loc} (page \pageref{sect:loc}).
 
\section{A construction of the reals}\label{sect:R}
 
There is a technique due to Dedekind for completing $(\Q,<)$
to obtain the completely ordered set $(\R,<)$.
As Dedekind says explicitly \cite[pp.~39--40]{MR0159773},
the original inspiration for the technique
is the definition of \emph{proportion}
found in Book V of Euclid's \emph{Elements}
\cite{MR1932864,MR17:814b}.

In the geometry of Euclid,
a \emph{straight line} is what we now call 
a \emph{line segment} or just \emph{segment,} 
and two segments are \emph{equal} to one another
if they are congruent to one another.
Congruence of segments is an equivalence relation.
Let us refer to a congruence class of segments
as the \emph{length} of any one of its members.
Two lengths can be \emph{added} together
by taking two particular segments with those lengths 
and setting them end to end.
Then lengths of segments
compose the set of positive elements of an ordered group. 
In particular, individual lengths can be \emph{multiplied,}
in the original sense of being taken several times.
Indeed, if $A$ is a length, and $n\in\N$, 
we can define the multiple $nA$ of $A$ recursively:
\begin{align*}
  1A&=A,&(n+1)A&=nA+A.
\end{align*}
It is assumed that, for any two lengths $A$ and $B$,
some multiple of $A$ is greater than $B$:
this is the \textbf{archimedean property.}
If $C$ and $D$ are two more lengths,
then $A$ has to $B$ the \emph{same ratio} that $C$ has to $D$,
provided that, for all $k$ and $m$ in $\N$,
\begin{equation*}
  kA>mB\liff kC>mD.
\end{equation*}
In this case, the four lengths $A$, $B$, $C$, and $D$ are \emph{proportional,} 
and we may write
\begin{equation*}
  A:B:\;:C:D.
\end{equation*}
We can write the condition for this proportionality as
\begin{equation*}
  \left\{\frac xy\in\Qp\colon xB<yA\right\}
=\left\{\frac xy\in\Qp\colon xD<yC\right\}
\end{equation*}
Dedekind's observation is that such sets can be defined
independently of all geometrical considerations.
Indeed, we may define a \textbf{positive real number}
as a nonempty, proper subset $C$ of $\Qp$ such that
\begin{compactenum}[1)]
\item
if $a\in C$ and $b\in\Qp$ and $b<a$, then $b\in C$, and
\item
if $C$ has a supremum in $\Qp$, this supremum does not belong to $C$.  
\end{compactenum}
Let the set of all positive real numbers be denoted by
\begin{equation*}
  \Rp.
\end{equation*}

\begin{theorem}\label{thm:R-complete}
The set $\Rp$ is completely ordered by proper inclusion.
There are well-defined operations $+$, ${}\inv$, and $\cdot$ on $\Rp$
given by the rules
\begin{gather*}
	C+D=\{x+y\colon x\in C\land y\in D\},\\
	C\inv=\{x\inv\colon x\in\Qp\land\Exists y(y\in\Qp\setminus C\land y<x)\},\\
	C\cdot D=\{x\cdot y\colon x\in C\land y\in D\}.
\end{gather*}
Then $(\Qp,+,{}\inv,\cdot)$ embeds in $(\Rp,+,{}\inv,\cdot)$
under $y\mapsto\{x\in\Qp\colon x<y\}$.
\end{theorem}

Let us identify $\Qp$ with its image in $\Rp$.
We may also write $\pincluded$ on $\Rp$ as $<$.

For every $n$ in $\upomega$, 
an $n$-ary operation $f$ on $\Rp$ is \textbf{continuous}
if, for every $(A_i\colon i<n)$ in $(\Rp)^n$,
for every $\epsilon$ in $\Qp$, 
there is $(\delta_i\colon i<n)$ in $(\Qp)^n$ such that, 
for all $(X_i\colon i<n)$ in $(\Rp)^n$, if
\begin{equation*}
\bigwedge_{i<n}A_i-\delta_i<X_i<A_i+\delta_i,
\end{equation*}
then
\begin{equation*}
f(A_i\colon i<n)-\epsilon<f(X_i\colon i<n)<f(A_i\colon i<n)+\epsilon.
\end{equation*}

\begin{theorem}
The operations $+$, ${}\inv$, and $\cdot$ on $\Rp$ are continuous.
Every composite of continuous functions on $\Rp$ is continuous.
\end{theorem}

\begin{lemma}
The only continuous singulary operation on $\Rp$ that is $1$ on $\Qp$ 
is $1$ everywhere.
\end{lemma}

\begin{theorem}%\label{thm:R-of}
\sloppy
The structure $(\Rp,1,{}\inv,\cdot,<)$ 
is an ordered group,
and addition is commutative and associative on $\Rp$,
and multiplication distributes over addition on $\Rp$.
\end{theorem}

Now define $\sim$ on $\Rp\times\Rp$ as in~\eqref{eqn:sim}.
Just as before, this is an equivalence relation.
The set of its equivalence classes is denoted by
\begin{equation*}
  \R.
\end{equation*}
Just as before, we obtain the ordered field $(\R,0,-,+,{}\inv,\cdot,<)$.
But now, the ordering is complete.
We identify $\Rp$ with its image in $\R$.
The elements of $\R$ are the \textbf{real numbers.}

\begin{lemma}
  For every $n$ in $\N$, 
for every element $A$ of a completely and densely ordered group,
the equation
\begin{equation*}
  nX=A
\end{equation*}
is soluble in the group.
\end{lemma}

\begin{theorem}
\sloppy
Suppose $(G,0,-,+,<)$ is a completely and densely ordered group,
and $u$ is a positive element of $G$, 
and $b$ is an element of $\Rp$ such that $1<b$.
Then there is an isomorphism from $(G,0,-,+,<)$ to $(\Rp,1,{}\inv,\cdot,<)$
taking $u$ to $b$. 
\end{theorem}

\begin{sloppypar}
By the theorem, 
the completely ordered groups $(\R,0,-,+,<)$ and $(\Rp,1,{}\inv,\cdot,<)$ 
are isomorphic,
and indeed for every $b$ in $\Rp$ such that $b>1$,
there is an isomorphism taking $1$ to $b$.
This isomorphism is denoted by
\begin{equation*}
  x\mapsto b^x,
\end{equation*}
and its inverse is
\begin{equation*}
  x\mapsto\log_bx.
\end{equation*}
\end{sloppypar}

\begin{theorem}[Intermediate Value Theorem]
If $f$ is a continuous singulary operation on $\R$, and $f(a)\cdot f(b)<0$, 
then $f$ has a zero between $a$ and $b$.
\end{theorem}

Hence for example the function $x\mapsto x^2-2$ 
must have a zero in $\R$ between $1$ and $2$.
More generally,
if $A\included\R$, then the set of \emph{polynomial functions over $A$} 
is obtained from the set of constant functions taking values in $A$, 
along with $-$, $+$, $\cdot$, and the projections $(x_0,\dots,x_{n-1})\mapsto x_i$, 
by closing under taking composites.  
Then all polynomial functions over $\R$ are continuous, 
and so the Intermediate Value Theorem applies to the singulary polynomial functions.
Therefore the ordered field $\R$ is said to be \textbf{real-closed.}
However, there are smaller real-closed ordered fields:
we establish this in the next section.




\section{Countability}\label{sect:count}

A set is \textbf{countable} if it embeds in $\upomega$; 
otherwise the set is \textbf{uncountable.}

\begin{theorem}
The sets $\N$, $\Z$, and $\Q$ are all countable.
\end{theorem}

\begin{theorem}\label{thm:pow-un}
$\pow{\upomega}$ is uncountable.
\end{theorem}

\begin{proof}
Suppose $f$ is an injection from $\upomega$ to $\pow{\upomega}$.
Then the subset $\{x\colon x\notin f(x)\}$ of $\upomega$ is not in the range of $f$,
by a variant of the Russell Paradox:
if $\{x\colon x\notin f(x)\}=f(a)$, then $a\in f(a)\liff a\notin f(a)$.
\end{proof}

\begin{theorem}\label{thm:R-uncount}
The set $\R$ is uncountable.
\end{theorem}

\begin{proof}
For every subset $A$ of $\upomega$, 
let $g(A)$ be the set of rational numbers $x$ such that, 
for some $n$ in $\upomega$,
\begin{equation*}
x<\sum_{k\in A\cap n}\frac2{3^k}.
\end{equation*}
Then $g(A)$ is a real number by the original definition.
The function $A\mapsto g(A)$ from $\pow{\upomega}$ to $\R$ is injective.
\end{proof}

In the theorem, the image of the function $g$ is the \emph{Cantor Set};
see page \pageref{Cantor}.

If $A\included\R$, suppose we let $\rc A$ be the smallest field 
that contains all zeros from $\R$ of singulary polynomial functions over $A$.  
If we define $A_0=\Q$ and $A_{n+1}=\rc{{A_n}}$, 
then $\bigcup_{n\in\upomega}A_n$ will contain 
all zeros from $\R$ of singulary polynomial functions over itself.
Thus it will be real-closed.
In fact it will be $\rc{\Q}$.
But this field is countable.

We can say more about a set than whether it is countable or uncountable.
A class is \textbf{transitive} if it properly includes all of its elements.
A transitive \emph{set} is an \textbf{ordinal} 
if it is well-ordered by the relation of membership.
Then all of the elements of $\upomega$ are ordinals, and so is $\upomega$ itself.
The class of all ordinals can be denoted by
\begin{equation*}
\on.
\end{equation*}

\begin{theorem}
The class $\on$ is transitive and well-ordered by membership.
\end{theorem}

In particular, $\on$ cannot contain itself;
so it is not a set.
This result is the \textbf{Burali-Forti Paradox}\label{BF}~\cite{Burali-Forti}.

\begin{theorem}
Every well-ordered set $(A,<)$ is isomorphic to a unique ordinal.
The isomorphism is a certain function $f$ on $A$, 
and this function is determined by the rule
\begin{equation*}
f(b)=\{f(x)\colon x<b\}.
\end{equation*}
\end{theorem}
 
There are three classes of ordinals.
\begin{compactenum}
\item 
A \textbf{successor} is an ordinal $\alpha'$ for some ordinal $\alpha$.
\item
The least ordinal, $0$, is in a class by itself.
\item
A \textbf{limit} is an ordinal that is neither $0$ nor a successor.
\end{compactenum}
Then $\upomega$ is the least limit ordinal.

Two sets are \textbf{equipollent} if there is a bijection between them.
An ordinal is a \textbf{cardinal} 
if it is the least ordinal that is equipollent with it.

\begin{theorem}%\label{thm:fin-ord-card}
Every element of $\upomega$ is a cardinal.  So is $\upomega$ itself.
\end{theorem}

The class of cardinals can be denoted by
\begin{equation*}
\cn.
\end{equation*}
Every set is equipollent with at most one cardinal,
which is called the \textbf{cardinality}\label{cardinality} 
or \textbf{size} of that set.
The cardinality of an arbitrary set $A$ is denoted by
\begin{equation*}
\card A.
\end{equation*}
A countable set has cardinality $\upomega$ or less;
uncountable sets have cardinality greater than $\upomega$.
The \textbf{finite} sets are those whose cardinalities are less than $\upomega$;
other sets are \textbf{infinite.}

\chapter{Groups and Rings}

\section{Groups and rings}

\subsection{Groups}

Given a set $A$, we may refer to a bijection from $A$ to itself 
as a \textbf{symmetry}\index{symmetry} 
or \textbf{permutation}\index{permutation} of $A$.  
Let us denote the set of these symmetries by
\begin{equation*}
  \Sym A.
\end{equation*}
This set can be equipped with:
\begin{compactenum}[1)]
  \item
the element 
  $\id A$, which is the
  \textbf{identity}\index{identity} on $A$; 
\item
the singulary operation $f\mapsto f\inv$, 
which is \textbf{inversion;}\index{inversion}
\item
the binary operation $(f,g)\mapsto f\circ g$, 
which is \textbf{composition.}\index{composition}
\end{compactenum}
The $4$-tuple
\begin{equation*}
(\Sym A,\id A,{}\inv,\circ)
\end{equation*}
is the \textbf{complete group of symmetries}%
\index{complete group of symmetries} 
of $A$.  
We may speak of the set $\Sym A$ as the \emph{underlying set} of the group.
We may also use $\Sym A$ 
to denote the group
$(\Sym A,\id A,{}\inv,\circ)$.  
A \textbf{subgroup}\label{subgroup} of this group
is a subset of $\Sym A$ that contains $\id A$ 
and is closed under inversion and composition.
Such a subgroup can be called simply a 
\textbf{group of symmetries}\index{group of symmetries} of $A$.  
The following is easily verified.
  
\begin{theorem}\label{thm:sym}
For all sets $A$, 
for all elements $f$, $g$, and $h$ of a group of symmetries of $A$,
\begin{gather*}
\begin{aligned}
&  \begin{gathered}
f\circ\id A=f,\\
\id A\circ f=f,
  \end{gathered}
&&
  \begin{gathered}
    f\circ f\inv=\id A,\\
f\inv\circ f=\id A,
  \end{gathered}
\end{aligned}\\
    (f\circ g)\circ h=f\circ(g\circ h).	
\end{gather*}
\end{theorem}  

A group of symmetries is an example of an \textbf{algebra,}
that is, a set equipped with some operations.
More generally, a \textbf{structure} 
is a set equipped with operations \emph{and} relations.
A \textbf{group}\index{group} is an algebra
with the properties of a group of symmetries given by the last theorem
(Theorem~\ref{thm:sym}).  
That is, a group is an algebra $(G,\gid,{}\inv,\cdot)$ 
in which the following equations are \emph{identities} 
(are true for all values of the variables):
\begin{gather*}
  \begin{aligned}
    &\begin{gathered}
      x\cdot\gid=x,\\
\gid\cdot x=x,
    \end{gathered}
&&
    \begin{gathered}
x\cdot x\inv=\gid,\\
x\inv\cdot x=\gid,      
    \end{gathered}
  \end{aligned}\\
(x\cdot y)\cdot z=x\cdot(y\cdot z).
\end{gather*}
  \begin{comment}
    

\begin{gather*}
x\cdot\gid=x,\\
\gid\cdot x=x,\\
x\cdot x\inv=\gid,\\
x\inv\cdot x=\gid,\\
(x\cdot y)\cdot z=x\cdot(y\cdot z).
\end{gather*}


  \end{comment}
We may say also that these equations are the \emph{axioms} of groups: 
this means that their \emph{generalizations}
($\Forall xx\cdot\gid=x$ and so forth)
are true in every group, by definition.
According to these axioms, in every group $(G,\gid,{}\inv,\cdot)$,
\begin{compactenum}[1)]
\item
the binary operation $\cdot$ is \textbf{associative,}
\item
the element $\gid$ is an \textbf{identity} with respect to $\cdot$,
\item
the singulary operation ${}\inv$ is \textbf{inversion} 
with respect to $\cdot$ and~$\gid$.
\end{compactenum}
The identity and the inversion 
will turn out to be uniquely determined by the binary operation, 
by Theorem~\ref{thm:u} on page~\pageref{thm:u}.

A group is called \textbf{abelian}\label{abelian} 
if its binary operation is commutative.
If $A$ has at least three elements, then $\Sym A$ is not abelian.
However, every one-element set $\{a\}$ becomes an abelian group when we define
\begin{align*}
\gid&=a,&a\inv&=a,&a\cdot a&=a.
\end{align*}
This group is a \textbf{trivial group.}  
For example, both $\Sym0$ and $\Sym1$ are trivial groups.
All trivial groups are isomorphic to one another.
Therefore we tend to identify them with one another,
referring to each of them as \emph{the} trivial group,
which we shall denote by
\begin{equation*}
  \trivgp.
\end{equation*}
Besides this and the symmetry groups, 
we have the following seven examples of groups, namely\label{Z-as-group}
\begin{align*}
  &(\Z,0,-,+),&
  &(\Q,0,-,+),&
  &(\R,0,-,+)
\end{align*}
along with
\begin{align*}
&(\Qp,1,{}\inv,\cdot),&
&(\Q\setminus\{0\},1,{}\inv,\cdot),\\
&(\Rp,1,{}\inv,\cdot),&
&(\R\setminus\{0\},1,{}\inv,\cdot).
\end{align*}
In the first three examples, 
the symbols $-$ and $+$ mean something different in each case, 
although we understand $0$ to be the same in each case.
In the last four examples, 
the symbols ${}\inv$ and $\cdot$ mean something different in each case, 
although we understand $1$ to be the same in each case.
All seven examples are abelian.
The last four of them
are the origin of a terminological convention.
In an arbitrary group $(G,\gid,{}\inv,\cdot)$,
the operation $\cdot$ is usually called \textbf{multiplication.} 
We usually write $g\cdot h$ as $gh$.  
The element $g\inv$ is the \textbf{inverse} of $g$.  
The element $\gid$ is the \textbf{identity,} and 
it is sometimes denoted by $1$ rather than $\gid$. 

Evidently the first three examples 
use different notation.
These groups are said to be written \textbf{additively.}\label{additive}
Additive notation is often used for abelian groups,
but almost never for other groups.
It will be useful to have one more example of an abelian group.
Actually there will be one example for each positive integer.
If $a$ and $b$ are arbitrary integers for which the equation
\begin{equation*}
ax=b
\end{equation*}
has a solution in $\Z$, 
then we say that $a$ \textbf{divides} $b$,\label{divides}
or $a$ is a \textbf{divisor} or \textbf{factor} of $b$,
or $b$ is a \textbf{multiple} of $a$,
and we may write
\begin{equation*}
a\divides b.
\end{equation*}
Using the notation due to Gauss \cite[p.~1]{Gauss}, 
for a positive integer $n$ and arbitrary integers $a$ and $b$ we write
\begin{equation*}
a\equiv b\pmod n
\end{equation*}
if $n\divides a-b$.
In this case we say $a$ and $b$ are \textbf{congruent} 
with respect to the \textbf{modulus} $n$.
This manner of speaking is abbreviated 
by putting the Latin word \emph{modulus} into the ablative case:
$a$ and $b$ are congruent \textbf{\emph{modulo}} $n$.%%%%%
\footnote{The ablative case of Latin
corresponds roughly to the \emph{-den hali} of Turkish.
Gauss writes in Latin; however, instead of \emph{modulo} $n$, 
he says \emph{secundum modulum} $n$, 
\enquote{according to the modulus $n$} \cite[p.~2]{Gauss-Latin}.}
%%%%%
Still following Gauss, 
we may say too that $a$ is a \textbf{residue} of $b$ 
with respect to the modulus $n$.


\begin{theorem}\label{thm:mod-n}
Let $n\in\N$.  
\begin{compactenum}
\item
Congruence \emph{modulo} $n$ is an equivalence relation on $\Z$.
\item
If $a\equiv x$ and $b\equiv y\pmod n$, then
\begin{equation*}
-a\equiv-x\And a+b\equiv x+y\And ab\equiv xy\pmod n.
\end{equation*}
\end{compactenum}
\end{theorem}

The set of congruence-classes of integers \emph{modulo} $n$ can be denoted by
\begin{equation*}%\label{mod}
\Zmod n.
\end{equation*}
If $a$ is some integer, 
we can denote its congruence-class \emph{modulo} $n$ 
by something like $[a]$ or $\bar a$, or more precisely by
\begin{equation*}
a+n\Z.
\end{equation*}

\begin{theorem}\label{thm:res}
For every positive integer $n$,
the function
\begin{equation*}
x\mapsto x+n\Z
\end{equation*}
from $\{0,\dots,n-1\}$ to $\Zmod n$ is a bijection.
\end{theorem}

Again given a positive integer $n$,
we may treat an arbitrary integer 
as a name for its own congruence-class \emph{modulo} $n$.
In particular, by the last theorem,
we may consider $\Zmod n$ as being the set $\{0,\dots,n-1\}$,
where these $n$ elements are understood to be distinct.
By Theorem~\ref{thm:mod-n}, 
we have a well-defined algebra $(\Zmod n,0,-,+,1,\cdot)$,
where $0$ and $1$ stand for 
their respective congruence-classes $n\Z$ and $1+n\Z$.
The following theorem is now an easy consequence of Theorem~\ref{thm:mod-n}.

\begin{theorem}\label{thm:Zmod-group}
For each $n$ in $\N$, the algebra $(\Zmod n,0,-,+)$ is an abelian group.
\end{theorem}

A \textbf{homomorphism} from a group $(G,\gid,{}\inv,\cdot)$ 
to a group $(H,\gid,{}\inv,\cdot)$ is a function $h$ from $G$ to $H$ 
that \enquote{preserves structure} in the sense that
\begin{align*}
  h(\gid)&=\gid,&
h(x\inv)&=h(x)\inv,&
h(x\cdot y)&=h(x)\cdot h(y).
\end{align*}
An \textbf{embedding} of groups is a homomorphism 
that is injective as a function.
The (multiplicative) groups of positive rational numbers, 
of nonzero rational numbers, of positive real numbers, 
and of nonzero real numbers, 
and the (additive) groups of integers, rational numbers, real numbers, 
and integers with respect to some modulus, 
are not obviously symmetry groups.
But they can be \emph{embedded} in symmetry groups.
Indeed, every element $g$ of a group $G$ (written multiplicatively) 
determines a singulary operation $\uplambda_g$ on $G$, given by
\begin{equation*}
  \uplambda_g(x)=gx.
\end{equation*}
(Here $\uplambda$ stands for \enquote{left} as in \enquote{multiplication from the left.})
Then we have the following.

\begin{theorem}[Cayley]\label{thm:Cay}%
\index{theorem!Cayley's Th---}
For every group $(G,\gid,{}\inv,\cdot)$,
the function
\begin{equation*}
x\mapsto\uplambda_x
\end{equation*}
embeds $(G,\gid,{}\inv,\cdot)$ 
in the group $(\Sym G,\id G,{}\inv,\circ)$ of symmetries. 
\end{theorem}

\begin{proof}
We first observe that
\begin{align*}
\uplambda_{\gid}&=\id G,&\uplambda_{g\cdot h}&=\uplambda_g\circ\uplambda_h,
\end{align*}
because
\begin{gather*}
	\uplambda_{\gid}(x)=\gid\cdot x=x=\id G(x),\\
	\uplambda_{g\cdot h}(x)=(g\cdot h)\cdot x
	=g\cdot(h\cdot x)=\uplambda_g(\uplambda_h(x))
=(\uplambda_g\circ\uplambda_h)(x).
\end{gather*}
Consequently each $\uplambda_g$ has an inverse, and
\begin{equation*}
(\uplambda_g)\inv=\uplambda_{g\inv}.
\end{equation*}
\sloppy
This establishes that $x\mapsto\uplambda_x$ is a homomorphism
from $(G,\gid,{}\inv,\cdot)$ to $(\Sym G,\id G,{}\inv,\circ)$.
It is an embedding, since if $\uplambda_g=\uplambda_h$, then in particular
\begin{equation*}
g=g\gid=\uplambda_g(\gid)=\uplambda_h(\gid)=h\gid=h.\qedhere
\end{equation*}
\end{proof}

By Cayley's Theorem, every group can be considered as a symmetry group.

%\subsection{Monoids and semigroups}
\subsection{Simplifications}

A \textbf{reduct} of a structure is a structure with the same underlying set, 
but equipped with fewer operations and relations.
The original structure is then called an \textbf{expansion} of the reduct.
We shall establish that a group $(G,\gid,{}\inv,\cdot)$ 
is determined by the reduct $(G,\cdot)$ 
and that a homomorphism of such reducts is a homomorphism of the whole groups.

A \textbf{semigroup} is an algebra $(S,\cdot)$,
where $\cdot$ is an associative operation on $S$.
If $(G,\gid,{}\inv,\cdot)$ is a group, 
then the reduct $(G,\cdot)$ is a semigroup. 
%\subsection{Simplifications}
Often the semigroup $(G,\cdot)$ itself is called a group.  
But this usage must be justified.

\begin{theorem}\label{thm:u}
A semigroup can expand to a group in at most one way.
\end{theorem}

\begin{proof}
Let $(G,\gid,\inv,\cdot)$ be a group.
If $\gid'$ were a second identity, then
\begin{align*}
\gid'x&=\gid x,& \gid'xx\inv&=\gid xx\inv,& \gid'&=\gid.
\end{align*}
If $a'$ were a second inverse of $a$, then
\begin{align*}
a'a&=a\inv a,& a'aa\inv&=a\inv aa\inv,&a'&=a\inv.\qedhere
\end{align*}
\end{proof}

Establishing that a particular algebra is a group 
is made easier by the following.

\begin{theorem}\label{thm:left}
Any algebra satisfying the identities
\begin{gather*}
	{\gid}x=x,\\
	x\inv x=\gid,\\
	x(yz)=(xy)z
\end{gather*}
is a group.
In other words, 
  any semigroup with a left-identity and with left-inverses is a group.  
\end{theorem}

\begin{proof}
We need to show $x\gid=x$ and $xx\inv=\gid$.  To establish the latter,
using the given identities we have
\begin{equation*}
(xx\inv)(xx\inv)=x(x\inv x)x\inv=x{\gid}x\inv=xx\inv,
\end{equation*}
and so
\begin{multline*}
xx\inv
={\gid}xx\inv
=(xx\inv)\inv(xx\inv)(xx\inv)\\
=(xx\inv)\inv(xx\inv)={\gid}.
\end{multline*}
Hence also
\begin{equation*}
x{\gid}=x(x\inv x)=(xx\inv)x={\gid}x=x.\qedhere
\end{equation*}
\end{proof}

The theorem has an obvious \enquote{dual} involving right-identities and right-inverses.  By the theorem, the semigroups that expand to groups are precisely the semigroups that satisfy the axiom
\begin{gather*}
\Exists z(\Forall xzx=x\land\Forall x\Exists y yx=z),
\end{gather*}
which is logically equivalent to
\begin{equation}\label{eqn:sg-ax}
\Exists z\Forall x\Forall y\Exists u(zx=x\land uy=z).
\end{equation}
We shall show that this sentence is more complex than need be.

Thanks to Theorem~\ref{thm:u}, 
if a semigroup $(G,\cdot)$ does expand to a group, 
then we may unambiguously refer to $(G,\cdot)$ itself as a group.  
We may even refer to $G$ as a group, although, theoretically, 
it may lead to ambiguity.

\begin{theorem}\label{thm:solutions}
Let $G$ be a nonempty semigroup.  The following are equivalent.
\begin{compactenum}
\item\label{item:exp}
$G$ expands to a group.
%\item\label{item:exp-u}
%$G$ expands uniquely to a group.
\item\label{item:sol}
Each equation $ax=b$ and $ya=b$ with parameters from $G$ has a
solution in $G$.
\item\label{item:sol-u}
Each equation $ax=b$ and $ya=b$ with parameters from $G$ has a
unique solution in $G$.
\end{compactenum}
\end{theorem}

\begin{proof}
Immediately \eqref{item:sol-u}$\lto$\eqref{item:sol}.  
Almost as easily, \eqref{item:exp}$\lto$\eqref{item:sol-u}.  
For, if $a$ and $b$ belong to some semigroup that expands to a group, 
we have $ax=b\liff x=a\inv b$; 
and we know by Theorem~\ref{thm:u} that $a\inv$ is uniquely determined.  
Likewise for $ya=b$.

Finally we show \eqref{item:sol}$\lto$\eqref{item:exp}.
Suppose $G$ is a nonempty semigroup 
in which all equations $ax=b$ and $ya=b$ have solutions.  
If $c\in G$, let $\gid$ be a solution to $yc=c$.  
If $b\in G$, let $d$ be a solution to $cx=b$.  Then
\begin{equation*}
  {\gid}b={\gid}(cd)=({\gid}c)d=cd=b.
\end{equation*}
Since $b$ was chosen arbitrarily, $\gid$ is a left identity.  
Since the equation $yc={\gid}$ has a solution, $c$ has a left inverse.  
But $c$ is an arbitrary element of $G$.  
By Theorem~\ref{thm:left}, we are done.
\end{proof}

Now we know that the semigroups that expand to groups 
are just the semigroups that satisfy the axiom
\begin{equation*}
  \Forall x\Forall y(\Exists zxz=y\land\Exists wwx=y).
\end{equation*}
This may not look simpler than \eqref{eqn:sg-ax}, but it is.  
It should be understood as 
\begin{equation*}
\Forall x\Forall y\Exists z\Exists w(xz=y\land wx=y), 
\end{equation*}
which is a sentence of the general form $\forall\exists$;
whereas \eqref{eqn:sg-ax} is of the form $\exists\forall\exists$.  

\begin{theorem}\label{thm:gp-hom}
  A map $f$ from one group to another is a homomorphism, 
provided it is a homomorphism of semigroups, that is,
\begin{equation*}
 f(xy)=f(x)f(y). 
 \end{equation*}
\end{theorem}

\begin{proof}
In a group, if $a$ is an element, then the identity is the unique
solution of $xa=a$, and $a\inv$ is the unique solution of $yaa=a$.  A
semigroup homomorphism $f$ takes solutions of
these equations to solutions of $xb=b$ and $ybb=b$, where $b=f(a)$. 
\end{proof}

\emph{Inclusion} of a substructure in a larger structure is a homomorphism. 
Therefore we have, as a special case of Theorem~\ref{thm:gp-hom}, 
that if $(G,\gid,{}\inv,\cdot)$ and $(H,\gid,{}\inv,\cdot)$ 
are groups, then
\begin{equation*}
(G,\cdot)\included(H,\cdot)
\implies(G,\gid,{}\inv,\cdot)\included(H,\gid,{}\inv,\cdot).
\end{equation*}

\subsection{Direct products of groups}

As on page~\pageref{A^B}, if $\Omega$ and $A$ are sets,
then $A^{\Omega}$ is the set of functions from $\Omega$ to $A$.
If $A$ is the underlying set of a group,
then a multiplication can be defined on $A^{\Omega}$ 
so that this power is also a group.
The following will be used on page~\pageref{right-not-left}
in case $\Omega$ is $\upomega$.

\begin{theorem}\label{thm:power}
  If $\Omega$ is a set and $(G,\cdot)$ is a group,
then $(G^{\Omega},\cdot)$ is a group, where
for all $f$ and $g$ in $G^{\Omega}$ and all $x$ in $\Omega$,
\begin{equation*}
  (f\cdot g)(x)=f(x)\cdot g(x).
\end{equation*}
\end{theorem}

The foregoing theorem can be generalized as follows.
We can think of the power $A^{\Omega}$ as the product of copies of $A$,
each copy being indexed by an element of $\Omega$.
Then we can replace some of these copies with different sets.
To be precise, we define an \textbf{indexed set}\label{indexed-set} 
as a set
together with a function whose range is that set.
If that function is $f$ in $A^B$,
then the corresponding indexed set can be denoted by
\begin{equation*}
  (f(x)\colon x\in B)
\end{equation*}
(this notation was introduced on page \pageref{indexed-first}).
We may identify this indexed set with the function $f$ itself.  
Note however that the same set can be the range 
of many functions with many domains
(unless the set is empty; then it is the range of only one function).
That is, we may know $\{f(x)\colon x\in B\}$ 
without knowing what $f$ and $B$ are.
However, knowing $(f(x)\colon x\in B)$ 
means knowing $f$ and hence knowing $B$ and $\{f(x)\colon x\in B\}$. 

An indexed set $(a_n\colon n\in\upomega)$ is also called a \textbf{sequence} 
and can be written also as
\begin{equation*}
  (a_0,a_1,a_2,\dots).
\end{equation*}
The word \emph{family} is a synonym for \emph{set;} 
it is often used for sets 
whose elements are themselves sets 
whose elements will be of interest.%%%%%
\footnote{In the usual formulation of set theory, 
every element of every set is itself a set.
Since for example a \emph{group} 
is a set equipped with a certain operation of multiplication,
the elements of a group must themselves be sets;
but in ordinary mathematics these elements are not thought of as sets,
and so one does not refer to the underlying set of a group as a family.
One may however speak of a set of groups as a family.}





Suppose $\mathscr A$ is an indexed family $(A_i\colon i\in\Omega)$, 
where each $A_i$ is a group.
We can form the \textbf{direct product} of the family $\mathscr A$.  
This direct product is denoted by one of the expressions
\begin{align*}
&\prod_{i\in\Omega}A_i,&&\prod\mathscr A.
\end{align*}
If $a$ belongs to this direct product, this means
\begin{equation*}
a=(a_i\colon i\in\Omega),
\end{equation*}
where $a_i\in A_i$ in each case.  
Thus $a$ is simply a function on $\Omega$ that, 
at every element $i$ of this domain, 
takes a value in $A_i$; we write this value as $a_i$, 
though as in Chapter~\ref{ch:intro} 
it could be written also%%%%%
\footnote{Even the notation $i^a$ might be used.  
Indeed, $x^{\sigma}$ is used below 
(page \pageref{upper-sigma}) 
for the image under an automorphism $\sigma$ 
of an element $x$ of a given field.} 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
as $a(i)$ or $a^i$.
For each $j$ in $\Omega$,
there is a function $\uppi_j$ from $\prod\mathscr A$ to $A_j$ given by
\begin{equation*}
  \uppi_j(x)=x_j,
\end{equation*}
so that, for each $a$ in $\prod\mathscr A$,
\begin{equation}\label{eqn:uppi}
  a=\bigl(\uppi_i(a)\colon i\in\Omega\bigr).
\end{equation}
The function $\uppi_j$ 
is the \textbf{coordinate projection} onto $A_j$.\label{coord-proj}

\begin{theorem}\label{thm:group-prod}
  If $\mathscr G$ is an indexed family 
$\bigl((G_i,\cdot)\colon i\in\Omega\bigr)$ of groups,
then $(\prod\mathscr G,\cdot)$ is a group, where
\begin{equation*}
  (x_i\colon i\in\Omega)\cdot(y_i\colon i\in\Omega)
=(x_i\cdot y_i\colon i\in\Omega).
\end{equation*}
Each of the coordinate projections $\uppi_j$ 
on $\prod\mathscr G$ is a homomorphism of groups.
If $H$ is a group,
and $f_j$ is a homomorphism from $(H,\cdot)$ to $(G_j,\cdot)$ 
for each $j$ in $\Omega$,
then the map
\begin{equation*}
x\mapsto\bigl(f_i(x)\colon i\in\Omega\bigr)  
\end{equation*}
is the unique homomorphism $f$ from $H$ to $\prod\mathscr G$ such that, 
for each $j$ in $\Omega$,
\begin{equation*}
  \uppi_j\circ f=f_j.
\end{equation*}
\end{theorem}

In the indexed set $(a_i\colon i\in\Omega)$,
each element $a_i$ can be called a \textbf{term.}
Then the multiplication on $\prod_{i\in\Omega}G_i$ defined in the theorem
can be described as \textbf{termwise} multiplication.
The theorem is easily generalized 
to cover arbitrary algebras and even structures.
This will lead to the definition of \emph{ultraproducts.}
See for example Theorem~\ref{thm:ring-prod} 
on page \pageref{thm:ring-prod} below.

\subsection{Rings}

A homomorphism from a structure to itself is an
\textbf{endomorphism.}\index{endomorphism}
Recall from page~\pageref{abelian} 
that a group in which the multiplication is commutative 
is said to be an \textbf{abelian group,}
and (page~\pageref{additive}) its operation is usually written additively.
The set of endomorphisms of an abelian group can be made into an
abelian group in which: 
\begin{compactenum}[1)]
\item 
the identity is the constant function $x\mapsto\gid$;
\item
additive inversion converts $f$ to $x\mapsto-f(x)$;
\item
addition converts $(f,g)$ to $x\mapsto f(x)+g(x)$.
\end{compactenum}
If $E$ is an abelian group, let the abelian group of its endomorphisms
be denoted by
\begin{equation*}
  \End E.
\end{equation*}

A \textbf{monoid} is an algebra $(M,\gid,\cdot)$,
where $\cdot$ is an associative operation,
and $\gid$ is an identity with respect to this operation.
The set of endomorphisms of the abelian group $E$ 
is the underlying set of a monoid in which 
the identity is the identity function $\id E$, 
and multiplication is functional composition.
This multiplication distributes in both senses over addition: 
\begin{align*}
  f\circ(g+h)&=f\circ g+f\circ h,& (f+g)\circ h&=f\circ h+g\circ h.
\end{align*}
We may denote the two combined structures---abelian group and
mon\-oid together---by
\begin{equation*}
  (\End E,\id E,\circ);
\end{equation*}
this is the \textbf{complete ring of
  endomorphisms of}\index{complete ring of endomorphisms} $E$.  
A substructure of $(\End E,\id E,\circ)$ can be called
simply a \textbf{ring of endomorphisms}\index{ring of endomorphisms of} $E$.  

A \textbf{ring} is a structure $(R,0,-,+,1,\cdot)$ such that
\begin{compactenum}[1)]
\item
$(R,0,-,+)$ is an abelian group,
\item
$(R,1,\cdot)$ is a monoid,
\item
the multiplication distributes in both senses over addition.
\end{compactenum}
Then rings of endomorphisms are indeed rings.
It may be convenient to write a ring as $(R,1,\cdot)$,
where $R$ is implicitly an abelian group.
We might even say simply that $R$ is a ring.
Let us note the trivial example:

\begin{theorem}
\mbox{}
  \begin{compactenum}
  \item 
In every ring, $0\cdot x=0$.
\item
In a ring, $1=0$ if and only if there are no other elements.
  \end{compactenum}
\end{theorem}

A one-element ring is \textbf{trivial.}

Some authors might not require
a ring to have a multiplicative identity.%%%%%
\footnote{For Lang \cite[ch.~II, \S1, p.~83]{Lang-alg},
a ring is as we have defined it.
For Hungerford \cite[ch.~III, \S1, p.~115]{MR600654},
what we call a ring is a \emph{ring with identity.}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
We require it, so that the next theorem holds.
As with a group, so with a ring, 
an element $a$ determines a singulary operation $\uplambda_a$ on the structure, 
the operation being given by
\begin{equation*}
  \uplambda_a(x)=ax.
\end{equation*}
Then we have an analogue of Cayley's Theorem (page~\pageref{thm:Cay}):

\begin{theorem}\label{thm:x-lambda_x}
For every ring $(R,1,\cdot)$,
the function
\begin{equation*}
x\mapsto\uplambda_x
\end{equation*}
embeds $(R,1,\cdot)$ in $(\End R,\id R,\circ)$.
\end{theorem}

In a ring, if the multiplication commutes,
then the ring is a \textbf{commutative ring.}\index{commutative ring}
For example, the algebras\label{Z-as-ring}
\begin{align*}
&(\Z,0,-,+,1,\cdot),&
&(\Q,0,-,+,1,\cdot),&
&(\R,0,-,+,1,\cdot)
\end{align*}
are commutative rings.
The following is easy to check.

\begin{theorem}\label{thm:Zmod-ring}
$(\Zmod n,0,-,+,1,\cdot)$ is a commutative ring.
\end{theorem}

If $R$ is a sub-ring of a commutative ring $S$, and $a\in S$,
then we denote by\label{R[a]}
\begin{equation*}
  R[a]
\end{equation*}
the smallest sub-ring of $S$ that includes $R$ and contains $a$.
Then every nonzero element of $R[a]$ can be written in the form
\begin{equation*}
  b_0+b_1a+\dots+b_na^n
\end{equation*}
for some $b_i$ in $R$, for some $n$ in $\upomega$.
We may replace $a$ with $X$,
this being, not an element of a particular ring,
but an \textbf{indeterminate.}
Then we obtain the \textbf{polynomial ring}
\begin{equation*}
  R[X],
\end{equation*}
whose elements are \textbf{formal sums}
\begin{equation*}
  b_0+b_1X+\dots+b_nX^n.
\end{equation*}
We can continue this construction, getting rings\label{poly-ring}
\begin{equation*}
  R[X_0,\dots,X_{m-1}].
\end{equation*}

In an arbitrary ring, 
an element with both a left and a right multiplicative inverse can be
called simply \textbf{invertible;}\index{invertible} it is also called
a \textbf{unit.}\index{unit}  

\begin{theorem}\label{thm:units}
In a ring, the units compose a group with respect to
multiplication.  In particular, a unit has a unique
left inverse, which is also a right inverse.
\end{theorem}

The group of units of a ring $R$ is denoted by
\begin{equation*}
  \units R.
\end{equation*}
For example, $\units{\Z}=\{1,-1\}$.  
Evidently all two-element groups
are isomorphic to this one.

By the theorem, if an element of a ring
has both a left inverse and a right inverse,
then they are equal.
However, possibly an element can have a right inverse,
but not a left inverse.
We can construct an example 
by means of Theorem~\ref{thm:power}.\label{right-not-left}
Let $G$ be any nontrivial group.\label{exa:no-unit}
An arbitrary element $(x_n\colon n\in\upomega)$ of $G^{\upomega}$ 
can be written also as
\begin{equation*}
(x_0,x_1,\dots).
\end{equation*}
Then $\End{G^{\upomega}}$ contains elements $f$ and $g$ given by
\begin{gather*}
f(x_0,x_1,\dots)=(x_1,x_2,x_3,x_4,\dots),\\
g(x_0,x_1,\dots)=(x_0,x_0,x_1,x_2,\dots),
\end{gather*}
so that
\begin{gather*}
fg(x_0,x_1,\dots)=(x_0,x_1,x_2,\dots),\\
gf(x_0,x_1,\dots)=(x_1,x_1,x_2,\dots).
\end{gather*}
In particular, $g$ is a right inverse of $f$, but not a left inverse.

\subsection{Fields}

If $R$ is a commutative ring, and
$\units R=R\setminus\{0\}$, then $R$ is called a \textbf{field.}\index{field}
For example, $\Q$ and $\R$ are fields.  
The field $\C$ can be defined as $\R\times\R$ with the appropriate operations.
Additional examples are given by Theorem \ref{thm:fin-fld} below.

A positive integer $n$ is \textbf{prime}\index{prime}
if $n\neq1$ and the only divisors of $n$ in $\N$ are $1$ and $n$.
The \textbf{greatest common divisor} of two positive integers $a$ and $b$
is just that: 
the largest of the positive integers that divide both $a$ and $b$.
It can be denoted by
\begin{equation*}
  \gcd(a,b).
\end{equation*}
This can be found by the \textbf{Euclidean algorithm,}\label{Euc-alg}
used in Propositions VII.1 and 2 of Euclid's \emph{Elements} 
\cite{MR1932864,MR17:814b}.
The algorithm constructs a sequence $(a_0,a_1,\dots)$,
where $a_0$ is the greater of $a$ and $b$,
and $a_1$ is the lesser,
and for each $k$ in $\upomega$,
if $a_{k+1}\divides a_k$, then $a_{k+2}$ is undefined,
but if $a_{k+1}\ndivides a_k$,
then $a_{k+2}$ is the remainder on dividing $a_k$ by $a_{k+1}$,
that is, $a_{k+2}$ is the least positive integer $r$ such that
\begin{equation*}
  a_{k+1}\divides a_k-r;
\end{equation*}
equivalently, 
$a_{k+2}$ is the least positive integer 
in the set
\begin{equation*}
  \{a_k-a_{k+1}x\colon x\in\N\}.
\end{equation*}
Then $a_0>a_1>a_2>\cdots$, so the sequence must terminate,
since $\N$ is well-ordered (Theorem~\ref{thm:wo}, page \pageref{thm:wo}).

\begin{theorem}\label{thm:Euc-alg}
For all positive integers $a$ and $b$,
the last entry of the sequence constructed by the Euclidean algorithm 
is $\gcd(a,b)$.
This is the least positive element of the set
\begin{equation*}
  \{ax+by\colon(x,y)\in\Z\times\Z\}.
\end{equation*}
\end{theorem}

\begin{theorem}\label{thm:fin-fld}
  The ring $\Zmod n$ is a field if and only if $n$ is prime.
\end{theorem}

A special case of the theorem 
is that the trivial ring $\Zmod1$ is not a field.
If $p$ is prime, then, considered as a field, $\Zmod p$ will be denoted by
\begin{equation*}
  \F_p.
\end{equation*}

\section{Quotients}

\subsection{Congruence relations}\label{subsect:cong-rel}

The groups $(\Zmod n,0,-,+)$
and the rings $(\Zmod n,0,-,+,1,\cdot)$ 
are instances of a general construction.
Suppose $\sim$ is an equivalence relation\label{eq-rel-2}
on a set $A$, so that it partitions $A$ into equivalence classes
\begin{equation*}
  \{x\in A\colon x\sim a\};
\end{equation*}
each such class can be denoted by an expression like one of the following:%
\label{eqc-a}\label{a-simcl}
\begin{align*}
&a\simcl,&
&\eqc a,&
&\overline a.
\end{align*}
Each element of an equivalence class 
is a \textbf{representative} of that class.  
The \textbf{quotient}\index{quotient} of $A$ by $\sim$
is the set of equivalence classes of $A$ with respect to $\sim$; 
this set can be denoted by 
\begin{equation*}
A\modsim.
\end{equation*}
Suppose for some $n$ in $\upomega$ and some set $B$, 
we have a function $f$ from $A^n$ to $B$.
Then there may or may not be a function $\tilde f$ 
from $(A\modsim)^n$ to $B$ such that the equation
\begin{equation}\label{eqn:wd}
\tilde f([x_0],\dots,[x_{n-1}])=f(x_0,\dots,x_{n-1})
\end{equation}
is an identity.
If there is such a function $\tilde f$, then it is unique.
In this case, the function $\tilde f$ is said to be 
\textbf{well-defined}\label{well-defined} by the given identity \eqref{eqn:wd}.
Note however that there are no \enquote{ill-defined} functions.
An ill-defined function would be a nonexistent function.
The point is that choosing a function $f$ 
and writing down the equation \eqref{eqn:wd} 
does not automatically give us a function $\tilde f$.
To know that there is such a function, we must check that
\begin{multline*}
a_0\sim x_0\land\dots\land a_{n-1}\sim x_{n-1}\\
\lto f(a_0,\dots,a_{n-1})=f(x_0,\dots,x_{n-1}).
\end{multline*}
When this does hold (for all $a_i$), 
so that $\tilde f$ exists as in \eqref{eqn:wd}, then
\begin{equation}\label{eqn:tilde}
\tilde f\circ\mathrm p=f,
\end{equation}
where $\mathrm p$ is the function $(x_0,\dots,x_{n-1})\mapsto([x_0],\dots,[x_{n-1}])$ from $A^n$ to $(A\modsim)^n$.  
Another way to express the equation~\eqref{eqn:tilde} 
is to say that the diagram in Figure~\ref{fig:well-def} 
\textbf{commutes.}\label{commutes}
\begin{figure}
  \begin{equation*}
\xymatrix@!{
A^n\ar^f[r]\ar_{\mathrm p}[d]&B\\
(A\modsim)^n\ar_{\tilde f}[ur]&
}
\end{equation*}

  \caption{A well-defined function}\label{fig:well-def}
  
\end{figure}

\begin{sloppypar}
Suppose now $\str A$ is an algebra with universe $A$.
If for all $n$ in $\upomega$, 
for every distinguished $n$-ary operation $f$ of $\str A$,
there is an $n$-ary operation $\tilde f$ on $A\modsim$
as given by \eqref{eqn:wd}, then $\sim$ 
is a \textbf{congruence-relation} 
or \textbf{congruence}\label{congruence} on $\str A$.
In this case, the $\tilde f$ 
are the distinguished operations of an algebra with universe $A\modsim$.  
This new algebra is the \textbf{quotient} of $\str A$ by $\sim$ 
and can be denoted by
\begin{equation*}
\str A\modsim.
\end{equation*}
For example, by Theorem~\ref{thm:mod-n} on page~\pageref{thm:mod-n}, 
for each $n$ in $\N$, 
congruence \emph{modulo} $n$ is a congruence on $(\Z,0,-,+,1,\cdot)$.
Then the structure $(\Zmod n,0,-,+)$ 
can be understood as the quotient $(\Z,0,-,+)\modsim$,
and $(\Zmod n,0,-,+,1,\cdot)$ as $(\Z,0,-,+,1,\cdot)\modsim$.
The former quotient is an abelian group by Theorem~\ref{thm:Zmod-group},
and the latter quotient is a commutative ring by Theorem~\ref{thm:Zmod-ring} 
on page~\pageref{thm:Zmod-ring}.
These theorems are special cases of the next two theorems.
In fact the first of these makes verification 
of Theorem~\ref{thm:Zmod-group} easier.
\end{sloppypar}

\begin{theorem}\label{thm:cong}
Suppose $\sim$ is a congruence-relation on a semigroup $(G,\cdot)$.
\begin{compactenum}
\item
$(G,\cdot)\modsim$ is a semigroup.
\item
If $(G,\cdot)$ expands to a group, 
then $\sim$ is a congruence-relation on this group,
and the quotient of the group by $\sim$ is a group.  
If the original group is abelian,
then so is the quotient.
\end{compactenum}
\end{theorem}

\begin{theorem}\label{thm:ring-q}
\sloppy
Suppose $(R,0,-,+,1,\cdot)$ is a ring,
and $\sim$ is a con\-gruence-relation on the reduct $(R,+,\cdot)$.
Then $\sim$ is a congruence-relation on $(R,0,-,+,1,\cdot)$,
and the quotient $(R,0,-,+,1,\cdot)\modsim$ is also a ring.
If the original ring is commutative, so is the quotient.
\end{theorem}

\subsection{Normal subgroups of groups}

We defined subgroups of symmetry groups on page~\pageref{subgroup},
and of course subgroups of arbitrary groups are defined the same way.
A \textbf{subgroup}\index{subgroup} of a group 
is a subset containing the identity 
that is closed under multiplication and inversion.  

The subset $\N$ of $\Qp$ contains the identity 
and is closed under multiplication, 
but is not closed under inversion, and so it is not a subgroup of $\Qp$.
The subset $\upomega$ of $\Z$ contains the additive identity 
and is closed under addition,
but is not closed under additive inversion, 
and so it is not a subgroup of $\Z$.

\begin{theorem}\label{thm:subgp}
  A subset of a group is a subgroup if and only if it is non-empty and
  closed under the binary operation $(x,y)\mapsto xy\inv$.
\end{theorem}

If $ H$ is a subgroup of $G$, we write
\begin{equation*}
H\subgp G.  
\end{equation*}
One could write $H\leq G$ instead, 
if one wanted to reserve the expression $H<G$ for the case 
where $H$ is a \emph{proper} subgroup of $G$.  
We shall not do this.%%%%%
\footnote{I do think it is useful to reserve the notation $A\pincluded B$ 
for the case where $A$ is a proper subset of $B$, 
writing $A\included B$ when $A$ is allowed to be equal to $B$.}

\begin{theorem}\label{thm:subgroups}
  An arbitrary intersection of subgroups is a subgroup.
\end{theorem}

Suppose $H\subgp G$.  If $a\in G$, let
\begin{gather*}
  aH=\{ax\colon x\in H\},\\
Ha=\{xa\colon x\in H\}.
\end{gather*}
Each of the sets $aH$ is a \textbf{left coset}\index{left!---
  coset}\index{coset} 
of $H$, and the set $\{xH\colon x\in G\}$ of left cosets
is denoted by 
\begin{equation*}
  G/H.
\end{equation*}
Each of the sets $Ha$ is a \textbf{right coset}\index{right!--- coset} of $H$, 
and the set $\{Hx\colon x\in G\}$ of right cosets
is denoted by 
\begin{equation*}
  H\backslash G.
\end{equation*}
Note that $H$ itself is both a left and a right coset of itself.

\begin{theorem}\label{thm:cosets}
Suppose $H\subgp G$.
The left cosets of $H$ in $G$ compose a partition of $G$.  
Likewise for the right cosets.  
For each $a$ in $G$, the map $x\mapsto ax$ is a bijection from $H$ to $aH$,
and $x\mapsto xa$ is a bijection from $H$ to $Ha$.
Thus all cosets are in bijection with one another.
The map $xH\mapsto Hx\inv$ 
is a well-defined bijection from $G/H$ to $H\backslash G$. 
\end{theorem}

\begin{proof}
We have $a\in aH$.  Suppose $aH\cap bH\neq\emptyset$.  
Then $ah=bh_1$ for some $h$ and $h_1$ in $H$, so that $a=bh_1h\inv$, 
which is in $bH$.  
Thus $a\in bH$, and hence $aH\included bH$.  
By symmetry of the argument, we have also $bH\included aH$, 
and therefore $aH=bH$.  Hence the left cosets compose a partition of $G$.  
By symmetry again, the same is true for the right cosets.
  \end{proof}

\begin{corollary}\label{cor:cosets-1}
If $H\subgp G$, then the relation $\sim$ on $G$ defined by
\begin{equation*}
a\sim x\liff aH=xH
\end{equation*}
is an equivalence relation, and
\begin{equation*}
G/H=G\modsim.
\end{equation*}
\end{corollary}

\begin{corollary}\label{cor:cosets-2}
If $H\subgp G$ and $aH=Hb$, then $aH=Ha$.
\end{corollary}

\begin{proof}
Under the assumption, $a\in Hb$, so $Ha\included Hb$, and therefore $Ha=Hb$.
\end{proof}

\begin{theorem}\label{thm:n}
  Suppose $H\subgp G$.  The following are equivalent:
  \begin{compactenum}
    \item
$G/H$ is a group whose multiplication is given by
\begin{equation*}
(xH)(yH)=xyH.
\end{equation*}
\item
Every left coset of $H$ is a right coset.
\item
$aH=Ha$ for all $a$ in $G$.
\item
$a\inv Ha=H$ for all $a$ in $G$.
  \end{compactenum}
\end{theorem}

\begin{proof}
Immediately the last two conditions are equivalent, 
and they imply the second.  
The second implies the third, 
by Corollary~\ref{cor:cosets-2} (p.~\pageref{cor:cosets-2}).

Suppose now the first condition holds.  For all $h$ in $H$, since $hH=H$, we have
\begin{equation*}
aH=\gid aH=\gid HaH=hHaH=haH,
\end{equation*}
hence $a\inv haH=H$, so $a\inv ha\in H$.  Thus $a\inv Ha\included H$, so $a\inv Ha=H$.

Conversely, if the third condition holds, then $(xH)(yH)=xHHy=xHy=xyH$.
In this case, the equivalence relation $\sim$ on $G$ given as in Corollary~\ref{cor:cosets-1} (p.~\pageref{cor:cosets-1}) by
\begin{equation*}
a\sim x\liff aH=xH
\end{equation*}
is a congruence-relation, and so, by Theorem~\ref{thm:cong} (p.~\pageref{thm:cong}), $G/H$ is a group with respect to the proposed multiplication.
\end{proof}

A subgroup $H$ of $G$ meeting any of these equivalent conditions is
called \textbf{normal,}\index{normal!--- subgroup} and in this case we write
\begin{equation*}
  H\nsubgp G.
\end{equation*}
As trivial examples, we have
\begin{align*}
G&\nsubgp G,&\trivgp&\nsubgp G.
\end{align*}
Only slightly less trivially, 
all subgroups of abelian groups are normal subgroups.  
If $f$ is a homomorphism from a group $G$ to a group $H$, 
then we define\label{ker-im}
\begin{gather*}
  \Ker f=\{x\in G\colon f(x)=\gid\},\\
\im f=\{f(x)\colon x\in G\};
\end{gather*}
these are, respectively, 
the \textbf{kernel} and \textbf{image} of the homomorphism $f$.
A homomorphism whose inverse 
is a well-defined homomorphism is an \textbf{isomorphism.}

\begin{theorem}\label{thm:1-isom}
If $f$ is a homomorphism from a group $G$ to a group $H$, 
then
\begin{equation*}
  \Ker f\nsubgp G
\end{equation*}
and there is a well-defined isomorphism
\begin{equation*}
 x\Ker f\mapsto f(x) 
\end{equation*}
from $G/\Ker f$ to $\im f$. 
\end{theorem}


\subsection{Ideals of rings}

\begin{theorem}
Suppose $(R,1,\cdot)$ is a ring and $A\subgp R$.
The group $R/A$ expands to a ring with multiplication given by
\begin{equation*}
  (x+A)(y+A)=xy+A
\end{equation*}
if and only if
\begin{equation}\label{eqn:raar}
r\in R\And a\in A\implies ra\in A\And ar\in A.
\end{equation}
\end{theorem}

\begin{proof}
If $R/A$ does expand to a ring, and $a\in A$, 
then $a+A$ is $0$ in this ring, 
and hence so are $ra+A$ and $ar+A$,
so that \eqref{eqn:raar} holds.
Conversely, suppose this holds.
If $a+A=x+A$ and $b+A=y+A$, then $A$ contains $a-x$ and $b-y$,
so $A$ contains also
\begin{equation*}
(a-x)\cdot y+a\cdot(b-y),
\end{equation*}
which is $ab-xy$, so $ab+A=xy+A$.
\end{proof}
Under the equivalent conditions of the theorem,
$A$ is called an \textbf{ideal} of $R$.
We can express \eqref{eqn:raar} as
\begin{align*}
RA&\included A,&AR&\included A.
\end{align*}

A homomorphism of rings has the obvious definition.
If $(R,1,\cdot)$ and $(S,1,\cdot)$ are rings,
then a homomorphism from the former to the latter
is a homomorphism $f$ from $R$ to $S$ (these considered as groups)
such that
\begin{align*}
  f(1)&=1,&f(x)\cdot f(y)&=f(x\cdot y).
\end{align*}
We define the kernel and image of a homomorphism of rings 
as we do for a homomorphism of groups.
Then we have the following analogue of Theorem~\ref{thm:1-isom}.

\begin{theorem}
If $f$ is a homomorphism from a ring $R$ to a ring $S$, 
then $\Ker f$ is an ideal of $R$,
and there is a well-defined isomorphism
\begin{equation*}
  x+\Ker f\mapsto f(x)
\end{equation*}
from $R/\Ker f$ to $\im f$. 
\end{theorem}

If $R$ is a ring, and $A$ is a subset of $R$,
then there is at least one ideal of $R$ that includes $A$,
namely the \textbf{improper ideal} $R$ itself.
There is a \emph{smallest} ideal that includes $A$, by the following.

\begin{theorem}\label{thm:ideal-int}
The intersection of a family of ideals of a ring is an ideal.
\end{theorem}

Thus, by the terminology to be developed on page \pageref{Moore},
the family of ideals of a ring is a \emph{Moore family.}
Given a subset $A$ of a ring $R$, we define
\begin{equation*}
  (A)=\bigcap\{I\colon\text{$I$ is an ideal of $R$ and $A\included I$}\}.
\end{equation*}
This is an ideal that includes $A$ 
and is included in every ideal that includes $A$.
It is the ideal \textbf{generated by} $A$.

\section{Direct products and sums of commutative rings}

Analogously to Theorem~\ref{thm:group-prod} on page \pageref{thm:group-prod},
we have the following.

\begin{theorem}\label{thm:ring-prod}
  If $\mathscr R$ is an indexed family 
$(R_i\colon i\in\Omega)$ of rings,
then $\prod\mathscr R$ is a ring under the termwise operations.
Each of the coordinate projections $\uppi_j$ 
on $\prod\mathscr R$ is a homomorphism of rings.
If $S$ is a ring,
and $f_j$ is a homomorphism from $S$ to $R_j$ 
for each $j$ in $\Omega$,
then the map
\begin{equation*}
x\mapsto\bigl(f_i(x)\colon i\in\Omega\bigr)  
\end{equation*}
is the unique homomorphism $f$ from $S$ to $\prod\mathscr R$ such that, 
for each $j$ in $\Omega$,
\begin{equation*}
  \uppi_j\circ f=f_j.
\end{equation*}
\end{theorem}


Suppose $\mathscr R$ is an indexed family $(R_i\colon i\in\Omega)$ 
of commutative rings.
If $a\in\prod\mathscr R$, we define
\begin{equation*}
  \supp a=\{i\in\Omega\colon a_i\neq0\};
\end{equation*}
this is the \textbf{support} of $a$.
Then $a$ has \textbf{finite support} 
if (obviously) its support is a finite set, that is,
\begin{equation*}
  \card{\supp a}<\upomega.
\end{equation*}

\begin{theorem}
The elements having finite support 
in the direct product of an indexed family of commutative rings
compose an ideal of the product ring.
That is, if $\mathscr R$ is an indexed family of commutative rings,
then the subset
\begin{equation*}
  \left\{x\in\prod\mathscr R\colon\card{\supp x}<\upomega\right\}
\end{equation*}
of $\prod\mathscr R$ is an ideal.
\end{theorem}

The ideal of $\prod\mathscr R$ given by the theorem 
is the \textbf{direct sum} of $\mathscr R$ and can be denoted by one of
\begin{align*}
  &\bigoplus\mathscr R,&&\bigoplus_{i\in\Omega}R_i,
\end{align*}
the latter assuming $\mathscr R$ is $(R_i\colon i\in\Omega)$.
In this case, for each $j$ in $\Omega$,
there is a function $\upiota_j$ from $R_j$ to $\bigoplus\mathscr R$
that can be given by
\begin{equation*}
  \uppi_i\bigl(\upiota_j(x)\bigr)=
  \begin{cases}
    x,&\text{ if }i=j,\\
0,&\text{ if }i\neq j.
  \end{cases}
\end{equation*}
This $\upiota_j$ is the \textbf{coordinate injection} 
of $R_j$ in $\bigoplus\mathscr R$.

\begin{theorem}
If $\mathscr R$ 
is an indexed family $(R_i\colon i\in\Omega)$ of commutative rings,
then each of the coordinate injections $\upiota_j$ 
of $R_j$ in $\bigoplus\mathscr R$ 
is an embedding of rings,
and for each $a$ in $\bigoplus\mathscr R$,
\begin{equation}\label{eqn:sum-ip}
  a=\sum_{i\in\supp a}\upiota_i\bigl(\uppi_i(a)\bigr).
\end{equation}
\end{theorem}

Since $\upiota_i(\uppi_i(a))=0$ when $i\notin\supp a$,
it makes sense to write \eqref{eqn:sum-ip} in the form
\begin{equation}\label{eqn:upiota}
  a=\sum_{i\in\Omega}\upiota_i(\uppi_i(a)).
\end{equation}
This should be compared with \eqref{eqn:uppi} on page~\pageref{eqn:uppi},
namely
\begin{equation*}
 a=\bigl(\uppi_i(a)\colon i\in\Omega\bigr). 
\end{equation*}
The latter holds for all $a$ in $\prod\mathscr R$;
but in \eqref{eqn:upiota}, the sum is defined
only when $a\in\bigoplus\mathscr R$, that is, 
only finitely many of the summands are nonzero.

If $a$ is an element of an arbitrary commutative ring,
then the ideal generated by $\{a\}$ is denoted by
\begin{equation*}
  (a)
\end{equation*}
as well as $(\{a\})$.
Such an ideal is called a \textbf{principal ideal.}

\begin{theorem}
  If $R$ is a commutative ring and $a\in R$, then
  \begin{equation*}
    (a)=\{ax\colon x\in R\}.
  \end{equation*}
\end{theorem}

The principal ideal in the last theorem can be denoted also by one of
\begin{align*}
  &aR,&&Ra.
\end{align*}
Thus the ring $\Zmod n$ 
(Theorem~\ref{thm:Zmod-ring}, page~\pageref{thm:Zmod-ring})
can be written as one of
\begin{align*}
  &\Z/(n),&&\Z/n\Z.
\end{align*}
For every commutative ring $R$, there is a homomorphism
$k\mapsto\underbrace{1+\dots+1}_k$ from $\Z$ to $R$,
whose kernel is $(n)$ for some $n$ in $\upomega$;
in this case $n$ is called the \textbf{characteristic}\label{char} of $R$.

An ideal is in particular a ring.
Thus, if $A$ is a subset of the commutative ring $R$, 
we can form an indexed family $(Ra\colon a\in A)$ of commutative rings.
Such a family has a direct sum.

\begin{theorem}\label{thm:(A)}
If $R$ is a commutative ring and $A$ is a subset,
then
\begin{align*}
  (A)
&=\left\{\sum_{a\in A}x_a\colon x\in\bigoplus_{a\in A}Ra\right\}\\
&=\left\{\sum_{a\in A}x_aa\colon x\in\bigoplus_{a\in A}R\right\}.
\end{align*}
\end{theorem}

That is, the ideal $(A)$ consists of the
\textbf{$R$-linear combinations}\index{linear combination} of elements of $A$.
The ideal can be denoted by one of
\begin{align*}
  &\sum_{a\in A}Ra,&&\sum_{a\in A}(a).
\end{align*}
If $A=\{a_i\colon i<n\}$, then $(A)$ can be written as one of
\begin{align*}
  &(a_0,\dots,a_{n-1}),&&Ra_0+\dots+Ra_{n-1},&&(a_0)+\dots+(a_{n-1}).
\end{align*}
Such an ideal is said to be \textbf{finitely generated.}\label{fin-gen-id}

\section{Ultraproducts of fields}

The improper ideal of a commutative ring $R$ is the principal ideal
\begin{equation*}
  (1).
\end{equation*}
The subset $\{0\}$ of $R$ is the \textbf{zero ideal}
and can be considered%%%%%
\footnote{Since every ideal contains $0$,
the zero ideal is also the ideal $(\emptyset)$ generated by the empty set.
However, when we write this ideal as $(0)$,
we mean by $0$ the zero element of the ring,
rather than the first von Neumann natural number (page~\pageref{nat}), 
which is the empty set.
There is no need to include $0$ in the generating set of any ideal.
Nonetheless, there is no harm in including it,
and we do want to consider the zero ideal as being a principal ideal.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
as the principal ideal
\begin{equation*}
  (0).
\end{equation*}


A \textbf{proper ideal} of a ring is an ideal that is not improper, 
that is, is not the whole ring.
An ideal of a commutative ring is called a \textbf{maximal ideal}
if it is a proper ideal, but is not properly included in a proper ideal.
(Thus the improper ideal is not a maximal ideal.)
An ideal $I$ of the commutative ring $R$ is maximal just in case,
for every ideal $J$ of $R$,
\begin{equation*}
  I\pincluded J\iff J=R.
\end{equation*}

\begin{theorem}\label{thm:max-field}
Let $R$ be a commutative ring.
\begin{compactenum}
\item 
The ideal $(0)$ of $R$ is maximal if and only if $R$ is a field.
\item
An ideal $I$ of $R$ is maximal 
if and only if the quotient $R/I$ is a field.
\end{compactenum}
\end{theorem}

\begin{proof}
If $R$ is a field and $(0)\pincluded I$,
then $I\setminus(0)$ contains some $a$, 
and then $a\inv\cdot a\in I$, so $I=R$.
Conversely, if $(0)$ is maximal, then for all $a$ in $R\setminus(0)$
we have $(a)=(1)$, so $a$ is invertible.

Every ideal of $R/I$ is $J/I$ for some subgroup $J$ of $R$.
Moreover, this $J$ must be an ideal of $R$.
In this case, $J$ is maximal 
if and only if $J/I$ is a maximal ideal of $R/I$.
\end{proof}

Suppose $\mathscr K$ is an indexed family $(K_i\colon i\in\Omega)$ of fields.
For example, each $K_i$ might be $\R$, 
or each $K_i$ might be a different finite field.  
Suppose $M$ is a maximal ideal of the ring $\prod\mathscr K$.  
By the last theorem, the quotient $\prod\mathscr K/M$ is a field.
Such a field is called an \textbf{ultraproduct} 
of the indexed family $\mathscr K$.  
The ultraproduct is called \textbf{principal} or \textbf{nonprincipal,}
according as $M$ itself is principal or nonprincipal.

If $I$ is an arbitrary ideal of $\prod\mathscr K$,
we define
\begin{equation*}
  \Supp I=\{\supp x\colon x\in I\}.
\end{equation*}

\begin{theorem}\label{thm:I}
Let $\mathscr K$ be an indexed family of fields.
If $I$ is an ideal of $\prod\mathscr K$, then
\begin{equation*}
  I=\left\{x\in\prod\mathscr K\colon\supp x\in\Supp I\right\}.
\end{equation*}  
\end{theorem}

\begin{proof}
Obviously 
$I\included\left\{x\in\prod\mathscr K\colon\supp x\in\Supp I\right\}$.
%Assume as usual that the index set of $\mathscr K$ is $\Omega$.  
For the reverse inclusion,
if $a\in I$ and $\supp b=\supp a$ or even $\supp b\included\supp a$, 
then $b\in I$,
since $b=ca$, where
\begin{equation*}
  c_i=
  \begin{cases}
    b_i/a_i,&\text{ if }i\in\supp b,\\
0,&\text{ if }i\notin\supp b;
  \end{cases}
\end{equation*}
this shows 
$\left\{x\in\prod\mathscr K\colon\supp x\in\Supp I\right\}\included I$.
\end{proof}

If $A\included\Omega$, we define the element $\chi_A$ of $\F_2{}^{\Omega}$ by
\begin{equation*}
\chi_A(i)
%=\uppi_i(\chi_A)
=\begin{cases}
    1,&\text{ if }i\in A,\\
0,&\text{ if }i\in\Omega\setminus A.
  \end{cases}
\end{equation*}

If $A$ and $B$ are both subsets of $\Omega$, we define
\begin{equation*}
  A\symdiff B=(A\setminus B)\cup(B\setminus A);
\end{equation*}
this is the \textbf{symmetric difference} of $A$ and $B$.
See Figure~\ref{fig:sym-diff}.
\begin{figure}[ht]
\psset{unit=5mm,linewidth=1pt,fillstyle=solid}
\mbox{}\hfill
\begin{pspicture}(-3,-2.866)(3,2)
  \pscustom[fillcolor=gray]{%
\psarc(1,0)2{-120}{120}
\psarc(-1,0)2{60}{300}}
\pscustom[fillcolor=white]{%
\psarc(-1,0)2{-60}{60}
\psarc(1,0)2{120}{240}}
\end{pspicture}
\hfill
\begin{pspicture}(-3,-2)(3,3.73)
\pscustom[fillcolor=gray]{%
\psarc(-1,0){2}{120}{300}
\psarc(1,0)2{-120}{60}
\psarc(0,1.73)20{180}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\pscustom[fillcolor=white]{%
\psarc(0,1.73)2{180}{240}
\psarc(1,0)2{180}{240}
\psarc(-1,0)2{-60}0
\psarc(0,1.73)2{-60}0
\psarc(1,0)2{60}{120}
\psarc(-1,0)2{60}{120}}
%%%%%%%%%%%%%%%%%%%%%%%%%%
\pscustom[fillcolor=gray]{%
\psarc(1,0)2{120}{180}
\psarc(0,1.73)2{-120}{-60}
\psarc(-1,0)20{60}}
\end{pspicture}
\hfill\mbox{}
\caption{Symmetric differences of two sets and three sets}\label{fig:sym-diff}
\end{figure}

\begin{theorem}\label{thm:pow}
  For every set $\Omega$, the map
\begin{equation*}
A\mapsto\chi_A  
\end{equation*}
from $\pow{\Omega}$ to $\F_2{}^{\Omega}$ is a bijection,
whose inverse is
\begin{equation*}
 x\mapsto\supp x. 
\end{equation*}
Moreover,
\begin{align*}
  \chi_{A\symdiff B}&=\chi_A+\chi_B,&
\chi_{A\cap B}&=\chi_A\cdot\chi_B.
\end{align*}
In $\pow{\Omega}$,
\begin{equation*}
  A\cup B=A\symdiff B\symdiff(A\cap B).
\end{equation*}
\end{theorem}

\begin{corollary}\label{cor:pow}
$\pow{\Omega}$ is a ring
in which sums are symmetric differences
and products are intersections.
Moreover, an ideal of $\pow{\Omega}$
is just a subset $I$ such that
\begin{gather*}
\emptyset\in I,\\
  X\in I\And Y\in I\implies X\cup Y\in I,\\
Y\in I\And X\included Y\implies X\in I.
\end{gather*}
\end{corollary}
See Figure \ref{fig:pow-ideal}.
\begin{figure}[ht]
\centering
\psset{unit=15mm}
\begin{pspicture}(-1,-2)(1,2)
%\psgrid
\psarc(-1,0)2{-60}{60}
\psarc(1,0)2{120}{240}
\pscustom[fillstyle=solid,fillcolor=gray]{%
\psline(0.6,-0.2)(0,0.4)(-0.6,-0.2)
\psset{linewidth=0pt}  % doesn't have the effect I want
\psarc(1,0)2{210}{240}
\psarc(-1,0)2{-60}{-30}
}
\psdots(0,-1.73)(0,1.73)(0.6,-0.2)(0,0.4)(-0.6,-0.2)
\uput[ur](0.6,-0.2){$Y$}
\uput[u](0,0.4){$X\cup Y$}
\uput[ul](-0.6,-0.2){$X$}
\uput[d](0,-1.73){$\emptyset$}
\uput[u](0,1.73){$\Omega$}
\end{pspicture}
\caption{An ideal of $\pow{\Omega}$}\label{fig:pow-ideal}
\end{figure}
There are two important examples of ideals of the ring $\pow{\Omega}$: 
\begin{compactenum}
\item
If $A\included\Omega$,
then $\pow A$ is the principal ideal of $\pow{\Omega}$ generated by $A$.  
\item
The set of finite subsets of $\Omega$ 
is an ideal of $\pow{\Omega}$, 
called the \textbf{Fr\'echet ideal} of $\pow{\Omega}$;
this ideal can be denoted by
\begin{equation*}\label{powfO}
   \powf{\Omega}.
\end{equation*}
\end{compactenum}



If $\mathscr K$ is again the indexed family $(K_i\colon i\in\Omega)$ of fields,
we want to show that the map $I\mapsto\Supp I$ is a bijection
from the family of ideals of $\prod\mathscr K$ 
to the family of ideals of $\pow{\Omega}$.

The underlying set of the field $\F_2$ 
can be considered as the subset $\{0,1\}$ of each field $K_i$.
The field $\F_2$ is not a \emph{subfield} of $K_i$
unless $K_i$ has characteristic $2$;
but it can be understood as a multiplicative submonoid.
Hence $\F_2{}^{\Omega}$ is a multiplicative submonoid of $\prod\mathscr K$.
For each subset $A$ of $\Omega$, 
the function $\chi_A$ can be understood as belonging to $\prod\mathscr K$.


If $x$ belongs to an arbitrary field, we define\label{x^*}
\begin{equation*}
  x^*=
  \begin{cases}
    1/x,&\text{ if }x\neq0,\\
    0,&\text{ if }x=0.
  \end{cases}
\end{equation*}
If now $x$ belongs to $\prod\mathscr K$,
we can let
\begin{equation}\label{eqn:*}
  x^*=(x_i{}^*\colon i\in\Omega).
\end{equation}
Then easily
\begin{equation*}
  \supp{x^*}=\supp x.
\end{equation*}

\begin{theorem}
Let $\mathscr K$ be again the indexed family $(K_i\colon i\in\Omega)$ of fields.
The map $I\mapsto\{x^*x\colon x\in I\}$ is a bijection
from the family of ideals of $\prod\mathscr K$
to the family of ideals of $\F_2{}^{\Omega}$.
\end{theorem}

\begin{proof}
If $x\in\prod\mathscr K$, then
\begin{equation*}
  x^*x=\chi_{\supp x}.
\end{equation*}
Hence we have the commutative diagram in Figure~\ref{fig:F}.
\begin{figure}
  \begin{equation*}
\xymatrix@!0@C=3.46cm@R=2cm{
                                    &\F_2{}^{\Omega}\ar@/^/[dd]^{x\mapsto\supp x}\\
\prod\mathscr K\ar@/^/[ur]^{x\mapsto x^*x} 
               \ar@/_/[dr]_{x\mapsto\supp x}&\\
                                    &\pow{\Omega}\ar@/^/[uu]^{A\mapsto\chi_A}
}
  \end{equation*}
  \caption{Products of fields}\label{fig:F}
  
\end{figure}
If $I$ is an ideal of $\prod\mathscr K$,
let us denote $\{x^*x\colon x\in I\}$ by $I^*$.
Then this is an ideal of $\F_2{}^{\Omega}$
if and only if $\Supp I$ is an ideal of $\pow{\Omega}$.
Evidently
\begin{equation*}
  I^*=\F_2{}^{\Omega}\cap I.
\end{equation*}
Since $\F_2{}^{\Omega}$ is a submonoid of $\prod\mathscr K$,
we have that $I^*$ is an ideal of $\F_2{}^{\Omega}$
if and only if it is closed under addition,
or equivalently $\Supp I$ is closed under symmetric differences.
But $\Supp I$ is so closed, 
since in $\prod\mathscr K$ we have
\begin{equation*}
  \supp x\symdiff\supp y\included\supp{x+y}
\end{equation*}
and so
\begin{equation*}
  \supp x\symdiff\supp y=\supp{(x+y)\cdot\chi_{\supp x\symdiff\supp y}}.
\end{equation*}
So $I^*$ is indeed an ideal of $\F_2{}^{\Omega}$.
Since $(I^*)=I$, the map $I\mapsto I^*$ is injective.
Suppose $J$ is an arbitrary ideal of $\F_2{}^{\Omega}$,
and let $I=\{x\in\prod\mathscr K\colon x^*x\in J\}$.
Evidently this is nonempty.
If it contains $x$ and $y$, then it contains $x-y$, since
\begin{equation*}
  \supp{x-y}\included\supp x\cup\supp y.
\end{equation*}
Also, if $z\in\prod\mathscr K$, then $I$ contains $zx$, since
\begin{equation*}
  \supp{zx}\included\supp x.
\end{equation*}
Thus $I$ is an ideal of $\prod\mathscr K$, and $I^*=J$.
\end{proof}

Under the one-to-one correspondence of the theorem,
\begin{compactenum}[1)]
\item
a principal ideal $\pow A$ of $\pow{\Omega}$
corresponds to the obvious image of $\prod_{i\in A}K_i$ in $\prod\mathscr K$;
\item
the Fr\'echet ideal of $\pow{\Omega}$
corresponds to the ideal $\bigoplus\mathscr K$ of $\prod\mathscr K$.
\end{compactenum}

\begin{theorem}
Let $\mathscr K$ be an indexed family $(K_i\colon i\in\Omega)$ of fields.
\begin{compactenum}
\item 
If $j\in\Omega$, then
\begin{equation*}
  \Ker{\uppi_j}=(1-\upiota_j(1))=(\chi_{\Omega\setminus\{j\}}).
\end{equation*}
This is a maximal ideal of $\prod\mathscr K$, 
and every principal maximal ideal of $\prod\mathscr K$ is of this form.
Thus every principal ultraproduct of $\mathscr K$ 
is isomorphic to one of the $K_j$. 
\item
Every nonprincipal maximal ideal of $\prod\mathscr K$ 
includes the ideal $\bigoplus\mathscr K$.
\end{compactenum}
\end{theorem}

We are going to be interested in nonprincipal ultraproducts.




\chapter{Products of fields}

The main results of this chapter are the following.
\begin{compactenum}
\item 
All maximal ideals of a commutative ring are \emph{prime ideals} 
(Corollary \ref{cor:max-prime}, page \pageref{cor:max-prime}).
\item
Every proper ideal of a commutative ring is included in a maximal ideal
(Theorem \ref{thm:MI}, page \pageref{thm:MI}),
by \emph{Zorn's Lemma} (page \pageref{thm:ZL}).
\item
The set $\spec$ of prime ideals of a commutative ring $R$
is a \emph{compact Kolmogorov topological space} 
(Theorem \ref{thm:spec-top}, page \pageref{thm:spec-top})
whose closed sets are in one-to-one correspondence
with the \emph{radical ideals} of $R$ 
(Corollary \ref{cor:rad}, page \pageref{cor:rad}).
\item
A proper ideal $I$ of a commutative ring $R$ is radical
if and only if $R/I$ is \emph{reduced} 
(Theorem \ref{thm:rad-reduced}, page \pageref{thm:rad-reduced}).
\item
A commutative ring is \emph{regular} if and only if
it is reduced and all of its prime ideals are maximal
(Theorem \ref{thm:reg-eq}, page \pageref{thm:reg-eq}).
\item
A commutative ring is regular if and only if it embeds,
as a regular ring, in a product of fields
(Theorem \ref{thm:reg-prod}, page \pageref{thm:reg-prod}).
\item
The Tychonoff Theorem (page \pageref{thm:Tychonoff})
is equivalent to the Axiom of Choice 
(Theorem \ref{thm:tych-ac}, page \pageref{thm:tych-ac}).
\end{compactenum}

\section{Prime ideals}

\subsection{Properties}

The following is Proposition VII.30 of Euclid's \emph{Elements}
\cite{MR1932864,MR17:814b}.
It will motivate the definition of \emph{prime ideal} below.

\begin{theorem}[Euclid's Lemma]\label{thm:Euc-Lem}
If $p$ is a prime number,
then for all integers $a$ and $b$,
\begin{equation*}
p\divides ab\And p\ndivides a\implies p\divides b.
\end{equation*}
\end{theorem}

\begin{proof}
Given that $p\ndivides a$, we know that $\gcd(p,a)=1$,
so we can solve $ax+py=1$ 
by Theorem \ref{thm:Euc-alg} (page \pageref{thm:Euc-alg}).
We obtain
\begin{equation*}
 abx+pby=b,
\end{equation*}
so if $p\divides ab$, then, 
since immediately $p\divides pby$,
we must have $p\divides b$.
\end{proof}

Noting that, in $\Z$,
\begin{equation*}
a\divides b\iff b\in(a),
\end{equation*}
we refer to an ideal $\mathfrak p$ of a commutative ring $R$ as
\textbf{prime}\index{prime}\label{prime} 
if $\mathfrak p$ is a \emph{proper} ideal of $R$ and, 
for all $a$ and $b$ in $R$,
\begin{equation}\label{eqn:p-ideal}
  ab\in \mathfrak p\And a\notin \mathfrak p\implies b\in \mathfrak p.
\end{equation}
(See Appendix \ref{app:German}, page~\pageref{app:German}, 
for Fraktur letters like $\mathfrak p$.)
Then the prime ideals of $\Z$ are precisely the ideals $(0)$ and $(p)$,
where $p$ is prime.
A trivial ring has no prime ideal.

We shall establish an analogue of Theorem~\ref{thm:max-field} 
(page~\pageref{thm:max-field}), with prime ideals in place of maximal ideals.
A \textbf{zero-divisor}\index{zero-divisor}\index{divisor!zero ---} 
of the commutative ring $R$ is a nonzero element $b$ 
such that the equation
\begin{equation*}
  bx=0
\end{equation*}
has a nonzero solution in $R$.
So zero-divisors are not units. 
For example, if $m>1$ and $n>1$, 
then $m+(mn)$ and $n+(mn)$ are zero-divisors in $\Zmod {mn}$.  
The unique element of the trivial ring $\Zmod 1$ is a unit, 
but not a zero-divisor.

\begin{theorem}\label{thm:zero-div-prime}
%% NOTE(review): as stated this fails in general: in $\Zmod6$ the
%% zero-divisors with $0$ form $\{0,2,3,4\}$, and $2+3=5$ is a unit,
%% so the set is not closed under addition. In general the set of
%% zero-divisors is a union of prime ideals; an additional hypothesis
%% seems to be intended here --- TODO confirm.
In a non-trivial commutative ring, the zero-divisors, together with $0$ itself,
compose a prime ideal.
\end{theorem}

A commutative ring is an \textbf{integral domain} 
if it has no zero-divisors and $1\neq0$.
If $n\in\N$, the ring $\Zmod n$ is an integral domain 
if and only if $n$ is prime.%%%%%
\footnote{Lang refers to integral domains 
as \emph{entire} rings \cite[p.~91]{Lang-alg}.
It would appear that integral domains 
were originally defined as subgroups of $\C$
that are closed under multiplication 
\emph{and} that include the integers \cite[p.~47]{Cohn-ANT}.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Hence the characteristic of an integral domain must be prime or $0$.
Fields are integral domains,
but $\Z$ is an integral domain that is not a field.
We now establish an analogue of Theorem~\ref{thm:max-field} 
(page~\pageref{thm:max-field}).

\begin{theorem}\label{thm:prime-ID}
Let $R$ be a commutative ring.
\begin{compactenum}
\item
The ideal $(0)$ of $R$ is prime if and only if
$R$ is an integral domain.
\item
An ideal $I$ of $R$ is prime if and only if 
the quotient $R/I$ is an integral domain.
\end{compactenum}
\end{theorem}

\begin{proof}
\begin{asparaenum}
\item
This is immediate from the definitions of integral domain and prime ideal,
once we note that $x\in(0)$ means $x=0$.
\item
The ideal $(0)$ of $R/I$ is $\{I\}$, and
\begin{equation*}
(a+I)(b+I)=I\iff ab\in I.\qedhere
\end{equation*}
\end{asparaenum}
\end{proof}

We might summarize Theorems \ref{thm:max-field} and \ref{thm:prime-ID} thus:
\begin{equation*}
\text{prime ideal}:\text{integral domain}
::\text{maximal ideal}:\text{field}.
\end{equation*}
Since fields are integral domains, we have:

\begin{corollary}\label{cor:max-prime}
  Maximal ideals are prime.
\end{corollary}

The converse of the corollary fails easily, 
since $(0)$ is a prime but non-maximal ideal of $\Z$.
However, every prime ideal of $\Z$ other than $(0)$ is maximal.
The same is true for $\Q[X]$ 
(see Theorem~\ref{thm:K[a]}, page \pageref{thm:K[a]}),
but not for $\Q[X,Y]$,
which has the prime but non-maximal ideal $(X)$.

In some commutative rings, \emph{every} prime ideal is maximal.
This is so for fields, since their only proper ideals are $(0)$.
We are going to show that all prime ideals 
of direct products of indexed families of fields
are maximal.
Thus the quotient of such a product by an arbitrary prime ideal
will be an ultraproduct.

We first consider a special case: the direct power $\F_2{}^{\Omega}$.
By Theorem~\ref{thm:pow} (page~\pageref{thm:pow}),
we can consider $\pow{\Omega}$ as a ring
in which the sum of two sets is their symmetric difference,
and the product of two sets is their intersection;
and this ring is isomorphic to $\F_2{}^{\Omega}$.
  
The rings $\pow{\Omega}$ and $\F_2{}^{\Omega}$
are examples of \emph{Boolean rings.}
An arbitrary nontrivial ring is called \textbf{Boolean}\index{Boolean} 
if it satisfies the identity
  \begin{equation*}
    x^2=x.
  \end{equation*}
Immediately from the definition,
every sub-ring of a Boolean ring is a Boolean ring.

\begin{theorem}\label{thm:Br-2}
Every Boolean ring is commutative and satisfies the equivalent identities
\begin{align*}
  2x&=0,&-x&=x.
\end{align*}
\end{theorem}

\begin{proof}
In a Boolean ring,
\begin{align*}
  x+y=(x+y)^2
&=x^2+xy+yx+y^2\\
&=x+xy+yx+y,
\end{align*}
so $0=xy+yx$.
Replacing $y$ with $x$ gives $0=2x^2=2x$.
Hence generally $yx=-xy=xy$.
\end{proof}

\begin{theorem}\label{thm:Boole}
Let $I$ be an ideal of a Boolean ring $R$.
\begin{compactenum}
\item 
If $I$ is prime, then $I$ is maximal.
\item
If $I$ is maximal, then
\begin{equation*}
 R/I\cong\F_2. 
\end{equation*}
\item
$I$ is maximal if and only if
  \begin{equation*}
    x\in R\setminus I\iff 1+x\in I.
  \end{equation*}
\end{compactenum}
\end{theorem}

  \begin{proof}
In a Boolean ring, by the last theorem,
\begin{equation*}
x\cdot(1+x)=x+x^2=x+x=0,
\end{equation*}
and also
\begin{equation*}
  x\in\{0,1\}\iff 1+x\in\{0,1\}.
\end{equation*}
Therefore every $x$ is a zero-divisor unless $x$ is $0$ or $1$.
Thus there are no Boolean integral domains besides $\{0,1\}$,
which is the field $\F_2$.
\end{proof}

\subsection{Existence}

So far, we do not know whether an arbitrary nontrivial commutative ring
has a maximal or even a prime ideal.
However, settling the question is easy
in one special case.

For an arbitrary set $\Omega$,
a subset $C$ of $\pow{\Omega}$ is called a \textbf{chain}
if proper inclusion is also a total relation on $C$,
so that $C$ is linearly ordered by proper inclusion
(see Theorem~\ref{thm:<tot}, page \pageref{thm:<tot}).

\begin{lemma}
\mbox{}
\begin{compactenum}
\item 
  The union of a chain of ideals of a commutative ring is an ideal.
\item
  The union of a chain of proper ideals of a commutative ring is a proper ideal.
\end{compactenum}
\end{lemma}

\begin{theorem}\label{thm:MI-countable}
  Every countable commutative ring has a maximal ideal.
\end{theorem}

\begin{proof}
Suppose $R$ is a countable nontrivial commutative ring.
This means there is a function $k\mapsto a_k$ from $\upomega$ onto $R$.
Using the Recursion Theorem (page~\pageref{thm:rec}),
we define recursively a function $k\mapsto I_k$ from $\upomega$
into the set of ideals of $R$.
Let $I_0=(0)$, which is a proper ideal of $R$.
If $(I_k\cup\{a_k\})$ is a proper ideal of $R$,
we let $I_{k+1}$ be this ideal;
otherwise $I_{k+1}=I_k$.
By induction, each $I_k$ is a proper ideal of $R$.
Let
\begin{equation*}
  J=\bigcup_{k\in\upomega}I_k.
\end{equation*}
By the lemma,
$J$ is a proper ideal of $R$.
Moreover, every element of $R\setminus J$ is $a_k$ for some $k$,
and then $a_k\notin I_{k+1}$, so $(I_k\cup\{a_k\})$ must be the improper ideal.
Therefore $(J\cup\{a_k\})$ is improper.
Thus $J$ is a maximal ideal of $R$.
\end{proof}

One way that countable rings arise is as follows.
Let $S$ be a commutative ring.
Then the additive subgroup of $S$ generated by $1$
is actually a sub-ring of $S$.
This sub-ring is the \textbf{prime ring} of $S$.
It is the image in $S$ of $\Z$
under the homomorphism $k\mapsto\underbrace{1+\dots+1}_k$
mentioned also on page \pageref{char},
and so it is isomorphic either to $\Z$ itself
or to a quotient $\Zmod n$.

Suppose $R$ is the prime ring of $S$.
If $a\in S$, we defined the notation $R[a]$ on page \pageref{R[a]}:
it stands for the smallest sub-ring of $S$ that includes $R$ and contains $a$.
If $(a_k\colon k\in\upomega)$ is an indexed family of elements of $S$,
we define the sub-rings
\begin{equation*}
  R[a_0,\dots,a_{n-1}]
\end{equation*}
of $S$ recursively, in the obvious way:
The ring is $R$ if $n=0$, and also
\begin{equation*}
  R[a_0,\dots,a_k]=\bigl(R[a_0,\dots,a_{k-1}]\bigr)[a_k].
\end{equation*}
The rings that can be written in this form 
are called \textbf{finitely generated.}

Note that being finitely generated has different meanings
for commutative rings and ideals.
(See page \pageref{fin-gen-id}.)
As an improper ideal, every commutative ring can be written as $(1)$
and is thus finitely generated as an ideal.
But a commutative ring as such need not be finitely generated:
an example is $\Q$.

\begin{theorem}\label{thm:fin-gen}
  Every finitely generated nontrivial commutative ring is countable
and therefore has a maximal ideal.
\end{theorem}

We shall adapt the proof of Theorem \ref{thm:MI-countable}
to  rings whose underlying sets are well-ordered.
We need a generalization of the Recursion Theorem.

\begin{theorem}[Transfinite Recursion]
For all sets $A$, for all ordinals $\alpha$, 
for all functions $f$ 
from $\bigcup\{A^{\beta}\colon\beta<\alpha\}$ to $A$,
there is a unique element
\begin{equation*}
(a_{\beta}\colon\beta<\alpha)
\end{equation*}
of $A^{\alpha}$ such that, for all $\beta$ in $\alpha$,
\begin{equation*}
f(a_{\gamma}\colon\gamma<\beta)=a_{\beta}.
\end{equation*}
\end{theorem}

\begin{proof}
\sloppy
We first prove uniqueness.
Suppose, if possible,
$(a'_{\beta}\colon\beta<\alpha)$ is another element of $A^{\alpha}$ as desired,
and let $\beta$ be minimal such that $a_{\beta}\neq a'_{\beta}$.
Then
\begin{equation*}
(a_{\gamma}\colon\gamma<\beta)
=(a'_{\gamma}\colon\gamma<\beta),
\end{equation*}
so by definition $a_{\beta}=a'_{\beta}$.
We now prove existence.
If the theorem fails for some $\alpha$,
let $\alpha$ be minimal such that it fails.
Say $f\colon\bigcup\{A^{\beta}\colon\beta<\alpha\}\to A$.
By hypothesis, for each $\beta$ in $\alpha$,
there is a unique element $(a_{\gamma}\colon\gamma<\beta)$ of $A^{\beta}$
such that, for all $\gamma$ in $\beta$,
\begin{equation*}
f(a_{\delta}\colon\delta<\gamma)=a_{\gamma}.
\end{equation*}
As before, $a_{\gamma}$ is independent 
of the choice of $\beta$ such that $\gamma<\beta<\alpha$.
Then for all $\beta$ in $\alpha$ we are free to define
\begin{equation*}
a_{\beta}=f(a_{\gamma}\colon\gamma<\beta).
\end{equation*}
Then the element $(a_{\beta}\colon\beta<\alpha)$ of $A^{\alpha}$
shows that the theorem does not fail for $\alpha$.
\end{proof}

Our proof used the method of the \textbf{minimal counterexample:}
we showed that there could not be such a counterexample.
The Transfinite Recursion Theorem
is used for example to show that there is a bijection,
denoted by
\begin{equation*}
\alpha\mapsto\aleph_{\alpha},
\end{equation*}
from the class $\on$ of ordinals
to the class $\cn\setminus\upomega$ of infinite cardinals:
$\aleph_{\alpha}$ is the least infinite cardinal 
that is greater than all of the cardinals in 
$\{\aleph_{\beta}\colon\beta<\alpha\}$.
(One must show that such cardinals exist.)
The \emph{Continuum Hypothesis} is that $\card{\R}=\aleph_1$,
but we shall make no use of this.

\begin{theorem}
  Every nontrivial commutative ring with a cardinality has a maximal ideal.
\end{theorem}

\begin{proof}
Let $R$ be a nontrivial commutative ring,
and suppose $\alpha\mapsto a_{\alpha}$ is a surjection 
from a cardinal $\kappa$ onto $R$.
If $\alpha<\kappa$, 
and a function $\beta\mapsto I_{\beta}$ on $\alpha$ has been defined
whose range is a chain of proper ideals of $R$, 
we define $I_{\alpha}$ to be 
$\left(\bigcup_{\beta<\alpha}I_{\beta}\cup\{a_{\alpha}\}\right)$,
if this is a proper ideal of $R$, 
and otherwise $I_{\alpha}=\bigcup_{\beta<\alpha}I_{\beta}$.
Then $\bigcup_{\alpha<\kappa}I_{\alpha}$ is a maximal ideal of $R$.
\end{proof}
 
\subsection{Zorn's Lemma}

We want to show that the last theorem applies to every ring,
so that every nontrivial ring has a maximal ideal.
Doing this will be our first use of the Axiom of Choice;
and here as always, we shall make this use explicit.

\begin{theorem}[Well Ordering]\label{thm:woth}
By the Axiom of Choice\ac,
every set has a cardinality.
\end{theorem}

\begin{proof}
Given a set $A$, we define
\begin{equation*}
  A^*=\bigl\{\{X\}\times X\colon X\in\pow A\setminus\{\emptyset\}\bigr\}.
\end{equation*}
By the Axiom of Choice,
there is a set 
that contains exactly one element of each element of $A^*$.
Such a set is a function $g$ from $\pow A\setminus\{\emptyset\}$ to $A$
such that $g(X)\in X$ for each nonempty subset $X$ of $A$.
Now say $c\notin A$.
Given an ordinal $\alpha$,
we define a function from $\alpha$ to $A\cup\{c\}$
by letting
\begin{equation*}
  f_{\alpha}(\beta)=g(A\setminus\{f_{\alpha}(\gamma)\colon\gamma<\beta\}),
\end{equation*}
if possible; otherwise, $f_{\alpha}(\beta)=c$.
If $\beta<\alpha$, then $f_{\beta}\pincluded f_{\alpha}$.
Now let $\beta$ be the least $\alpha$ 
such that $c$ is in the range of $f_{\alpha}$.
(Such $\alpha$ must exist;
otherwise $\on$ embeds in $A$.)
Then $\beta$ must be $\gamma'$ for some $\gamma$,
and then $f_{\gamma}$ is a bijection from $\gamma$ to $A$.
\end{proof}

Not only \emph{can} we use the Axiom of Choice to prove the foregoing theorem,
but we \emph{must} use it, or something equivalent to it:

\begin{theorem}
  The Well Ordering Theorem implies the Axiom of Choice.
\end{theorem}

\begin{proof}
Suppose every set has a cardinality,
and $A$ is a set of nonempty, pairwise-disjoint sets.
Let $\alpha\mapsto a_{\alpha}$ be a bijection 
from some cardinal $\kappa$ to $\bigcup A$,
and let $B$ contain those $a_{\alpha}$ such that, for some $X$ in $A$,
$\alpha$ is the least $\beta$ such that $a_{\beta}\in X$.
Then $B$ contains exactly one element of each element of $A$.
\end{proof}

For algebraic results that logically require the Axiom of Choice,
it may be more convenient to use this in the form of \emph{Zorn's Lemma.}
Suppose $\Omega$ is a set and $A\included\pow{\Omega}$.
Then proper inclusion ($\pincluded$)
is a transitive irreflexive relation on $A$ and on each of its subsets
(see Theorems~\ref{thm:<trans} and~\ref{thm:<irr}, page~\pageref{thm:<trans}).
Suppose $C\included A$.
An \textbf{upper bound} of $C$
is a set that includes each element of $C$.
In particular, $\bigcup C$ is an upper bound,
and every upper bound includes this union.
A \textbf{maximal element} of $A$ is an element 
that is not properly included in any other element.

\begin{theorem}[Zorn's Lemma]\label{thm:ZL}
By the Axiom of Choice\ac,
for all sets $\Omega$, for all subsets $A$ of $\pow{\Omega}$,
if $A$ contains an upper bound for each of its chains,
then $A$ has a maximal element.%%%%%
\footnote{In 1935, Zorn \cite{MR1563165} 
presented this result
for the case where the upper bounds of the chains 
are actually the unions of the chains.
He called the conclusion the \enquote{maximum principle}
and suggested that using it would make proofs more algebraic
than when the Well-Ordering Theorem is used.
Zorn promised to prove the converse in a later paper,
which would imply the full equivalence
of the maximum principle and the Axiom of Choice;
but it seems such a paper never appeared.
Earlier, in 1922, Kuratowski \cite[(42), p.~89]{Kuratowski-Zorn}
proved ``Zorn's Lemma''
for the case where the chains in question are well-ordered.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{theorem}

\begin{proof}
By the Axiom of Choice, 
there is a bijection $\alpha\mapsto B_{\alpha}$ 
from some cardinal $\kappa$ to $A$.
By the Recursion Theorem,
there is a function $\alpha\mapsto C_{\alpha}$ from $\kappa$ to $A$
such that, for all $\alpha$ in $\kappa$, 
if $\{C_{\beta}\colon\beta<\alpha\}$ is a chain,
and if $\gamma$ is minimal such that $B_{\gamma}$ is an upper bound of this chain,
then
\begin{equation*}
C_{\alpha}=
\begin{cases}
	B_{\gamma},&\text{ if }B_{\gamma}\not\included B_{\alpha},\\
	B_{\alpha},&\text{ if }B_{\gamma}\included B_{\alpha};
\end{cases}
\end{equation*}
in particular, $\{C_{\beta}\colon\beta\leq\alpha\}$ is a chain.
If $\{C_{\beta}\colon\beta<\alpha\}$ is \emph{not} a chain,
then we can define $C_{\alpha}=B_0$.
But we never have to do this:
for all $\alpha$ in $\kappa$,
the set $\{C_{\beta}\colon\beta<\alpha\}$ \emph{is} a chain,
since there can be no minimal counterexample to this assertion.
Indeed, if $\alpha$ is minimal 
such that $\{C_{\beta}\colon\beta<\alpha\}$ is not a chain,
there must be $\beta$ and $\gamma$ in $\alpha$
such that $\gamma<\beta$
and neither of $C_{\beta}$ and $C_{\gamma}$ includes the other.
But by assumption $\{C_{\delta}\colon\delta<\beta\}$ is a chain,
and so by definition $\{C_{\delta}\colon\delta\leq\beta\}$ is a chain,
and in particular one of $C_{\beta}$ and $C_{\gamma}$ must include the other.

By a similar argument, $\{C_{\alpha}\colon\alpha<\kappa\}$ is a chain,
so it has an upper bound $D$ in $A$.
Suppose for some $\alpha$ we have $D\included B_{\alpha}$.
Then $C_{\alpha}=B_{\alpha}$,
since otherwise, by definition, $C_{\alpha}=B_{\gamma}$ for some $\gamma$
such that $B_{\gamma}\not\included B_{\alpha}$:
in this case $C_{\alpha}\not\included B_{\alpha}$,
so $C_{\alpha}\not\included D$, which is absurd.
Thus $C_{\alpha}=B_{\alpha}$,
and hence $B_{\alpha}\included D$, so $D=B_{\alpha}$.
Therefore $D$ is a maximal element of $A$.
\end{proof}

We sometimes want to use Zorn's Lemma in a more general form.
If $<$ is an arbitrary ordering of a set $A$, 
a \textbf{chain} of $(A,<)$ is a subset of $A$
that is linearly ordered by $<$.
If $C\included A$, an \textbf{upper bound} of $C$ (with respect to $<$) in $A$
is an element $a$ of $A$ such that, for all $x$ in $C$, $x\leq a$.
A \textbf{maximal element} of $A$ (with respect to $<$)
is an element $b$ such that, for all $x$ in $A$, if $b\leq x$, then $b=x$.

\begin{corollary}\label{cor:Zorn}
By the Axiom of Choice\ac,
an order whose every chain has an upper bound
has a maximal element.
\end{corollary}

\begin{proof}
  Given an order $(A,<)$,
for each $b$ in $A$ we let
\begin{equation*}
  (b)=\{x\in A\colon x\leq b\}.
\end{equation*}
Now let
\begin{equation*}
  \mathscr A=\{(x)\colon x\in A\}.
\end{equation*}
Then $x\mapsto(x)$ is an isomorphism from $(A,<)$ to $(\mathscr A,\pincluded)$;
so since the claim holds for the latter structure,
it holds for the former.
\end{proof}

We now have easily:

\begin{theorem}[Maximal Ideal]\label{mith}
By Zorn's Lemma\ac,
every nontrivial commutative ring has a maximal ideal.
\end{theorem}

\begin{proof}
The family of proper ideals of a nontrivial commutative ring
has an upper bound (namely the union)
for each of its chains.
\end{proof}

\begin{theorem}\label{thm:mit-ac}
  The Maximal Ideal Theorem
implies the Axiom of Choice.
\end{theorem}

\begin{proof}
  The proof is given in Rubin and Rubin \cite[p.~113]{MR798475},
where it is attributed to Hodges, \enquote{Krull implies Zorn} 
(J. London Math.\ Soc.\ \textbf{19} (1979), 285--287).
\end{proof}

Then the following statements are equivalent:
\begin{compactitem}
\item 
the Axiom of Choice;
\item
the Well Ordering Theorem;
\item
Zorn's Lemma;
\item
the Maximal Ideal Theorem.
\end{compactitem}
By Corollary \ref{cor:max-prime} (page \pageref{cor:max-prime}),
we obtain the following.

\begin{theorem}[Prime Ideal]\label{thm:PI}
By the Maximal Ideal Theorem\ac,
every nontrivial commutative ring has a prime ideal.  
\end{theorem}

Recall from Theorem \ref{thm:Boole}
that all prime ideals of Boolean rings are maximal.  

\begin{theorem}[Boolean Prime Ideal]\label{thm:BPI}
By the Prime Ideal Theorem\PI,
every nontrivial Boolean ring has a maximal ideal. 
\end{theorem}

We shall show later that the Boolean Prime Ideal Theorem
implies the Prime Ideal Theorem.
However, these theorems do \emph{not} imply the Maximal Ideal Theorem.%%%%%
\footnote{See the discussion in Hodges \cite[pp.~272f.]{MR94e:03002}
or Rubin and Rubin \cite[p.~99]{MR798475}.
The latter comprehensive reference does not however mention
that the Prime Ideal Theorem
is implied by the Boolean Prime Ideal Theorem.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
So we are going to be careful
about which theorems need the full Axiom of Choice
(or one of its equivalent forms)
and which need only the Prime Ideal Theorem.
For example, we have the following.

\begin{theorem}\label{thm:MI}
Suppose $I$ is a proper ideal of a commutative ring $R$.
\begin{compactenum}
\item 
  By the Maximal Ideal Theorem,
$I$ is included in a maximal ideal of $R$.
\item
  By the Prime Ideal Theorem,
$I$ is included in a prime ideal of $R$.
\end{compactenum}
\end{theorem}

\begin{proof}
  By the Maximal Ideal Theorem,
$R/I$ has a maximal ideal $M$.
Then $\{x\in R\colon x+I\in M\}$ is a maximal ideal of $R$ that includes $I$.
Similarly in the prime case.
\end{proof}

\section{Determinacy}

This section is about why the Axiom of Choice is not
\enquote{obviously} or \enquote{intuitively} correct.
The axiom contradicts another set-theoretic axiom 
that might be considered \enquote{obviously} or \enquote{intuitively} correct.  
That axiom is the Axiom of Determinacy, according to which, 
in certain \emph{games} of infinite length, 
one of the players always has a winning strategy. 

We consider games with two players.  Hodges \cite{Hodges-Building}
calls these players $\forall$ and $\exists$, after Abelard and Eloise;
but I propose to call them simply $0$ and $1$, for notational
purposes.  A \textbf{game} that $0$ and $1$ can play is determined by
a partition $A_0\amalg A_1$ of the set ${}^{\upomega}2$ of binary
sequences on $\upomega$.  A particular \textbf{play} of the game can
be analyzed as a sequence of \textbf{rounds,} indexed by $\upomega$.
In round $m$, player $0$ chooses an element $a_{2m}$ of $2$; this is
the \textbf{move} of $0$ in this round.  Then player $1$ moves by
choosing an element $a_{2m+1}$ of $2$.  The play itself is then the
sequence $(a_n\colon n\in\upomega)$ or $a$, which is an element of
${}^{\upomega}2$.  The play is \textbf{won} by that player $e$ such
that $a\in A_e$; and then player $1-e$ has \textbf{lost.} 

Each player $e$ may use a \textbf{strategy,} namely a function $f_e$
from $\bigcup_{m\in\upomega}{}^{m+e}2$ to $2$.  (So $f_0$ assigns an
element of $2$ to each finite binary sequence; $f_1$ does this to
every \emph{nonempty} finite binary sequence.)  If both $f_0$ and $f_1$ are
chosen, then a play is determined, namely the sequence $(a_n\colon
n\in\upomega)$ given by 
\begin{align*}
a_{2m}&=f_0(a_1,a_3,\dots,a_{2m-1}),&
a_{2m+1}&=f_1(a_0,a_2,\dots,a_{2m}),
\end{align*}
or simply by
\begin{equation*}
a_{2m+e}=f_e(a_{1-e},a_{3-e},\dots,a_{2m-1+e}).
\end{equation*}
That is, $f_e$ determines the move of player $e$ from the previous
moves by the \emph{other} player.  The player's own previous moves
need not be formally considered, since they themselves were already
determined by the player's strategy and the other player's previous
moves. 

Suppose player $1-e$ has chosen strategy $f_{1-e}$.  For every $b$ in
${}^{\upomega}2$, player $e$ might choose a strategy $f_e$ that is
constant on each set ${}^{m+e}2$, having the value $b_m$ there.  The
resulting play will be $a$, where 
\begin{align*}
a_{2m+1-e}&=f_{1-e}(b_0,b_1,\dots,b_{m-e}),&a_{2m+e}&=b_m.
\end{align*}
This shows that, for every choice of $f_{1-e}$, there are continuum-many plays that can result if player $1-e$ uses this strategy.

If, using a strategy $f_e$, player $e$ wins all plays of a game, then $f_e$ is a \textbf{winning} strategy for that game.  The game is \textbf{determined} if one of the players has a winning strategy.  The \textbf{Axiom of Determinacy} is that in every game, one of the players has a winning strategy: in other words, for every choice of the $A_e$, one of the following sentences of infinitary logic is true:
\begin{gather*}
	\Exists{x_0}\Forall{x_1}\Exists{x_2}\cdots\;(x_0,x_1,x_2,\dots)\in A_0,\\
	\Forall{x_0}\Exists{x_1}\Forall{x_2}\cdots\;(x_0,x_1,x_2,\dots)\in A_1.	
\end{gather*}
However, this Axiom is false under the assumption of the Axiom of
Choice, or more precisely under the assumption that the Continuum can
be well-ordered, so that there is a least ordinal, called
$2^{\upomega}$, whose cardinality is that of ${}^{\upomega}2$. 

Indeed, every ordinal is $\alpha+n$ for some unique limit ordinal
$\alpha$ and finite ordinal $n$.  Then $\alpha+n$ is even or odd,
according as $n$ is even or odd.  Assuming the Axiom of Choice, we can
list all possible strategies as
$(f^{\alpha}\colon\alpha<2^{\upomega})$, where $f^{\alpha}$ will be a
strategy for $e$ if and only if $\alpha+e$ is even.   

We shall now define a list $(a^{\alpha}\colon\alpha<2^{\upomega})$ of
possible plays (that is, elements of ${}^{\upomega}2$) so that,  
\begin{compactitem}
\item
for all $\alpha$, if $\alpha+e$ is even, then $e$ can use strategy
$f^{\alpha}$ for the play $a^{\alpha}$; that is, for all $m$ in $\upomega$,
\begin{equation*}
a^{\alpha}_{2m+e}=f^{\alpha}(a^{\alpha}_{1-e},a^{\alpha}_{3-e},\dots,a^{\alpha}_{2m-1+e});
\end{equation*}
\item
 $a^{\alpha}\neq a^{\beta}$ for all distinct $\alpha$ and $\beta$ such
  that $\alpha+\beta$ is odd.
\end{compactitem}
We do this recursively.  If $(a^{\beta}\colon\beta<\alpha)$ has
been defined, and $\alpha<2^{\upomega}$, then since there are
continuum-many plays in which the strategy $f^{\alpha}$ is used, one
of them, to be called $a^{\alpha}$, is not among those $a^{\beta}$
such that $\beta<\alpha$ and one of $\beta+\alpha$ and $\alpha+\beta$ is odd
(in ordinal arithmetic the two sums may have different parities,
and either suffices to make $\alpha$ and $\beta$ of opposite parity).

Since, if $\alpha+e$ is even, player $e$ can use strategy $f^{\alpha}$
for the play $a^{\alpha}$, this means player $1-e$ has \emph{some}
strategy that, with $f^{\alpha}$, determines $a^{\alpha}$.    That is,
player $1-e$ can win against strategy $f^{\alpha}$, provided
$a^{\alpha}\in A_{1-e}$.  We now choose the partition of
${}^{\upomega}2$ so that  
\begin{align*}
\{a^{\alpha}\colon\alpha\text{ even}\}&\included A_1,&
\{a^{\alpha}\colon\alpha\text{ odd}\}&\included A_0.
\end{align*}
Then neither player has a winning strategy for the game: the game is
not determined. 



\section{Spectra}\label{sect:spectra}

The \textbf{spectrum}\label{spectrum} 
of a commutative ring is the set of its prime ideals.
The spectrum of a commutative ring $R$ can be denoted by
\begin{equation*}
  \spec.
\end{equation*}
We are going to define a \emph{topology}\label{spec-prom} on $\spec$.
Let us recall what this means.

\subsection{Topologies}

Topologies can be defined in terms of \emph{open sets} or \emph{closed sets.}
We shall use \emph{closed sets.}
Given an arbitrary set $A$,
let us understand a \textbf{topology} on $A$
to be a family $\tau$ of subsets of $A$ such that
\begin{compactenum}[1)]
\item\label{item:fin-u}
if $X$ and $Y$ are in $\tau$, then $X\cup Y\in\tau$;
\item\label{item:arb-int}
if $\mathscr X\included\tau$, then $\bigcap\mathscr X\in\tau$;
\item\label{item:0}
$\emptyset\in\tau$.
\end{compactenum}
In words,
\begin{inparaenum}[(1)]
  \item
 $\tau$ is closed under finite unions and
\item
arbitrary intersections,
and
\item
 $\tau$ contains the empty set.
\end{inparaenum}
The pair $(A,\tau)$ is called a \textbf{topological space.}

In condition \eqref{item:arb-int} of the definition,
we allow $\mathscr X$ to be $\emptyset$,
and then we understand $\bigcap\emptyset$ to be $A$ itself;
thus we have (4) $A\in\tau$.
Perhaps most writers will give this
as a fourth condition in the \emph{definition} of a topology,
without noting that it can be derived from condition \eqref{item:arb-int}.

Conditions \eqref{item:fin-u} and \eqref{item:0} together 
are that $\tau$ is the universe
of a substructure of the monoid $(\pow A,\emptyset,\cup)$:
in short, $\tau$ is a submonoid of $(\pow A,\emptyset,\cup)$.
(It would be ambiguous to say $\tau$ is a submonoid of $\pow A$ simply,
because $(\pow A,A,\cap)$ is also a monoid.)

The elements of the topology $\tau$ on $A$
are the \textbf{closed} subsets of $A$
with respect to the topology.
The complement in $A$ of a closed subset is an \textbf{open} subset.
For example,
in the \emph{Euclidean topology} on $\R$,
the open subsets are the unions of open intervals.
Hence the closed subsets of $\R$ in this topology
are the intersections of complements of open intervals.
In particular, finite unions of closed intervals are closed sets.
However, some closed subsets of $\R$ are not unions of closed intervals.
The \textbf{Cantor Set}\label{Cantor} is an example:
this is the complement of the union of $(-\infty,0)$ and $(1,\infty)$
and all of the intervals
\begin{equation*}
  \left(\sum_{k<n}\frac{2e_k}{3^{k+1}}+\frac{1}{3^{n+1}},
  \sum_{k<n}\frac{2e_k}{3^{k+1}}+\frac{2}{3^{n+1}}\right),
\end{equation*}
where $n\in\N$ and $(e_k\colon k<n)\in{}^n2$.
The Cantor set is the set to which a bijection from $\pow{\upomega}$ 
is defined in the proof of the uncountability of $\R$ 
(Theorem~\ref{thm:R-uncount}, page \pageref{thm:R-uncount}).

Given a topology $\tau$ on $A$
and an arbitrary subset $X$ of $A$, we define
\begin{equation*}
  \bar X=\bigcap\{Y\in\tau\colon X\included Y\}.
\end{equation*}

\begin{theorem}
  In an arbitrary topological space,
  \begin{compactenum}[1)]
  \item 
$X\included\bar X$ and 
\item
$\bar X$ is closed,
  \end{compactenum}
  so $\bar X$ is the smallest closed subset that includes $X$.
Moreover,
\begin{align}\label{eqn:cl}
  X&\included\bar X,&
X\included\bar Y&\implies\bar X\included\bar Y.
\end{align}
\end{theorem}

The set $\bar X$ is called 
the \textbf{closure} of $X$ with respect to the topology.

\subsection{Closure operations and Moore families}

An arbitrary operation $X\mapsto\bar X$ on $\pow A$ 
with the properties in~\eqref{eqn:cl}
is called a \textbf{closure operation} on $A$.
We easily have the following.

\begin{theorem}\label{thm:cl}
An operation $X\mapsto\bar X$ on $\pow A$ is a closure operation on $A$
if and only if
\begin{align*}
  X&\included\bar X,&
X\included Y&\implies\bar X\included\bar Y,&
\bar{\bar X}&=\bar X.
\end{align*}
\end{theorem}

To obtain a closure operation from a topology
does not actually require every part of the definition of a topology.
Weakening the definition,
we shall say that a subset $\mathscr F$ of $\pow A$ 
is a \textbf{Moore family}\label{Moore} on $A$ if
\begin{equation*}
  \mathscr X\included\mathscr F\implies\bigcap\mathscr X\in\mathscr F.
\end{equation*}
Again we understand $\bigcap\emptyset$ to be $A$; so this is in $\mathscr F$.
A topology on $A$ is then just a Moore family on $A$
that is also a submonoid of $(\pow A,\emptyset,\cup)$.

We have already encountered Moore families.
By Theorem~\ref{thm:ideal-int} (page \pageref{thm:ideal-int}),
the family of ideals of a commutative ring $R$ is a Moore family on $R$.
But the family of \emph{prime} ideals of $R$ 
is \emph{not} always a Moore family on $R$.
For example, in $\Z$, $(2)\cap(3)=(6)$, which is not prime.
Birkhoff \cite[p.~111]{MR0227053}
attributes to Moore the following theorem.%%%%%
\footnote{The precise reference is to E. H. Moore's 
\emph{Introduction to a form of general analysis,} 1910.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{theorem}\label{thm:Moore}
Let $A$ be a set.
  \begin{compactenum}
  \item 
If $X\mapsto\bar X$ is a closure operation on $A$,
then $\{\bar X\colon X\included A\}$ is a Moore family on $A$.
\item
If $\mathscr F$ is a Moore family on $A$,
then the operation
\begin{equation*}
X\mapsto\bigcap\{Y\in\mathscr F\colon X\included Y\}  
\end{equation*}
on $\pow A$ is a closure operation on $A$.
\item
The given conversions between closure operations and Moore families 
are inverses of one another.
  \end{compactenum}
\end{theorem}

\begin{proof}
Suppose $X\mapsto\bar X$ is a closure operation on $A$,
and $\mathscr F=\{\bar X\colon X\included A\}$.
Let $\mathscr X\included\mathscr F$.
If $Y\in\mathscr X$, then
\begin{align*}
  \bigcap\mathscr X&\included Y,&
  \overline{\bigcap\mathscr X}&\included Y.
\end{align*}
Therefore
\begin{equation*}
\bigcap\mathscr X\included\overline{\bigcap\mathscr X}
\included\bigcap\mathscr X,
\end{equation*}
so these last inclusions must be equations, and
$\bigcap\mathscr X\in\mathscr F$.
The rest is easy.
\end{proof}

For example,
since the family of ideals of a commutative ring is a Moore family,
the operation $X\mapsto(X)$ on the ring is a closure operation.

In general, if $X\mapsto\bar X$ is a closure operation on $A$,
it is reasonable to say that each subset $\bar X$ of $A$ is \textbf{closed}
and is the \textbf{closure} of $X$,
with respect to the given closure operation.
However, the resulting Moore family of closed subsets of $A$
need not be a topology,
because it need not be closed under finite unions
and it need not contain $\emptyset$.
For example, the ideals of a commutative ring
do not compose a topology on the ring.

\subsection{Galois correspondences}

Closure operations arise in the following setting.
Let $A$ and $B$ be two arbitrary sets,
and suppose there are functions 
$X\mapsto X^*$ from $\pow A$ to $\pow B$
and $Y\mapsto Y^{\dag}$ from $\pow B$ to $\pow A$
such that
\begin{align*}
X\included X_1\implies X_1{}^*&\included X^*,&
Y\included Y_1\implies Y_1{}^{\dag}&\included Y^{\dag}
\end{align*}
(that is, the two functions are inclusion-reversing), and also
\begin{align*}
X&\included(X^*)^{\dag},&Y&\included(Y^{\dag})^*.
\end{align*}
Then the two functions
constitute a \textbf{Galois correspondence}\label{galois} 
between $\pow A$ and $\pow B$.
We shall show on page \pageref{Galois} 
how the original Galois correspondence in field theory 
is a special case.
The general definition is apparently due to \O ystein Ore,%%%%%
\footnote{Ore's situation is even more general,
with arbitrary (partially) ordered sets in place of $\pow A$ and $\pow B$.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
who proves the following \cite[Theorem 2, \S2, p.~496]{MR0010555}:

\begin{theorem}\label{thm:Gal-cor}
\sloppy
Suppose $X\mapsto X^*$ and $Y\mapsto Y^{\dag}$
constitute a Galois correspondence between $\pow A$ and $\pow B$.
Then the operations
\begin{align*}
X&\mapsto(X^*)^{\dag},&Y&\mapsto(Y^{\dag})^*  
\end{align*}
are closure operations on $A$ and $B$ respectively.
The closed subsets of $A$ and the closed subsets of $B$ 
are in one-to-one, inclusion-reversing correspondence 
under the Galois correspondence.
\end{theorem}

\begin{proof}
The defining properties of a Galois correspondence give
\begin{align*}
  X^*&\included((X^*)^{\dag})^*,&
  ((X^*)^{\dag})^*&\included X^*,
\end{align*}
and therefore
\begin{equation*}
  X^*=((X^*)^{\dag})^*.
\end{equation*}
By symmetry
\begin{equation*}
Y^{\dag}=((Y^{\dag})^*)^{\dag}.
\end{equation*}
Then we have, as special cases,
\begin{align*}
(Y^{\dag})^*&=(((Y^{\dag})^*)^{\dag})^*,&
  (X^*)^{\dag}&=(((X^*)^{\dag})^*)^{\dag}.
\end{align*}
All claims now follow.
\end{proof}

It will be useful to note the following.

\begin{theorem}\label{thm:basis}
Suppose $X\mapsto X^*$ and $Y\mapsto Y^{\dag}$
constitute a Galois correspondence between $\pow A$ and $\pow B$.
Then
\begin{align*}
  X^*&=\bigcap_{a\in X}\{a\}^*,&
Y^{\dag}&=\bigcap_{b\in Y}\{b\}^{\dag}.
\end{align*}
\end{theorem}

\begin{proof}
Let $b\in X$.  Then
\begin{gather*}
  X^*\included\bigcap_{a\in X}\{a\}^*\included\{b\}^*,\\
  (\{b\}^*)^{\dag}\included\left(\bigcap_{a\in X}\{a\}^*\right)^{\dag}
\included(X^*)^{\dag},\\
X\included \bigcup_{a\in X}(\{a\}^*)^{\dag}
\included\left(\bigcap_{a\in X}\{a\}^*\right)^{\dag}
\included(X^*)^{\dag}.
\end{gather*}
Since $(X^*)^{\dag}$ is the closure of $X$, 
while $\left(\bigcap_{a\in X}\{a\}^*\right)^{\dag}$ is closed, we have
\begin{equation*}
\left(\bigcap_{a\in X}\{a\}^*\right)^{\dag}=(X^*)^{\dag}.
\end{equation*}
Since both $\bigcap_{a\in X}\{a\}^*$ and $X^*$ are closed, we are done.
\end{proof}

In particular, the subsets $\{a\}^*$ of $B$ compose a \emph{basis}
of the induced Moore family of closed subsets of $B$,
in the sense of the next subsection (page \pageref{subsect:bases}).

The notion of a Galois correspondence
is a generalization from the following special case.
Let $A$ and $B$ be two arbitrary sets,
and let $R$ be a relation from $A$ to $B$, so that, formally,
\begin{equation*}
  R\included A\times B.
\end{equation*}
Given subsets $X$ of $A$ and $Y$ of $B$, we define
\begin{align*}
  X^*&=\bigcap_{a\in X}\{y\in B\colon a\mathrel Ry\},&
  Y^{\dag}&=\bigcap_{b\in Y}\{x\in A\colon x\mathrel Rb\}.
\end{align*}
These definitions are due to Birkhoff,%%%%%
\footnote{In the third edition of his \emph{Lattice Theory} 
\cite[ch.~V, \S7, p.~122]{MR0227053}, 
Birkhoff cites the first edition of his book, 
from 1940, as being the origin.}
who refers to the functions $X\mapsto X^*$ and $Y\mapsto Y^{\dag}$
as \textbf{polarities.}
Then he easily observes the following.

\begin{theorem}\label{thm:polarities}
The polarities induced by a relation
constitute a Galois correspondence.  
\end{theorem}

\begin{sloppypar}
For example, suppose $L$ is a field with subfield $K$.
Then a Galois correspondence---\label{Galois}%
the original Galois correspondence---%
is induced by the relation $R$ between $L$ and $\Aut{L/K}$
given by\label{upper-sigma}
\begin{equation*}
  x\mathrel R\sigma\iff x^{\sigma}=x.
\end{equation*}
The existence of a one-to-one correspondence 
between the closed subsets of $L$ 
and the closed subsets of $\Aut{L/K}$ is now easy:
it follows from Theorem~\ref{thm:Gal-cor} (page \pageref{thm:Gal-cor}).
The hard part is identifying what those closed subsets are.
Easily they are subfields of $L$ that include $K$, 
and subgroups of $\Aut{L/K}$, respectively.
If $F$ is such a subfield, and $G$ is such a subgroup,
then the Galois correspondence is given by
\begin{align*}
  F^*&=\Aut{L/F},&G^{\dag}&=\Fix F.
\end{align*}
But it is not always the case that $F$ and $G$ are closed.
It \emph{is} the case if $\Aut{L/K}$ is finite and $K$ is closed:
this is the great theorem of the original Galois theory.%%%%%
\footnote{This follows from the theorem that Hungerford 
\cite[Ch. V, Theorem 2.15, p.~252]{MR600654} 
names for Artin.
In his \emph{Galois Theory}
\cite[Theorem 13, p.~36]{MR1616156},
Artin first shows $\card{\Aut{L/K}}\leq[L:K]$.
\enquote{Artin's Theorem} \cite[Theorem 14, p.~42]{MR1616156} is that, 
if $G$ is a finite subgroup of $\Aut L$ and $K=G^{\dag}$, 
then $[L:K]=\card G$.
In this case, we must also have $[L:K]=\card{(G^{\dag})^*}$;
so $G=(G^{\dag})^*$ and thus $G$ is closed.
Also $L/K$ must be separable,
and from this it follows that,
if $K\included F\included L$, then $F$ is closed.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{sloppypar}

\O ystein Ore shows that 
\emph{every} Galois correspondence arises from a relation
\cite[Theorem 10, \S5, p.~503]{MR0010555}:

\begin{theorem}
\sloppy
For every Galois correspondence between power sets $\pow A$ and $\pow B$,
there is a relation between $A$ and $B$
whose induced polarities constitute the Galois correspondence.
\end{theorem}

\begin{proof}
  Let the Galois correspondence be constituted 
by $X\mapsto X^*$ and $Y\mapsto Y^{\dag}$.
By Theorem \ref{thm:basis},
if we define the relation $R$ between $A$ and $B$ by
\begin{equation*}
  x\mathrel Ry\iff y\in\{x\}^*,
\end{equation*}
then $X\mapsto X^*$ is the induced polarity.
The same is true for $Y\mapsto Y^{\dag}$ by symmetry, since
\begin{multline*}
  y\in\{x\}^*
\implies\{y\}\included\{x\}^*
\implies(\{x\}^*)^{\dag}
\included\{y\}^{\dag}\\
\implies x\in\{y\}^{\dag}.\qedhere
\end{multline*}
\end{proof}

Finally we observe 
that every Moore family arises from a Galois correspondence:

\begin{theorem}\label{thm:Gal-from-Moore}
A Moore family $\mathscr F$ on a set $A$
consists of the closed subsets of $A$
determined by the Galois correspondence
induced by the relation $\in$ between $A$ and $\mathscr F$. 
\end{theorem}

\begin{proof}
The given Galois correspondence is
\begin{align*}
  X&\mapsto\{Y\in\mathscr F\colon X\included Y\},&
\mathscr Y&\mapsto\bigcap\mathscr Y.\qedhere
\end{align*}
\end{proof}

\subsection{Bases}\label{subsect:bases}


If $\mathscr F$ is a Moore family on $A$,
and $\mathscr B$ is a subset of $\mathscr F$ such that
\begin{equation*}
  F\in\mathscr F\implies F=\bigcap\{X\in\mathscr B\colon F\included X\},
\end{equation*}
then $\mathscr B$ is a \textbf{basis} for $\mathscr F$.

\begin{theorem}
Let $A$ be a set.
\begin{compactenum}
\item 
A Moore family on $A$ is a basis of itself.
\item
The family of Moore families on $A$
is a Moore family on $\pow A$.
\item
Every subset of $\pow A$ is a basis of its closure
with respect to the Moore family of Moore families on $A$.
\end{compactenum}
\end{theorem}

If $\mathscr B$ is a basis of the Moore family $\mathscr F$ on $A$,
then $\mathscr B$ may be said to \textbf{generate} $\mathscr F$.
As a corollary of Theorem~\ref{thm:basis}, we have:

\begin{theorem}\label{thm:basis-exp}
Suppose $X\mapsto X^*$ and $Y\mapsto Y^{\dag}$
constitute a Galois correspondence between $\pow A$ and $\pow B$.
The sets $\{a\}^*$, where $a\in A$, compose a basis for the Moore family
of closed subsets of $B$.
\end{theorem}

We can also generalize Theorem~\ref{thm:Gal-from-Moore}:

\begin{theorem}\label{thm:Gal-from-Moore-basis}
If $\mathscr B$ generates the Moore family $\mathscr F$ on $A$,
then $\mathscr F$ consists of the closed subsets of $A$
determined by the Galois correspondence
induced by the relation $\in$ between $A$ and $\mathscr B$. 
\end{theorem}

\begin{proof}
The given Galois correspondence is
\begin{align*}
  X&\mapsto\{Y\in\mathscr B\colon X\included Y\},&
\mathscr Y&\mapsto\bigcap\mathscr Y.\qedhere
\end{align*}
\end{proof}

A basis for a \emph{topology} $\tau$ on $A$
need not contain $\emptyset$ or be closed under $\cup$;
that is, the basis need not be a submonoid of $\tau$.
However, it may be, since $\tau$ is a basis of itself.

\begin{theorem}\label{thm:subm}
  If $\mathscr B$ is a submonoid of $(\pow A,\emptyset,\cup)$,
then the Moore family generated by $\mathscr B$
is a topology on $A$.
\end{theorem}

If $\mathscr C$ is an arbitrary subset of $\pow A$,
then $\mathscr C$ generates a submonoid $\mathscr B$ 
of $(\pow A,\emptyset,\cup)$,
and $\mathscr C$ may be called a \textbf{sub-basis}
of the topology generated by $\mathscr B$.

As a corollary of Theorems \ref{thm:basis-exp} and \ref{thm:subm}, we have:

\begin{theorem}\label{thm:basis-cor}
Suppose $X\mapsto X^*$ and $Y\mapsto Y^{\dag}$
constitute a Galois correspondence between $\pow A$ and $\pow B$.
If also $A$ has an element $1$ and a binary operation $\cdot$ such that
\begin{align*}
\emptyset&=\{1\}^*,&\{x\}^*\cup\{y\}^*&=\{x\cdot y\}^*,
\end{align*}
then the closed subsets of $B$ compose a topology on $B$.
\end{theorem}

The topology on $\spec$ (promised on page \pageref{spec-prom})
will arise in this way in Theorem \ref{thm:spec-top}.
Indeed, \emph{every} topology arises in this way,
by Theorem~\ref{thm:Gal-from-Moore-basis}.

In the theorem, the structure $(A,1,{}\cdot{})$ need not be a monoid.
However, if we define the binary relation $\sim$ on $A$ by
\begin{equation*}
  x\sim y\iff\{x\}^*=\{y\}^*,
\end{equation*}
then $\sim$ will be a congruence-relation on $(A,1,{}\cdot{})$
in the sense of \S\ref{subsect:cong-rel} (page \pageref{subsect:cong-rel}),
and the quotient $(A,1,{}\cdot{})\modsim$ will be a monoid.
We shall establish a variant of this result 
as Theorem \ref{thm:Lin} (page \pageref{thm:Lin}).
Meanwhile, in the situations of interest,
$(A,1,{}\cdot{})$ will already be known to be a monoid,
and $\sim$ will be equality.

All of the notions of this section 
can now be defined in terms of a relation between two sets:
\begin{compactenum}
\item 
A \emph{Galois correspondence} between $\pow A$ and $\pow B$
consists of the polarities 
induced by a relation from $A$ to $B$.
\item
A \emph{Moore family} of subsets of $A$
consists of the closed subsets of $A$
with respect to the Galois correspondence between $\pow A$ and $\pow B$
induced by some relation $R$ from $A$ to $B$
for some set $B$.
\item
That Moore family is a \emph{topology} on $A$,
if $B$ has an element $1$ and a binary operation $\cdot$ such that
\begin{align*}
  \lnot\;a&\mathrel R1,&
a&\mathrel R(x\cdot y)\iff a\mathrel Rx\Or a\mathrel Ry.
\end{align*}
\end{compactenum}


\subsection{The topology on a spectrum}

We define three possible properties of a topology $\tau$ on a set $A$.
\begin{asparaenum}
  \item
The topology $\tau$ is \textbf{compact} if,
for every subset $\mathscr X$ of $\tau$ such that
\begin{equation*}
  \bigcap\mathscr X=\emptyset,
\end{equation*}
there is a finite subset $\{X_0,\dots,X_{n-1}\}$ of $\mathscr X$ such that
\begin{equation*}
  X_0\cap\dots\cap X_{n-1}=\emptyset.
\end{equation*}
If $\tau$ has basis $\mathscr B$,
it is enough to assume $\mathscr X\included\mathscr B$.
We may use the definition in the contrapositive form.
A subset $\mathscr X$ of $\tau$ has the \textbf{finite intersection property}
if its every finite subset has nonempty intersection.
Then $\tau$ is compact if and only if
its every subset with the finite intersection property
has nonempty intersection.
\item
Two points of $A$ are \textbf{topologically indistinguishable}\label{top-indist}
if every member of $\tau$ contains either both or neither of the points.
It is enough if this is true for every member of a given basis.
The topology $\tau$ is \textbf{Kolmogorov,} or $T_0$,
if \emph{no} two distinct points of $A$
are topologically indistinguishable.
\item
The topology $\tau$ is \textbf{Hausdorff}\label{Hausdorff}
if for all distinct elements $x_0$ and $x_1$ of $A$
there are elements $F_0$ and $F_1$ of $\tau$ such that
\begin{align*}
  x_0&\notin F_0,& x_1&\notin F_1,&F_0\cup F_1&=A.
\end{align*}
Again it is enough to require $F_0$ and $F_1$ to belong to a given basis.
\end{asparaenum}

Given an element $a$ of a commutative ring $R$, 
let us use the notation\label{var}
\begin{equation*}
  \var a=\{\primei\in\spec\colon a\in\primei\}.
\end{equation*}
This gives us the following.

\begin{theorem}\label{thm:spec-top}
Let $R$ be a commutative ring.
\begin{compactenum}
\item 
The set $\{\var x\colon x\in R\}$ 
is a basis for a topology on $\spec$, since
\begin{align}\label{eqn:var-xy}
\emptyset&=\var1,&
\var x\cup\var y&=\var{xy}.
\end{align}
\item
The topology is Kolmogorov.
\item 
\sloppy
By the Prime Ideal Theorem (page \pageref{mith})\PI,
$\spec$ is nonempty, and its topology is compact.
\item
If $R$ is a Boolean ring, the topology is Hausdorff,
and the complement of every $\var x$ is $\var{1+x}$.
\end{compactenum}
\end{theorem}

\begin{proof}
  \begin{asparaenum}
    \item
Let a Galois correspondence $X\mapsto X^*$ and $Y\mapsto Y^{\dag}$
between $\pow{R}$ and $\pow{\spec}$ 
be determined by the relation $\in$ between $R$ and $\spec$.
Then
\begin{equation*}
  \{x\}^*=\var x.
\end{equation*}
Since elements $\primei$ of $\spec$ are prime ideals, we have
\begin{gather*}
    xy\in\primei\iff x\in\primei\Or y\in\primei,\\
\primei\in\var{xy}\iff\primei\in\var x\Or \primei\in\var y,
\end{gather*}
and so \eqref{eqn:var-xy} holds.
By Theorem~\ref{thm:basis-cor},
the sets $\var x$ compose a basis of a topology on $\spec$.
\item
If $\primei$ and $\mathfrak q$ are distinct elements of $\spec$,
we may assume $\primei\setminus\mathfrak q$ has an element $a$,
and so $\var a$ contains $\primei$, but not $\mathfrak q$.
\item
Suppose $A\included R$.  Then
\begin{equation*}
 \bigcap_{x\in A}\var x=\{\primei\in\spec\colon A\included\primei\}.
\end{equation*}
If $(A)$ is a proper ideal of $R$, 
then by Theorem~\ref{thm:MI} (page~\pageref{thm:MI})
it is included in a prime ideal,
which belongs to $\spec$ and therefore to $\bigcap_{x\in A}\var x$.
It follows that, if
\begin{equation*}
  \bigcap_{x\in A}\var x=\emptyset,
\end{equation*}
\sloppy
then $(A)$ must contain $1$.
In this case, by Theorem~\ref{thm:(A)} (page~\pageref{thm:(A)}),
there is $x$ in $\bigoplus_{a\in A}R$ such that
\begin{equation*}
  1=\sum_{a\in A}x_aa.
\end{equation*}
Then
\begin{equation*}
\bigcap_{a\in\supp x}\var a=\emptyset.  
\end{equation*}
Since $\supp x$ is a finite subset of $A$, 
the topology of $\spec$ is compact.
\item
In a Boolean ring $R$, since $1+1=0$ 
(Theorem \ref{thm:Br-2}, page \pageref{thm:Br-2}),
every element of $\spec$ contains exactly one of $x$ and $1+x$
(Theorem~\ref{thm:Boole}, page \pageref{thm:Boole}),
so $\spec$ is the disjoint union of $\var x$ and $\var{1+x}$.
If $\primei$ and $\mathfrak q$ are distinct elements of $\spec$,
then we may assume $\mathfrak q\setminus\primei$ has an element $a$,
and then
\begin{align*}
  \primei&\notin\var a,&
\mathfrak q&\notin\var{1+a},&
\var a\cup\var{1+a}&=\spec.\qedhere
\end{align*}
  \end{asparaenum}
\end{proof}

\begin{sloppypar}
The topology on $\spec$ given by the theorem 
is the \textbf{Zariski topology} on $\spec$.
The corresponding closed subsets of $R$
are just the intersections of collections of prime ideals of $R$;
we shall characterize such intersections
in Corollary \ref{cor:rad} in the next section.
\end{sloppypar}

\section{Radical ideals}%\label{sect:spectra}

We develop an analogue of Theorems~\ref{thm:max-field} 
(page~\pageref{thm:max-field}) and~\ref{thm:prime-ID} 
(page~\pageref{thm:prime-ID}).
An element $a$ of a commutative ring $R$ is called \textbf{nilpotent} 
if some power $a^n$ of the element is $0$.
In particular, $0$ itself is nilpotent.
The ring $R$ is called \textbf{reduced} if it has no nonzero nilpotents.
For example, every Boolean ring is reduced.
An ideal $I$ of $R$ is called \textbf{radical} if
\begin{equation*}
  x^2\in I\implies x\in I.
\end{equation*}
Every prime ideal of every commutative ring is radical.
Indeed, every \emph{intersection} of prime ideals is radical.
Thus, under the Galois correspondence 
induced by the relation $\in$ between $R$ and $\spec$,
all of the closed subsets of $R$ are radical ideals.
We shall establish the converse.
Meanwhile, we have the analogue promised above.

\begin{theorem}\label{thm:rad-reduced}
  Let $R$ be a commutative ring.
  \begin{compactenum}
  \item 
The ideal $(0)$ of $R$ is radical if and only if $R$ is reduced.
\item
An ideal $I$ of $R$ is radical if and only if the quotient $R/I$ is reduced.
  \end{compactenum}
\end{theorem}

Thus
\begin{align*}
  \text{radical ideal}:\text{reduced ring}
&::\text{prime ideal}:\text{integral domain}\\
&::\text{maximal ideal}:\text{field}.
\end{align*}
But the following easy result 
does not hold for maximal ideals or prime ideals.
Recall from page \pageref{Moore} that a \emph{Moore family} on a set
is just a family of subsets that is closed under arbitrary intersections.
Then the following is an analogue 
of Theorem~\ref{thm:ideal-int} (page \pageref{thm:ideal-int}).

\begin{theorem}\label{thm:rad-M}
  The radical ideals of a commutative ring $R$ compose a Moore family on $R$.
\end{theorem}

By this and Theorem~\ref{thm:Moore} (page~\pageref{thm:Moore}), 
the Moore family of radical ideals of $R$
induces a closure operation
\begin{equation*}
  X\mapsto\surd(X)
\end{equation*}
on $R$.  If $I$ is an ideal of $R$,
then $\surd I$ is called the \textbf{radical} of $I$:
it is the smallest radical ideal that includes $I$.
Then $I$ is radical if and only if $I=\surd I$.

Given a subset $X$ of $R$, 
we characterized $(X)$ in Theorem~\ref{thm:(A)} (page \pageref{thm:(A)}).
Now we can characterize $\surd(X)$:

\begin{theorem}
  If $I$ is an ideal of the commutative ring $R$,
then
\begin{equation*}
  \surd I=\bigcup_{n\in\N}\{x\in R\colon x^n\in I\}.
\end{equation*}
\end{theorem}

But the following characterization will be of more theoretical interest.

\begin{theorem}\label{thm:rad}
By Zorn's Lemma\ac,
for all subsets $A$ of a commutative ring $R$,
\begin{equation}\label{eqn:rad(A)}
\surd(A)=\bigcap\{\mathfrak p\in\spec\colon A\included\mathfrak p\}.
\end{equation}
\end{theorem}

\begin{proof}
Since prime ideals are radical,
and $\surd(A)$ is the smallest radical ideal that includes $A$,
it is clear that
\begin{equation*}
\surd(A)\included\bigcap\{\mathfrak p\in\spec\colon A\included\mathfrak p\}.
\end{equation*}
To prove the reverse inclusion,
suppose $x\in R\setminus\surd(A)$;
we show the intersection in \eqref{eqn:rad(A)} does not contain $x$ either.  
Using Zorn's Lemma, we let $\mathfrak b$ be an ideal of $R$ 
that is maximal with respect to including $\surd(A)$, 
but not containing any power of $x$.  
Say $y$ and $z$ are not in $\mathfrak b$.  
By maximality, each of the ideals $\mathfrak b+(y)$ and $\mathfrak b+(z)$
must contain a power of $x$, say
\begin{align*}
x^m&\in\mathfrak b+(y),&
x^n&\in\mathfrak b+(z),
\end{align*}
and therefore, by multiplying,
\begin{equation*}
x^{m+n}\in\mathfrak b+(yz),
\end{equation*}
so $yz\notin\mathfrak b$ (since $x^{m+n}\notin\mathfrak b$).  
Thus $\mathfrak b$ is prime,
so it belongs to the intersection in~\eqref{eqn:rad(A)}.
Therefore this intersection does not contain~$x$.
Thus
\begin{equation*}
\surd(A)\includes\bigcap\{\mathfrak p\in\spec\colon A\included\mathfrak p\}.
\qedhere
\end{equation*}
\end{proof}

When $R$ is a Boolean ring,
$\surd(A)$ is just $(A)$,
and also the theorem needs only the Prime Ideal Theorem,
because in this case,
for a prime ideal \emph{not} to contain $x$
is the same as containing $1+x$.

\begin{corollary}\label{cor:rad}
For every commutative ring $R$,
under the Galois correspondence 
induced by the relation $\in$ between $R$ and $\spec$,
the closed subsets of $R$ are precisely the radical ideals.
\end{corollary}

For all commutative rings $R$,
Theorem~\ref{thm:ring-prod} (page~\pageref{thm:ring-prod}) 
guarantees us a homomorphism 
\begin{equation}\label{eqn:x|->x+p}
 x\mapsto\bigl(x+\primei\colon\primei\in\spec\bigr) 
\end{equation}
from $R$ to $\prod_{\primei\in\spec}R/\primei$.

\begin{theorem}\label{thm:red-emb}
A commutative ring $R$ is reduced if,
and by Zorn's Lemma\ac\ only if,
the homomorphism in \eqref{eqn:x|->x+p} 
is an embedding.
\end{theorem}

\begin{proof}
The homomorphism is an embedding if and only if
\begin{equation*}
 \bigcap\spec=(0). 
\end{equation*}
By the last theorem, $\bigcap\spec=\surd(0)$.
By Theorem \ref{thm:rad-reduced},
$R$ is reduced if and only if $(0)=\surd(0)$.
\end{proof}

The \textbf{clopen} subsets of a topological space
are the subsets that are both closed and open.
The following, based originally on \cite{MR1501865},
is an analogue of Cayley's Theorem for groups 
(page \pageref{thm:Cay}) 
and Theorem~\ref{thm:x-lambda_x} for associative rings 
(page~\pageref{thm:x-lambda_x}).

\begin{theorem}[Stone Representation Theorem for Boolean Rings]%
\label{thm:Stone}
Suppose $R$ is a Boole\-an ring.
\begin{compactenum}
\item 
By the Prime Ideal Theorem\PI,
the Boolean ring $R$ 
embeds in the Boolean ring $\pow{\spec}$ under the map
  \begin{equation}\label{eqn:x|->p}
    x\mapsto\{\primei\in\spec\colon x\notin\primei\}.
  \end{equation}
\item
This map is $x\mapsto\var{1+x}$.
\item
The image of this map is the set of clopen subsets of $\spec$. 
\end{compactenum}
\end{theorem}

\begin{proof}
  \begin{sloppypar}
The map in \eqref{eqn:x|->p} 
is part of the commutative diagram in Figure~\ref{fig:Stone}.%
\begin{figure}[ht]
\centering
\makebox[0pt][c]{
\begin{math}
  \xymatrix@=5.9cm@!{
R
\ar[r]_{x\mapsto\{\primei\in\spec\colon x\notin\primei\}}
\ar[d]^(0.3){x\mapsto(x+\primei\colon\primei\in\spec)}
&\pow{\spec}\\
\displaystyle\prod_{\primei\in\spec}R/\primei
\ar[r]^{(e_{\primei}+\primei\colon\primei\in\spec)
\mapsto(e_{\primei}\colon\primei\in\spec)}
\ar[ur]|{x\mapsto\supp x}
&\F_2{}^{\spec}
\ar[u]^(0.3){x\mapsto\supp x}
}
\end{math}}  
  \caption{Stone Representation Theorem}\label{fig:Stone}
  
\end{figure}
We can spell out the details as follows.
By Theorem \ref{thm:Boole} (page \pageref{thm:Boole}),
for each $\primei$ in $\spec$, 
the quotient $R/\primei$ is isomorphic to the field $\F_2$,
and so $\prod_{\primei\in\spec}R/\primei$ is isomorphic to $\F_2{}^{\spec}$.
The inverse of this isomorphism is easier to write down: it is
\begin{equation*}
  (e_{\primei}\colon\primei\in\spec)
\mapsto(e_{\primei}+\primei\colon\primei\in\spec).
\end{equation*}
The power $\F_2{}^{\spec}$ 
is in turn isomorphic to $\pow{\spec}$ under $x\mapsto\supp x$ 
by Theorem~\ref{thm:pow} (page \pageref{thm:pow}).
Then $x\mapsto\supp x$ is also an isomorphism 
from $\prod_{\primei\in\spec}R/\primei$ to $\pow{\spec}$.
Preceding this with the embedding of $R$ in $\prod_{\primei\in\spec}R/\primei$ 
given by the last theorem, we obtain the map in \eqref{eqn:x|->p}.
  \end{sloppypar}

By Theorem~\ref{thm:spec-top} (page \pageref{thm:spec-top}),
this map is $x\mapsto\var{1+x}$,
and all of the sets $\var x$ are clopen.
Conversely, suppose a closed subset $F$ of $\spec$ is also open.
We have
\begin{equation*}
F=\bigcap_{x\in I}\var x,
\end{equation*}
where $I=\bigcap F$.
Being a closed subset of a compact space,
the complement of $F$ in $\spec$ is compact.
Therefore $I$ has a finite subset $\{x_0,\dots,x_{n-1}\}$ such that
\begin{gather*}
  F=\var{x_0}\cap\dots\cap\var{x_{n-1}}=\var{x_0\dotsm x_{n-1}},\\
  \begin{aligned}
    \spec\setminus F
&=\var{1+x_0}\cup\dots\cup\var{1+x_{n-1}}\\
&=\var{(1+x_0)\dotsm(1+x_{n-1})},
  \end{aligned}\\
F=\var{1+(1+x_0)\dotsm(1+x_{n-1})}.\qedhere
\end{gather*}
\end{proof}

We shall see this theorem in another form
as Theorem \ref{thm:Stone2} (page \pageref{thm:Stone2}).
Meanwhile, for an arbitrary commutative ring $R$,
since each quotient $R/\primei$ is an integral domain,
it will be seen to embed in a field (see page~\pageref{qf}),
and so, by Theorem~\ref{thm:red-emb}, 
every reduced ring will embed in a product of fields.


\section{Localization}\label{sect:loc}

It will be useful now to generalize the construction of $\Q$ from $\Z$
that is suggested by Theorem~\ref{thm:Z->Q} (page~\pageref{thm:Z->Q}).
A subset of a commutative ring 
is called \textbf{multiplicative}\index{multiplicative} 
if it is nonempty and closed under multiplication.  
For example, $\Z\setminus\{0\}$ is a multiplicative subset of $\Z$,
and more generally, we have the following.

\begin{theorem}
  An ideal $\mathfrak p$ of a commutative ring $R$ is prime 
if and only if the complement $R\setminus\mathfrak p$ is multiplicative.
\end{theorem}

For example, by Theorem \ref{thm:zero-div-prime} 
(page \pageref{thm:zero-div-prime}),
the elements of a nontrivial commutative ring
that are neither $0$ nor zero-divisors
compose a multiplicative subset.
Other examples of multiplicative subsets of a commutative ring $R$
are $\{1\}$ and $\units R$.
However, the complements of prime ideals 
are the only examples of multiplicative subsets that will interest us.

\begin{lemma}
If $S$ is a multiplicative subset of a commutative ring $R$, 
then on $R\times S$ there is an equivalence relation $\sim$ given by
\begin{equation}\label{eqn:q}
  (a,b)\sim (c,d)\iff (ad-bc)\cdot e=0\text{ for some $e$ in }S.
\end{equation}
\end{lemma}

\begin{proof}
Reflexivity and symmetry are obvious.  
For transitivity, note that, 
if $(a,b)\sim(c,d)$ and $(c,d)\sim(e,f)$, 
so that, for some $g$ and $h$ in $S$,
\begin{align*}
0&=(ad-bc)g=adg-bcg,&0&=(cf-de)h=cfh-deh,
\end{align*}
then $(a,b)\sim(e,f)$, since $dgh\in S$ and
\begin{align*}
(af-be)dgh
&=afdgh-bedgh\\
&=adg\cdot fh-deh\cdot bg
=bcg\cdot fh-cfh\cdot bg=0.\qedhere
\end{align*}
\end{proof}

In the notation of the lemma, 
the equivalence class of the element $(a,b)$ of $R\times S$ 
is denoted by one of
\begin{align*}
  &a/b,&
  &\frac ab,
\end{align*}
and the quotient $(R\times S)\modsim$ is denoted by one of
\begin{align*}
  &S\inv R,&&R[S\inv].
\end{align*}
If $0\in S$, then $S\inv R$ has exactly one element, which is $0/0$.  
If $R$ is an integral domain and $0\notin S$, 
then the relation $\sim$ in the lemma is given simply by
\begin{equation*}
  (a,b)\sim(c,d)\iff ad=bc.
\end{equation*}
However, we shall be interested in commutative rings
that are not integral domains.

\begin{theorem}\label{thm:loc}
  Suppose $R$ is a commutative ring with multiplicative subset $S$.
  \begin{compactenum}
  \item 
  In $S\inv R$, if $c\in S$,
  \begin{equation*}
\frac ab=\frac{ac}{bc}.
\end{equation*}
  \item
  $S\inv R$ is a commutative ring
in which the operations are given by
\begin{align*}
  \frac ab\cdot\frac cd&=\frac{ac}{bd},&
  \frac ab\pm\frac cd&=\frac{ad\pm bc}{bd}.  
\end{align*}
  \item
There is a ring-homomorphism $\phi$ from $R$ to $S\inv R$ where, 
for every $a$ in $S$,
  \begin{equation*}
\phi(x)=\frac{xa}a.
\end{equation*}
In particular, if $1\in S$, then $\phi(x)=x/1$.
\item
The homomorphism $\phi$ is injective if and only if 
$S$ contains neither $0$ nor zero-divisors.
\newcounter{local}
\setcounter{local}{\value{enumi}}
  \end{compactenum}
   Suppose in particular $R$ is an integral
domain and $0\notin S$.  
\begin{compactenum}
\setcounter{enumi}{\value{local}}
\item
$S\inv R$ is an integral domain (and $\phi$ is an embedding).
\item
If $S=R\setminus\{0\}$, then $S\inv R$ is a field,
and if $\psi$ is an embedding of $R$ in a field $K$, 
then there is an embedding $\tilde{\psi}$ of $S\inv R$ in $K$ 
such that $\tilde{\psi}\circ\phi=\psi$.
(See Figure \ref{fig:qf}.)
\begin{figure}[ht]
\begin{equation*}
\xymatrix@!{
R\ar[r]^{\psi}\ar[d]_{\phi}&K\\
S\inv R\ar@{.>}[ur]_{\tilde{\psi}}&
}
\end{equation*}
\caption{The universal property of the quotient field}\label{fig:qf}
\end{figure}
\end{compactenum}
\end{theorem}

\begin{corollary}\label{cor:ID}
  A commutative ring is an integral domain
if and only if it is a subring of a field.
\end{corollary}

See page \pageref{id} for a model-theoretic consequence of the corollary.

When $S$ is the complement of a prime ideal $\primei$, 
then $S\inv R$ is called 
the \textbf{localization}\index{local!---ization} of $R$ at $\primei$  
and can be denoted by
\begin{equation*}
  R_{\primei}.
\end{equation*}
If $R$ is an integral domain, so that $(0)$ is prime, 
then the localization $R_{(0)}$ (which is a field by the theorem)
is the \textbf{quotient-field}\label{qf}%
\index{quotient!--- field}\index{field!quotient ---} 
of $R$. 
In this case, the last part of the theorem 
describes the quotient field 
in terms of a \emph{universal property} 
in the sense of page~\pageref{up}.
However, it is important to note that,
if $R$ is not an integral domain,
then the homomorphism $x\mapsto x/1$ from $R$ to $R_{\primei}$
might not be an embedding.
The following will be generalized as Theorem~\ref{thm:reg-quot-loc}
(page~\pageref{thm:reg-quot-loc}).

\begin{theorem}\label{thm:Br-quot-loc}
For every Boolean ring $R$,
for every $\primei$ in $\spec$,
the homomorphism
\begin{equation*}
  x\mapsto\frac x1
\end{equation*}
% $x\mapsto x/1$ 
from $R$ to $R_{\primei}$
is surjective and has kernel $\primei$.
Thus
\begin{equation*}
R_{\primei}\cong R/\primei
\end{equation*}
(which is isomorphic to $\F_2$ by Theorem \ref{thm:Boole},
page \pageref{thm:Boole}).
\end{theorem}

\begin{proof}
  If $a\in R$ and $b\in R\setminus\primei$,
then $a/b=a/1$ since $(a-ab)\cdot b=0$.
Thus $x\mapsto x/1$ is surjective.
If $a\in\primei$,
then $1+a\in R\setminus\primei$, and $a\cdot(1+a)=0$,
so $a/1=0/1$.
Thus the kernel of $x\mapsto x/1$ includes $\primei$.
Therefore the kernel must \emph{be} $\primei$,
since this ideal is maximal by Theorem \ref{thm:Boole},
and $R_{\primei}$ is not trivial.
\end{proof}

A \textbf{local ring}\index{ring!local ---}\index{local!--- ring}
is a commutative ring with a unique maximal ideal.  
The connection between localizations and local rings 
is made by the theorem below.

\begin{lemma}
An ideal $\maxi$ of a commutative ring $R$ is a unique maximal ideal of $R$
if and only if
\begin{equation*}
 \units R=R\setminus\maxi. 
\end{equation*}
\end{lemma}

\begin{theorem}\label{thm:local-ring}
The localization $R_{\primei}$ of a commutative ring $R$ at a prime ideal $\primei$ 
is a local ring
whose unique maximal ideal is
\begin{equation*}
\primei R_{\primei},
\end{equation*}
namely the ideal generated by the image of $\primei$.
\end{theorem}

\begin{proof}
The ideal $\primei R_{\primei}$ consists of those $a/b$ such that $a\in\primei$.  
In this case, if $c/d=a/b$, then $(cb-da)e=0$ for some $e$ in $R\setminus\primei$, 
so $cbe\in\primei$ (since $da\in\primei$), 
and hence $c\in\primei$, since $\primei$ is prime 
and neither $b$ nor $e$ is in $\primei$.  
Hence for all $x/y$ in $R_{\primei}$,
\begin{align*}
x/y\notin\primei R_{\primei}
&\iff x\notin\primei\\
&\iff x/y\text{ has an inverse, namely }y/x.
\end{align*}
By the lemma, we are done.
\end{proof}

We can now refer to $R_{\mathfrak p}$ (where $\mathfrak p$ is prime) 
as the local ring of $R$ at $\mathfrak p$.  
%A reason for the terminology will be seen in algebraic geometry.


\section{Regular rings}\label{sect:vN}

By Theorem~\ref{thm:Boole} (page~\pageref{thm:Boole}),
the Boolean rings are commutative rings whose prime ideals are maximal.
There is a larger class of commutative rings whose prime ideals are maximal.
Indeed, by the Stone Representation Theorem (page~\pageref{thm:Stone}),
every Boolean ring embeds in a power set $\pow{\Omega}$
and hence in a power $\F_2{}^{\Omega}$.
This power is a special case of the direct product $\prod_{i\in\Omega}K_i$,
where each $K_i$ is a field.
For every $x$ in the ring $\prod_{i\in\Omega}K_i$ 
there is $y$ in the ring such that
\begin{equation*}
xyx=x.
\end{equation*}
Indeed, we can just let $y$ be $x^*$,
defined as on page~\pageref{x^*}.
Therefore the ring $\prod_{i\in\Omega}K_i$ is called 
a \textbf{(von Neumann) regular ring.}%%%%%
\footnote{In general, a regular ring need not be commutative; 
see \cite[IX.3, ex.~5, p.~442]{MR600654}.}
Thus Boolean rings are also regular rings in this sense, 
since in a Boolean ring
\begin{equation*}
  x\cdot1\cdot x=x.
\end{equation*}
A regular ring can also be understood as a ring in which, for all $x$,
\begin{equation*}
  x\in(x^2).
\end{equation*}
We have the following easily.

\begin{theorem}\label{thm:reg-red}
  Every regular ring is reduced.
\end{theorem}

\begin{proof}
  Suppose $R$ is regular and $x^2=0$.
But $x=x^2y$ for some $y$, and so $x=0$.
\end{proof}

We can establish the following generalization 
of the first part of Theorem~\ref{thm:Boole} (page \pageref{thm:Boole}).

\begin{theorem}\label{thm:reg-pr-max}
In regular rings, all prime ideals are maximal.
\end{theorem}

\begin{proof}
If $R$ is a regular ring, and $\primei$ is a prime ideal,
then for all $x$ in $R$, for some $y$ in $R$,
\begin{equation*}%\label{eqn:xy-1}
(xy-1)\cdot x=0,  
\end{equation*}
and so at least one of $xy-1$ and $x$ is in $\primei$.  
Hence if $x+\primei$ is not $0$ in $R/\primei$,
then $x+\primei$ has the inverse $y+\primei$. 
Thus $R/\primei$ is a field, so $\primei$ is maximal.
\end{proof}

We now generalize Theorem \ref{thm:Br-quot-loc} 
(page \pageref{thm:Br-quot-loc}).

\begin{theorem}\label{thm:reg-quot-loc}
\sloppy
For every regular ring $R$,
for every $\primei$ in $\spec$,
the homomorphism
\begin{equation*}
  x\mapsto\frac x1
\end{equation*}
% $x\mapsto x/1$ 
from $R$ to $R_{\primei}$
is surjective and has kernel $\primei$.
Thus
\begin{equation*}
R_{\primei}\cong R/\primei.
\end{equation*}
\end{theorem}

\begin{proof}
If $a\in R$ and $b\in R\setminus\primei$, 
and $bcb=b$,
then the elements $a/b$ and $ac/1$ of $R_{\primei}$ are equal since
\begin{equation*}
(a-bac)b=ab-abcb=ab-ab=0.
\end{equation*}
Thus the homomorphism $x\mapsto x/1$ from $R$ to $R_{\primei}$
guaranteed by Theorem~\ref{thm:loc} is surjective.
By the last theorem, $\primei$ is maximal,
and hence $R_{\primei}$ is a field.
As in that theorem,
supposing $x\in\primei$,
we have 
\begin{equation*}
(xy-1)\cdot x=0 
\end{equation*}
%\eqref{eqn:xy-1} 
for some $y$, but $1-xy\notin\primei$.
This shows $x/1=0/1$.
Thus the kernel of $x\mapsto x/1$ includes $\primei$.
Having a prime ideal, $R$ is not the trivial ring, 
so $R_{\primei}$ is not trivial,
and thus the kernel of $x\mapsto x/1$ cannot be all of $R$.
Therefore the kernel is $\primei$, since this is a maximal ideal.
\end{proof}

The foregoing three theorems turn out to \emph{characterize} regular rings.
That is, every ring of which the conclusions of these theorems hold 
must be regular.
In fact a somewhat stronger statement is true;
this is the next theorem below.

For any commutative ring $R$, the ideal $\surd(0)$ consists precisely of the nilpotent elements of $R$ and is accordingly called the \textbf{nilradical} of $R$.
By Theorem~\ref{thm:rad} (page~\pageref{thm:rad}),
\begin{equation*}
  \surd(0)=\bigcap\spec.
\end{equation*}
By Theorem~\ref{thm:rad-reduced} (page~\pageref{thm:rad-reduced}),
this ideal is just $(0)$ if and only if $R$ is reduced.

\begin{theorem}\label{thm:reg-eq}
By the Maximal Ideal Theorem\ac,
the following are equivalent conditions on a ring $R$.%%%%%
\footnote{The equivalence of these conditions 
is part of \cite[Thm~1.16, p.~7]{MR533669}.
This theorem adds a fourth equivalent condition:
\enquote{All simple $R$-modules are injective.}
The proofs given involve module theory,
except the proof that, 
if all prime ideals are maximal, 
and the ring is reduced, 
then each localization at a maximal ideal is a field.
That proof is reproduced below.}
%%%%%%%%%%%%%%%%%%%%
\begin{compactenum}
\item\label{item:reg}
$R$ is regular.
\item\label{item:p-max,red}
Every prime ideal of $R$ is maximal, and $R$ is reduced.
\item\label{item:loc-field}
The localization $R_{\maxi}$ is a field for all maximal ideals $\maxi$ of $R$.
\end{compactenum}
\end{theorem}

\begin{proof}
  \begin{asparaenum}
    \item
We have established \eqref{item:reg}$\lto$\eqref{item:p-max,red} 
in Theorems~\ref{thm:reg-red} and~\ref{thm:reg-pr-max}. 
\item
We prove \eqref{item:p-max,red}$\lto$\eqref{item:loc-field}.
Suppose every prime ideal of $R$ is maximal, and $R$ is reduced.
Let $\maxi$ be a maximal ideal of $R$.
By Theorem~\ref{thm:local-ring} (page~\pageref{thm:local-ring}),
$\maxi R_{\maxi}$ is the unique maximal ideal of $R_{\maxi}$.
By Zorn's Lemma\ac,
every prime ideal $\mathfrak P$ of $R_{\maxi}$ 
is included in a maximal ideal;
but then this must be $\maxi R_{\maxi}$.
Now, the intersection $\maxi R_{\maxi}\cap R$ 
is a proper ideal of $R$ that includes $\maxi$, so it is $\maxi$.
Hence $\mathfrak P\cap R$ is a prime ideal of $R$ that is included in $\maxi$,
so it is $\maxi$, and therefore $\mathfrak P=\maxi R_{\maxi}$.
Thus this maximal ideal is the unique prime ideal of $R_{\maxi}$.
This ideal is therefore $\bigcap\spec[R_{\maxi}]$,
which is the nilradical of the ring.
Thus for all $r/s$ in $\maxi R_{\maxi}$, for some $n$ in $\N$,
we have $(r/s)^n=0$, so $r^n/s^n=0$, 
and therefore $tr^n=0$ for some $t$ in $R\setminus\maxi$.
In this case, $(tr)^n=0$, so $tr=0$, and therefore $r/s=0$.
In short, $\maxi R_{\maxi}=(0)$.
Therefore $R_{\maxi}$ is a field.
\item
Finally, we show \eqref{item:loc-field}$\lto$\eqref{item:reg}.
Suppose $R_{\maxi}$ is a field for all maximal ideals $\maxi$ of $R$.
If $x\in R$, define
\begin{equation*}
  I=\{r\in R\colon rx\in(x^2)\}.
\end{equation*}
This is an ideal of $R$ containing $x$.
We shall show that it contains $1$.
We do this by showing that it is not included in any maximal ideal $\maxi$.
If $x\notin\maxi$, then, since $x\in I$, we have $I\nincluded\maxi$.
If $x\in\maxi$, then $x/1\notin\units{(R_{\maxi})}$, 
so, since $R_{\maxi}$ is a field,
we have $x/1=0/1$, and hence
\begin{equation*}
  rx=0
\end{equation*}
for some $r$ in $R\setminus\maxi$; but $r\in I$.
Again $I\nincluded\maxi$.
Thus $I$ must be $(1)$, so $x\in(x^2)$.
Therefore $R$ is regular.\qedhere
  \end{asparaenum}
\end{proof}

We again consider the regular rings that are products $\prod\mathscr K$,
where $\mathscr K$ is an indexed family $(K_i\colon i\in\Omega)$ of fields.
Here we have $xx^*x=x$ when $x^*$ is defined 
as in \eqref{eqn:*} on page~\pageref{eqn:*}.
Hence every sub-ring of $\prod\mathscr K$ 
that is closed under the operation $x\mapsto x^*$ is also a regular ring.

We now prove the converse:
every regular ring is isomorphic to a sub-ring, 
closed under $x\mapsto x^*$, of a product of fields.
Since regular rings are reduced (Theorem~\ref{thm:reg-red}),
the homomorphism
\begin{equation}\label{eqn:x|->x+p2}
 x\mapsto\bigl(x+\primei\colon\primei\in\spec\bigr) 
\end{equation}
from $R$ to $\prod_{\primei\in\spec}R/\primei$ 
(given also in \eqref{eqn:x|->x+p}, page \pageref{eqn:x|->x+p})
is an embedding by Theorem~\ref{thm:red-emb} (page \pageref{thm:red-emb}).
Moreover, the quotients $R/\primei$ are fields 
by Theorem~\ref{thm:reg-pr-max} 
(and Theorem~\ref{thm:max-field}, page~\pageref{thm:max-field}).


\begin{theorem}\label{thm:reg-prod}
For every regular ring $R$, the image of the embedding in~\eqref{eqn:x|->x+p2}
of $R$ in the product $\prod_{\primei\in\spec}R/\primei$ of fields
is closed under $x\mapsto x^*$.
\end{theorem}

\begin{proof}
Let the embedding be called $f$.
Given $x$ in $R$, we have to show that $f(x)^*$ is in the image of $f$.
Now, there is $y$ in $R$ such that $xyx=x$, and therefore
\begin{equation*}
  f(x)f(y)f(x)=f(x).
\end{equation*}
For each $\primei$ in $\spec$, 
by applying the coordinate projection $\uppi_{\primei}$, we obtain
\begin{equation*}
  (x+\primei)(y+\primei)(x+\primei)=x+\primei.
\end{equation*}
If $x+\primei\neq0$, we can cancel it, obtaining
\begin{equation*}
y+\primei=(x+\primei)\inv=(x+\primei)^*.
\end{equation*}
However, possibly $x+\primei=0$, while $y+\primei\neq0$,
so that $f(y)\neq f(x)^*$.
In this case, letting $z=yxy$, we have
\begin{gather*}
  xzx=xyxyx=xyx=x,\\
zxz=yxyxyxy=yxyxy=yxy=z.
\end{gather*}
In short, $xzx=x$ and $zxz=z$.
Then
\begin{align*}
x\in\primei&\iff z\in\primei,&
x\notin\primei&\implies z+\primei=(x+\primei)\inv,
\end{align*}
so $(x+\primei)^*=z+\primei$.
Thus $f(z)=f(x)^*$.
\end{proof}

\section{Products of spaces}

Being a group by Theorem \ref{thm:group-prod},
the direct product of a family $(A_i\colon i\in\Omega)$ of groups is nonempty.
Indeed, it contains the identity $(1^{A_i}\colon i\in\Omega)$.
(This is true, even if $\Omega$ is empty.)
If each $A_i$ is merely a nonempty \emph{set,}
we define their \textbf{Cartesian product} 
in the same way as a direct product of groups or rings.
However, it is not obvious that the product of a family of nonempty sets
will itself be nonempty.

\begin{theorem}[Cartesian Product]
By the Axiom of Choice\ac,
the product of an indexed family of nonempty sets is nonempty.
\end{theorem}

\begin{theorem}
  The Cartesian Product Theorem implies the Axiom of Choice.
\end{theorem}

If now $\mathscr A$ is a family $(A_i\colon i\in\Omega)$ of topological spaces,
the \textbf{product topology} 
on the Cartesian product $\prod\mathscr A$
is the weakest topology in which 
the coordinate projections are \textbf{continuous.}
This means that for every $j$ in $\Omega$,
for every closed subset $F$ of $A_j$,
the subset $\{x\in\prod\mathscr A\colon x_j\in F\}$ of $\prod\mathscr A$ 
must be closed;
and such subsets compose a sub-basis of the product topology.
Thus all finite unions of such sets are closed,
and such sets compose a \emph{basis} of the product topology,
so that all intersections of arbitrary collections of such unions are closed,
and no other subsets of $\prod\mathscr A$ are closed.

\begin{theorem}[Tychonoff]\label{thm:Tychonoff}
By the Axiom of Choice\ac,
the product of a family of nonempty compact topological spaces 
is nonempty and compact in the product topology.
\end{theorem}

\begin{proof}
  Suppose $\mathscr A$ is a family $(A_i\colon i\in\Omega)$
of nonempty compact topological spaces,
and $\mathscr X$ is a family of closed subsets of $\prod\mathscr A$
whose every finite subset has nonempty intersection.
We want to show $\bigcap\mathscr X\neq\emptyset$.
Each element of $\mathscr X$ 
is the intersection of sets belonging to the basis just described;
so we may assume that each element of $\mathscr X$ belongs to this basis.
Moreover, suppose $F\in\mathscr X$,
and $F$ is a union $F_0\cup\cdots\cup F_n$ 
of sets from the sub-basis just described.
Then for some $i$ in $n+1$, 
$F_i$ has nonempty intersection with each element of $\mathscr X$.
In this case, we can replace $F$ with $F_i$ in $\mathscr X$.

Using the Axiom of Choice then,
we may assume that every element of $\mathscr X$ is a sub-basic set.
One way to spell this out is as follows
(we shall see a neater way later).
We have noted that the topology on $\prod\mathscr A$ has,
as a sub-basis, the sets $\uppi_i{}\inv[F]$,
where $i\in\Omega$ and $F$ is a closed subset of $A_i$.
Then the topology on $\prod\mathscr A$ has, as a \emph{basis,}
the sets
\begin{equation*}
  \uppi_{\sigma(0)}{}\inv[F_0]\cup\dots\cup\uppi_{\sigma(n)}{}\inv[F_n],
\end{equation*}
where $n\in\upomega$,
and $\sigma$ is an \emph{injective} function from $n+1$ into $\Omega$,
and each $F_i$ is a closed subset of $A_{\sigma(i)}$.
Now consider the family of subsets $\mathscr Y$ of $\prod\mathscr A$
that have the finite intersection property,
while each element is either an element of $\mathscr X$
or else an element of a finite set of sub-basic sets 
whose union is in $\mathscr X$.
The family is ordered in an obvious way,
so that $\mathscr Y_0<\mathscr Y_1$ if and only if
each element of $\mathscr Y_1$ is either an element of $\mathscr Y_0$
or else an element of a finite set of sub-basic sets 
whose union is in $\mathscr Y_0$.
Suppose we are given a chain of the family,
and $\mathscr Y$ belongs to the chain.
Then the chain has an upper bound 
consisting of each sub-basic set that belongs to $\mathscr Y$,
as well as, 
for each union $F_0\cup\cdots\cup F_n$ of sub-basic sets in $\mathscr Y$,
either this union itself, if it belongs to every member of the chain,
or else $F_i$, if this belongs to some member of the chain.
By Zorn's Lemma (more precisely, its corollary),
our family has a maximal element.
By what we noted, 
this maximal element must consist precisely of sub-basic sets.

We may thus assume that every set in $\mathscr X$ 
is a nonempty sub-basic closed set.
Then, by the compactness of each $A_i$,
we may assume that, for some indexed family $(F_i\colon i\in\Omega)$,
each $F_i$ being a nonempty subset of $A_i$,
\begin{equation*}
  \mathscr X=\{\uppi_i{}\inv[F_i]\colon i\in\Omega\}.
\end{equation*}
Then $\bigcap\mathscr X=\prod_{i\in\Omega}F_i$,
which is nonempty by the Cartesian Product Theorem.
\end{proof}

The converse was published by Kelley in 1950 \cite{MR0039982}:%%%%%
\footnote{Actually Kelley's proof had an error,
which however is easily corrected,
as \L o's and Ryll-Nardzewski observed in 1951 \cite{MR0048795}.
Kelley's error reduced his claim to being that
the Tychonoff Theorem for sets in which the one-element sets
compose a basis for the topology implies the Axiom of Choice.
Schechter \cite{MR2213624} shows that this hypothesis
is equivalent to the Boolean Prime Ideal Theorem.}

\begin{theorem}\label{thm:tych-ac}
  The Tychonoff Theorem implies the Axiom of Choice.
\end{theorem}

\begin{proof}
  Let $(A_i\colon i\in\Omega)$ be an indexed family of nonempty sets,
and let $b$ not belong to any $A_i$.
If $i\in\Omega$, let
\begin{equation*}
  \tau_i=\{\emptyset,A_i,A_i\cup\{b\}\};
\end{equation*}
this is a topology on $A_i\cup\{b\}$.
Every finite subset of the family
\begin{equation*}
\bigl\{\uppi_i{}\inv[A_i]\colon i\in\Omega\bigr\}
\end{equation*}
of closed subsets of $\prod_{i\in\Omega}(A_i\cup\{b\})$
has nonempty intersection.
Indeed, by induction, for every $n$ in $\upomega$,
for every subset $\Omega_0$ of $\Omega$ of size $n$,
we have
\begin{equation*}
  \prod_{i\in\Omega_0}A_i\neq\emptyset,
\end{equation*}
so the product contains some $(a_i\colon i\in\Omega_0)$.
Then $\bigcap_{i\in\Omega_0}\uppi_i{}\inv[A_i]$ contains $c$, where
\begin{equation*}
  c_i=
  \begin{cases}
    a_i,&\text{ if }i\in\Omega_0,\\
b,&\text{ if }i\in\Omega\setminus\Omega_0.
  \end{cases}
\end{equation*}
By the Tychonoff Theorem,
$\bigcap_{i\in\Omega}\uppi_i{}\inv[A_i]$ must be nonempty;
but this intersection is $\prod_{i\in\Omega}A_i$.
\end{proof}

\chapter{Model theory without the Prime Ideal Theorem}\label{ch:MT}

Model theory was described on page \pageref{MTh}
as the study of structures as such.
More precisely, model theory takes into account
the \emph{logic} in which the properties of structures are stated and derived.
Usually this logic is \emph{first order} logic,
which means its variables stand for individual elements 
of the universe of a structure.
Second order logic has variables for relations on the universe.
For example, the induction axiom for the natural numbers 
(page \pageref{ax:ind}) is a second order statement,
when considered as a statement about elements and subsets of $\N$.
When considered as a part of Theorem \ref{thm:Peano} (page \pageref{thm:Peano}),
where it is a statement about all sets and in particular the set $\upomega$,
it is first order.  Indeed, for set theory itself,
there is no distinction between first and second order.

In a logic, certain strings of symbols are called \emph{formulas,}
and some formulas can be combined to make other formulas.
If only finitely many formulas can ever be combined to make new formulas,
the logic is \emph{finitary.}
First order logic is implicitly finitary.
(By \enquote{implicitly} I mean that the finitary aspect 
is not made explicit in the name \enquote{first order}.
One can develop infinitary logics 
in which variables stand only for individuals.)

The most important theorem of first order model theory 
is the Compactness Theorem.
Its proof needs the Prime Ideal Theorem.
However, a lot of model theory can be developed without Compactness.
Indeed, in Hodges's encyclopedic volume 
\emph{Model Theory} \cite{MR94e:03002},
Compactness is introduced only in the sixth of the 12 chapters.

The present chapter of the present text
develops what we shall need of model theory
that does not require the Prime Ideal Theorem.
Compactness and related results that do require the Prime Ideal Theorem
and even the full Axiom of Choice
will be established in the next chapter.

\section{Logic}

For study of arbitrary structures 
as defined in \S\ref{sect:structures} (page \pageref{sect:structures}),
we now generalize the logic developed for set theory 
in \S\ref{sect:sets} (page \pageref{sect:sets}).
This logic was based on the signature whose only symbol is $\in$.
However, we allowed constants standing for arbitrary sets:
we had to do this in order to define truth and falsity of sentences 
(as on page \pageref{truth-sets}).

Likewise, given a structure $\str A$ with signature $\sig$,
we may augment $\sig$ with a constant for each element of $A$.
If we denote the augmented signature by $\sig(A)$,
then we can \emph{expand} $\str A$ (in the sense of page \pageref{reduct}) 
in an obvious way 
to a structure denoted by\label{A_A}
\begin{equation*}
 \str A_A, 
\end{equation*}
whose signature is $\sig(A)$:
each $a$ in $A$, considered as a constant in $\sig(A)$,
is interpreted in $\str A_A$ as the element $a$ of $A$.

\subsection{Terms}

In the logic of set theory, a \emph{term} is a variable or constant.
In the logic of an arbitrary signature $\sig$, 
there might be $n$-ary operation symbols for positive $n$, 
and so the definition of \textbf{term} is broader and is made recursively.
We start with a countably infinite set of variables.
\begin{compactenum}
\item 
Each variable is a term of $\sig$.
\item
For each $n$ in $\upomega$, if $F$ is an $n$-ary operation symbol in $\sig$,
and $(t_i\colon i\in n)$ is an indexed family of terms of $\sig$,
then the string
\begin{equation*}
 Ft_0\cdots t_{n-1} 
\end{equation*}
is a term of $\sig$.
\end{compactenum}
As a special case of the second condition, 
every constant in $\sig$ is a term.
Thus, if we omit the first condition,
we still have a nontrivial definition,
at least if $\sig$ contains constants.
What we have then is the definition of a \textbf{closed term:}
a term without variables.

There is an analogue of the lemma on page~\pageref{lem:init-seg}:

\begin{theorem}\label{thm:init-seg}
  No proper initial segment of a term is a term.
\end{theorem}

Then we obtain the analogue
of Theorem~\ref{thm:ur} (page \pageref{thm:ur}):

\begin{theorem}[Unique Readability]\label{thm:term-ur}
  A term can be constructed in only one way:
If $Ft_0\cdots t_{n-1}$ and $Fu_0\cdots u_{m-1}$ are the same term,
where the $t_i$ and $u_i$ are terms, 
then $n=m$, and each $t_i$ is the same term as $u_i$.
\end{theorem}

Informally, if $F$ is a binary operation symbol, 
and $G$ is a singulary operation symbol,
and $t$ and $u$ are terms,
then for the terms $Ftu$ and $Gt$ we may write, respectively,
\begin{align*}
  (t&\mathbin Fu),&&t^G.
\end{align*}

If $t$ is a closed term of $\sig$,
and $\str A\in\Str$,
then $t$ has an interpretation
\begin{equation*}
  t^{\str A}
\end{equation*}
in $\str A$, and this interpretation is an element of $A$.
The definition is recursive, like the definition of closed terms themselves;
and the definition is justified by Theorem~\ref{thm:term-ur}.
If $t$ is $Ft_0\cdots t_{n-1}$, where each $t_i$ is a closed term, then
\begin{equation*}
  t^{\str A}=F^{\str A}(t_0{}^{\str A},\dots,t_{n-1}{}^{\str A}).
\end{equation*}
This covers the special case where $t$ is a constant, so that $n=0$.

We define the interpretation of an arbitrary term $t$ of $\sig$ as follows.
Let us denote by
\begin{equation*}
  \vrbl t
\end{equation*}
the set of variables occurring in $t$.
For each $\str A$ in $\Str$,
if $\bm a$ is the tuple $(a_x\colon x\in\vrbl t)$ in $A^{\vrbl t}$,
we obtain the closed term
\begin{equation*}
  t(\bm a)
\end{equation*}
of $\sig(A)$ by replacing each occurrence of $x$ in $t$ 
with the constant $a_x$,
for each $x$ in $\vrbl t$.
Then we can denote by
\begin{equation*}
  t^{\str A}
\end{equation*}
the function $\bm a\mapsto t(\bm a)^{\str A_A}$ from $A^{\vrbl t}$ to $A$.

We defined \emph{homomorphisms} on page~\pageref{hom}.
Given the recursive definition of terms,
we have the following by induction:

\begin{theorem}\label{thm:term-hom}
Suppose $\str A$ and $\str B$ are in $\Str$.
If $h\colon\str A\to\str B$,
then for each term $t$ of $\sig$ and each $\bm a$ from $A^{\vrbl t}$,
\begin{equation*}
  h(t^{\str A}(\bm a))=t^{\str B}(h(\bm a)).
\end{equation*}
\end{theorem}

The converse fails if $\sig$ has predicates.
For this case, we consider \emph{atomic formulas.}

\subsection{Atomic formulas}

In our logic of set theory,
an \emph{atomic formula} is just a string $t\in u$,
where $t$ and $u$ are terms.
We introduced the expression $t=u$
as an abbreviation of a certain formula.
However, we shall now count this as one of the atomic formulas.
Thus, for an arbitrary signature $\sig$,
the \textbf{atomic formulas} are of two kinds:
\begin{equation*}
  t=u,
\end{equation*}
where $t$ and $u$ are terms of $\sig$,
and
\begin{equation*}
  Rt_0\cdots t_{n-1},
\end{equation*}
for each $n$ in $\upomega$,
where $(t_i\colon i<n)$ is an indexed family of terms of $\sig$,
and $R$ is an $n$-ary predicate of $\sig$.
If $R$ is a binary predicate of $\sig$, 
and $t$ and $u$ are terms,
then for the formula $Rtu$ we may write
\begin{equation*}
  t\mathrel Ru.
\end{equation*}
It is an obvious consequence of Theorem~\ref{thm:init-seg} 
that atomic formulas are uniquely readable.

An atomic formula in which no variable occurs%
---an atomic formula in which the terms are closed%
---is an \textbf{atomic sentence.}
If $\str A\in\Str$,
then every atomic sentence of $\sig(A)$ 
is \textbf{true} or \textbf{false} in $\str A$
according to the obvious definition:
\begin{compactenum}
  \item
$t=u$ is true in $\str A$ if and only if
    \begin{equation*}
     t^{\str A_A}=u^{\str A_A}. 
    \end{equation*}
\item
$Rt_0\cdots t_{n-1}$ is true in $\str A$ if and only if
  \begin{equation*}
    (t_0{}^{\str A_A},\dots,t_{n-1}{}^{\str A_A})\in R^{\str A}.
  \end{equation*}
\end{compactenum}
If $\sigma$ is an atomic sentence that is true in $\str A$, 
we may write\label{models}
\begin{equation*}
  \str A\models\sigma.
\end{equation*}

If $\phi$ is an atomic formula of $\sig$,
then, as with terms, we can denote by
\begin{equation*}
  \vrbl{\phi}
\end{equation*}
the set of variables occurring in $\phi$;
and then if $\bm a$ is the tuple $(a_x\colon x\in\vrbl{\phi})$ 
in $A^{\vrbl{\phi}}$,
we can denote by
\begin{equation*}
  \phi(\bm a)
\end{equation*}
the result of replacing each occurrence of $x$ in $\phi$ with $a_x$,
for each $x$ in $\vrbl{\phi}$.
Now we have a convertible version of Theorem~\ref{thm:term-hom}:

\begin{theorem}\label{thm:hom-emb}
  Suppose $\str A$ and $\str B$ are in $\Str$,
and $h\colon A\to B$.
\begin{compactenum}
\item 
$h$ is a homomorphism from $\str A$ to $\str B$ if and only if,
for all atomic formulas $\phi$ of $\sig$,
for all $\bm a$ in $A^{\vrbl{\phi}}$,
\begin{equation*}
  \str A\models\phi(\bm a)\implies\str B\models\phi(h(\bm a)).
\end{equation*}
\item
$h$ is an embedding of $\str A$ in $\str B$ if and only if,
for all atomic formulas $\phi$ of $\sig$,
for all $\bm a$ in $A^{\vrbl{\phi}}$,
\begin{equation*}
  \str A\models\phi(\bm a)\iff\str B\models\phi(h(\bm a)).
\end{equation*}
\end{compactenum}
\end{theorem}

\subsection{Formulas}\label{subsect:formulas}


Now arbitrary \textbf{formulas} are built up recursively,
precisely as in the logic of set theory on page \pageref{formula}:
\begin{compactenum}
\item 
Atomic formulas are formulas.
\item
If $\phi$ is a formula, then so is its negation $\lnot\phi$.
\item
If $\phi$ and $\psi$ are formulas, then so are
\begin{compactenum}
\item 
the disjunction $(\phi\lor\psi)$,
\item
the conjunction $(\phi\land\psi)$,
\item
the implication $(\phi\lto\psi)$, and
\item
the equivalence $(\phi\liff\psi)$.
\end{compactenum}
\item\label{item:q}
If $\phi$ is a formula and $x$ is a variable, 
then
\begin{compactenum}
\item 
the instantiation $\Exists x\phi$ and
\item
the generalization $\Forall x\phi$
\end{compactenum}
are both formulas.
\end{compactenum}

Again we have:

\begin{theorem}[Unique Readability]\label{thm:form-ur}
A given formula can be built up from atomic formulas in only one way.  
\end{theorem}

Now the set $\fv{\phi}$ of \textbf{free variables} of a formula $\phi$
can be defined recursively:
\begin{compactenum}
  \item
If $\phi$ is atomic, then $\fv{\phi}=\vrbl{\phi}$.
\item
$\fv{\lnot\phi}=\fv{\phi}$.
\item
$\fv{(\phi*\psi)}=\fv{\phi}\cup\fv{\psi}$.
\item
$\fv{\Exists x\phi}=\fv{\Forall x\phi}=\fv{\phi}\setminus\{x\}$.
\end{compactenum}
A \textbf{sentence} is a formula with no free variables.

If $x$ is a variable and $t$ is a term,
we define recursively the result $\phi^x_t$ 
of replacing each \textbf{free occurrence} of $x$ in $\phi$ with $t$:
\begin{compactenum}
  \item
If $\phi$ is atomic, 
then $\phi^x_t$ is just the result 
of replacing \emph{every} occurrence of $x$ in $\phi$ with $t$.
\item
$(\lnot\phi)^x_t$ is $\lnot(\phi^x_t)$.
\item
$(\phi*\psi)^x_t$ is $(\phi^x_t*\psi^x_t)$.
\item
If $x$ is not $y$, then
$(\Exists y\phi)^x_t$ is $\Exists y\phi^x_t$, and
$(\Forall y\phi)^x_t$ is $\Forall y\phi^x_t$.
\item
$(\Exists x\phi)^x_t$ is $\Exists x\phi$, and
$(\Forall x\phi)^x_t$ is $\Forall x\phi$.
\end{compactenum}
If $i\mapsto x(i)$ is a bijection from some $n$ in $\upomega$ to $\fv{\phi}$,
and $\bm a\in A^{\fv{\phi}}$, then we can define
\begin{equation*}
  \phi(\bm a)
\end{equation*}
as
\begin{equation*}
  (\dots(\phi^{x(0)}_{a_{x(0)}})^{x(1)}_{a_{x(1)}}\cdots)^{x(n-1)}_{a_{x(n-1)}}.
\end{equation*}
This definition is independent of the particular choice 
of the bijection $i\mapsto x(i)$.
(This would not be true if $\bm a$ were a tuple of arbitrary terms.)
Now we can define \textbf{truth}\label{truth} and \textbf{falsity} 
of sentences in structures.
That is, let $\str A\in\Str$.
For every formula $\phi$ of $\sig$,
for every $\bm a$ in $A^{\fv{\phi}}$, 
we define whether $\phi(\bm a)$ is true or false in $\str A$.
If $\phi$ is atomic, we have done this.
We proceed as on page \pageref{truth-sets}:
\begin{compactenum}
\item
 $\lnot\phi(\bm a)$ is true in $\str A$ 
if and only if $\phi(\bm a)$ is false in $\str A$.
\item
The truth or falsity of $(\phi*\psi)(\bm a)$ in $\str A$ 
depends on the truth or falsity 
of $\phi(\bm a)$ and $\psi(\bm a)$ in $\str A$ 
according to the usual rules of propositional logic.
\item
$(\Exists x\phi)(\bm a)$ is true in $\str A$ if and only if, 
for some $b$ in $A$, $\phi^x_b(\bm a)$ is true in $\str A$.
\item
$(\Forall x\phi)(\bm a)$ is true in $\str A$ if and only if, 
for all $b$ in $A$, $\phi^x_b(\bm a)$ is true in $\str A$.
\end{compactenum}
Again, if a sentence $\sigma$ is true in $\str A$, we write
\begin{equation}\label{eqn:models}
  \str A\models\sigma.
\end{equation}

\section{Theories and models}\label{sect:th-mod}


The set of all sentences of a signature $\sig$ can be denoted by\label{Sn}
\begin{equation*}
\Sn.
\end{equation*}
This is the universe of an algebra
\begin{equation*}
  (\Sn,\lnot,\lor,\land).
\end{equation*}
The relation of \textbf{truth,}
symbolized as in \eqref{eqn:models} by $\models$,
is a relation between $\Str$ and $\Sn$.
This relation establishes a \emph{Galois correspondence} 
just as in Theorem \ref{thm:polarities} (page \pageref{thm:polarities}),
even though $\Str$ is a proper class.
With respect to this Galois correspondence,
the closed subsets of $\Sn$ are called \textbf{theories.}
The class $\Str$ has closed \emph{subclasses,} 
called \textbf{elementary classes.}
The polarities constituting the Galois correspondence 
can be written respectively as
\begin{align*}
  \mathcal K&\mapsto\Th{\mathcal K},&
\Gamma&\mapsto\Mod{\Gamma}.
\end{align*}
Here $\Th{\mathcal K}$ 
is the \textbf{theory of}\label{theory} 
the class $\mathcal K$ of structures,
and the elementary class $\Mod{\Gamma}$ 
is in particular the class of \textbf{models of} 
the set $\Gamma$ of sentences of $\sig$.

We may modify the notation and terminology in an obvious way,
so that if $\mathcal K=\{\str A\}$, and $\Gamma=\{\sigma\}$, then
  \begin{align*}
    \Th{\str A}&=\Th{\mathcal K},&
\Mod{\sigma}&=\Mod{\Gamma},
  \end{align*}
and these are respectively the \textbf{theory of} $\str A$
and the class of \textbf{models of} $\sigma$.
Then
\begin{gather*}
\Th{\str A}=\{\sigma\in\Sn\colon\str A\models\sigma\},\\  
\Mod{\sigma}=\{\str A\in\Str\colon\str A\models\sigma\}.
\end{gather*}
For arbitrary
subclasses $\mathcal K$ of $\Str$
and subsets $\Gamma$ of $\Sn$,
we now have
\begin{align}\label{eqn:th-mod}
  \Th{\mathcal K}&=\bigcap_{\str A\in\mathcal K}\Th{\str A},&
  \Mod{\Gamma}&=\bigcap_{\sigma\in\Gamma}\Mod{\sigma}.
\end{align}
If $\str A\in\Mod{\Gamma}$, that is, if $\str A$ is a model of $\Gamma$,
we may write
\begin{equation*}
  \str A\models\Gamma.
\end{equation*}
Then also%%%%%
\footnote{We could also write $\mathcal K\models\sigma$ 
instead of $\sigma\in\Th{\mathcal K}$,
so that $\Th{\mathcal K}=\{\sigma\in\Sn\colon\mathcal K\models\sigma\}$;
but we shall not actually use this notation.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{equation}\label{eqn:mod}
\Mod{\Gamma}
=\{\str A\in\Str\colon\str A\models\Gamma\}.
\end{equation}

An arbitrary theory is called a \textbf{complete theory} if, 
for every sentence $\sigma$ of its signature, 
the theory contains either $\sigma$ or its negation $\lnot\sigma$, 
but not both.  

\begin{theorem}\label{thm:el-class-top}
Let $\sig$ be a signature.
  \begin{compactenum}
\item
The only theory of $\sig$ that,
for some $\sigma$ in $\Sn$, 
contains both $\sigma$ and $\lnot\sigma$ is $\Sn$ itself,
which is $\Th{\emptyset}$.
  \item 
Every complete theory of $\sig$ 
is $\Th{\str A}$ for some $\str A$ in $\Str$.
\item
\sloppy
The elementary classes of $\sig$
compose a topology on $\Str$.
  \end{compactenum}
\end{theorem}

\begin{proof}
  \begin{asparaenum}
    \item
If $T$ is a theory containing both $\sigma$ and $\lnot\sigma$,
then $T$ has no models,
and so $T$ must be $\Th{\emptyset}$.
\item
If $T$ is complete, then by the first part $T$ cannot be $\Th{\emptyset}$,
so it has a model $\str A$, and $T\included\Th{\str A}$.
Since $T$ is complete, this inclusion must be an equation.
\item
By Theorem~\ref{thm:basis-cor} (page~\pageref{thm:basis-cor}), since
\begin{align*}
  \emptyset&=\Mod{\Exists xx\neq x},&
  \Mod{\sigma}\cup\Mod{\tau}&=\Mod{\sigma\lor\tau},
\end{align*}
the classes $\Mod{\sigma}$
compose a basis of a topology on $\Str$.\qedhere
  \end{asparaenum}
\end{proof}

\section{Elementary equivalence}

Given a signature $\sig$, in $\Str$ we define
\begin{equation*}
  \str A\equiv\str B\iff\Th{\str A}=\Th{\str B}.
\end{equation*}
The relation $\equiv$ is called \textbf{elementary equivalence.} 
If $\str A$ and $\str B$ are isomorphic,
then they are elementarily equivalent.  
We shall see that the converse fails.
Indeed, what makes model theory interesting
is that non-isomorphic structures can be elementarily equivalent.
Meanwhile, 
recalling the notion of topological indistinguishability 
from page \pageref{top-indist},
we have the following.

\begin{theorem}\sloppy
On $\Str$, the relation of topological indistinguish\-ability
(with respect to the topology consisting of the elementary classes)
is just elementary equivalence.
\end{theorem}

\subsection{Kolmogorov quotients}

Suppose $A$ and $B$ are topological spaces,
and $f$ is a function from $A$ to $B$.
Then $f$ is called
\begin{itemize}
\item 
\textbf{continuous,}
if $f\inv[Y]$ is closed
for every closed subset $Y$ of $B$;
\item
\textbf{closed,}
if $f[X]$ is closed for every closed subset $X$ of $A$.
\end{itemize}
A \textbf{homeomorphism} is a continuous bijection with continuous inverse.
Letting $\sim$ be the relation of topological indistinguishability on $A$,
we can give the quotient $A\modsim$ the \textbf{quotient topology,}
so that a subset $\{x\simcl\colon x\in X\}$ of $A\modsim$ is closed
if and only if its union $\bigcup_{x\in X}x\simcl$ 
is a closed subset of $A$.

\begin{theorem}
  Let $A$ be a topological space.
  \begin{compactenum}
\item
The quotient topology on $A\modsim$ is indeed a topology,
even a Kolmogorov topology.
  \item 
The quotient map $x\mapsto x\simcl$ from $A$ to $A\modsim$
is surjective, continuous, and closed.
\item
If $B$ is a Kolmogorov space,
and $f$ is a continuous function from $A$ to $B$,
then there is a unique function $h$ from $A\modsim$ to $B$
such that, for all $x$ in $A$, $h(x\simcl)=f(x)$.
  \end{compactenum}
\end{theorem}

Suppose now $f$ is a surjective continuous closed function from $A$
to a Kolmogorov space $B$,
and for every Kolmogorov space $C$
and every continuous function $g$ from $A$ to $C$,
there is a unique continuous function $h$ from $B$ to $C$
such that
\begin{equation*}
  g=h\circ f.
\end{equation*}
See Figure~\ref{fig:KQ}.
\begin{figure}[ht]
  \begin{equation*}
    \xymatrix@!{A\ar[d]_f\ar[r]^g&C\\
B\ar@{-->}[ur]_{\exists!\; h}&}
  \end{equation*}
  \caption{Kolmogorov quotient}\label{fig:KQ}
  
\end{figure}
Then $B$ is a \textbf{Kolmogorov quotient} of $A$ with respect to $f$.

\begin{theorem}
If $B_0$ and $B_1$ are Kolmogorov quotients of $A$
with respect to $f_0$ and $f_1$ respectively,
then the unique continuous function $h_0$ from $B_0$ to $B_1$
such that $f_1=h_0\circ f_0$ is a homeomorphism onto $B_1$,
its inverse being the unique continuous function
$h_1$ from $B_1$ to $B_0$ such that
$f_0=h_1\circ f_1$.
\end{theorem}

\begin{proof}
See Figure~\ref{fig:2KQ}.
\begin{figure}[ht]
  \begin{equation*}
    \xymatrix@!0@R=3.46cm@C=2cm{&A\ar[dl]_{f_0}\ar[dr]^{f_1}&\\
B_0\ar@/^/[rr]^{h_0}&&B_1\ar@/^/[ll]^{h_1}}
  \end{equation*}
  \caption{Two Kolmogorov quotients}\label{fig:2KQ} 
\end{figure}
The composition $h_1\circ h_0$ 
must be the unique continuous function $h$ from $B_0$ to itself
such that $f_0=h\circ f_0$.
Since $\id{B_0}$ is such a function, we have
\begin{equation*}
  h_1\circ h_0=\id{B_0}.
\end{equation*}
By symmetry, $h_0\circ h_1=\id{B_1}$.
\end{proof}

By the theorem, any two Kolmogorov quotients of a space are \emph{equivalent.}

\begin{theorem}\label{thm:Kol-cond}
  If $f$ is a continuous, closed, surjective function from $A$ onto $B$,
and for all $x$ and $y$ in $A$,
\begin{equation*}
  x\sim y\iff f(x)=f(y),
\end{equation*}
then $B$ is a Kolmogorov quotient of $A$ with respect to $f$.
\end{theorem}

\begin{proof}
Suppose $f(x)$ and $f(y)$ are topologically indistinguishable.
Since $f$ is closed, we have $x\sim y$, and therefore $f(x)=f(y)$.
Thus $B$ is Kolmogorov.
There is a well-defined map $x\simcl\mapsto f(x)$ from $A\modsim$ to $B$.
This map is continuous, closed, and surjective onto $B$;
so it is a homeomorphism onto $B$.
\end{proof}

\subsection{The space of complete theories}

Let us denote the set of complete theories of $\sig$ by
\begin{equation*}
  \St[0]{\sig}.
\end{equation*}
(The subscript $0$ indicates 
that the formulas in a theory have no free variables.)
The following begins to resemble Theorem \ref{thm:spec-top} 
(page \pageref{thm:spec-top}):

\begin{theorem}\label{thm:Kol-quo}
  For every signature $\sig$,
with respect to the relation $\in$ between $\Sn$ and $\St[0]{\sig}$,
\begin{compactenum}[1)]
\item 
the closed subsets of $\Sn$ are precisely the theories of $\sig$;
\item
the closed subsets of $\St[0]{\sig}$ compose a Hausdorff topology;
\item
the map $\str A\mapsto\Th{\str A}$ from $\Str$ to $\St[0]{\sig}$
is a continuous surjection,
and $\St[0]{\sig}$ is a Kolmogorov quotient of $\Str$
with respect to this map.
\end{compactenum}
\end{theorem}

The situation of the theorem might be depicted as in Figure \ref{fig:kol-mod}.
\begin{figure}
  \begin{equation*}
    \xymatrix@!{
\Str\ar@{>>}[d]_{\str A\mapsto\Th{\str A}}&\Sn\ar@{<~>}[l]_{\models}\\
\St[0]{\sig}\ar@{<~>}[ur]_{\ni}
}
  \end{equation*}
  \caption{Kolmogorov quotient of $\Str$}\label{fig:kol-mod}
  
\end{figure}

The \emph{Compactness Theorem} 
is that the topology on $\St[0]{\sig}$ is compact.
In fact we are going to be able to replace $\Sn$ with a Boolean ring $R$
such that $\St[0]{\sig}$ is homeomorphic to $\spec$.
But this will take some work,
which in one approach involves ultraproducts.
The Boolean ring
will be best thought of as a \emph{Boolean algebra}
as developed in the next section.

\section{Boolean Algebras}

We showed in Corollary \ref{cor:pow} (page \pageref{cor:pow})
that, for any set $\Omega$, the power set $\pow{\Omega}$
is the universe of a Boolean ring 
$(\pow{\Omega},\emptyset,\symdiff,\Omega,\cap)$.
By the Stone Representation Theorem (page \pageref{thm:Stone}),
every Boolean ring embeds in such a ring.

\begin{sloppypar}
The structure $(\pow{\Omega},\emptyset,\Omega,{}\comp,\cup,\cap)$
is an example of a \emph{Boolean algebra.}
Here ${}\comp$ is the singulary operation $X\mapsto\Omega\setminus X$.
\end{sloppypar}

\subsection{Abstract Boolean algebras}

Abstractly considered, a \textbf{Boolean algebra} is a structure
\begin{equation*}
  (B,\bot,\top,\bar{\ },\lor,\land),
\end{equation*}
meeting the following conditions.
\begin{compactenum}
\item
The binary operations $\lor$ and $\land$ are \textbf{commutative:}
\begin{align*}
  x\lor y&=y\lor x,&x\land y&=y\land x.
\end{align*}
\item
The elements $\bot$ and $\top$ 
are \textbf{identities} for $\lor$ and $\land$ respectively:
\begin{align*}
x\lor\bot&=x,&x\land\top&=x.  
\end{align*}
\item
$\lor$ and $\land$ are mutually \textbf{distributive:}
  \begin{align*}
    x\lor(y\land z)&=(x\lor y)\land(x\lor z),\\
    x\land(y\lor z)&=(x\land y)\lor(x\land z).
  \end{align*}
\item
The element $\bar x$ is a \textbf{complement} of $x$:
\begin{align*}
  x\lor\bar x&=\top,&x\land\bar x&=\bot.
\end{align*}
\end{compactenum}
\begin{sloppypar}
These axioms are symmetrical in the sense that,
if $(B,\bot,\top,\bar{\ },\lor,\land)$ is a Boolean algebra,
then so is $(B,\top,\bot,\bar{\ },\land,\lor)$.
Then the latter algebra can be called the \textbf{dual} of the former.
Just to give them names, 
we may say that $x\lor y$ is the \textbf{join} of $x$ and $y$, 
and $x\land y$ is their \textbf{meet.}
\end{sloppypar}

The identities in the following theorem
are sometimes given as additional axioms for Boolean algebras;
but Huntington \cite{MR1500675,MR1500471} 
shows that the axioms above are sufficient.

\begin{theorem}
  In any Boolean algebra:
  \begin{gather}\label{eqn:idemp}
    x\lor x=x,\qquad x\land x=x,\\\label{eqn:zero-el}
x\lor\top=\top,\qquad x\land\bot=\bot,\\\label{eqn:absorb}
x\lor(x\land y)=x,\qquad x\land(x\lor y)=x,\\\label{eqn:double-neg}
\bar{\bar x}=x,\\\label{eqn:De-M}
\overline{x\lor y}=\bar x\land\bar y,\qquad
\overline{x\land y}=\bar x\lor\bar y,\\\label{eqn:assoc}
(x\lor y)\lor z=x\lor(y\lor z),\qquad(x\land y)\land z=x\land(y\land z).
  \end{gather}
\end{theorem}

\begin{proof}
  By symmetry, it is enough to establish one of each pair of identities.
We do this in turn.
For \eqref{eqn:idemp} and \eqref{eqn:zero-el}, we have
\begin{align*}
&\begin{aligned}
  x\lor x
&=(x\lor x)\land\top\\
&=(x\lor x)\land(x\lor\bar x)\\
&=x\lor(x\land\bar x)\\
&=x\lor\bot\\
&=x,
\end{aligned}&
&\begin{aligned}
x\lor\top
&=(x\lor\top)\land\top\\
&=(x\lor\top)\land(x\lor\bar x)\\
&=x\lor(\top\land\bar x)\\
&=x\lor\bar x\\
&=\top,
\end{aligned}
\end{align*}
and for \eqref{eqn:absorb} and \eqref{eqn:double-neg},
\begin{align*}
&\begin{aligned}
x\lor(x\land y)
&=(x\land\top)\lor(x\land y)\\
&=x\land(\top\lor y)\\
&=x\land\top\\
&=x,
\end{aligned}&
&\begin{aligned}
\bar{\bar x}
&=\top\land\bar{\bar x}\\
&=(\bar x\lor x)\land\bar{\bar x}\\
&=(\bar x\land\bar{\bar x})\lor(x\land\bar{\bar x})\\
&=\bot\lor(x\land\bar{\bar x})\\
&=(x\land\bar x)\lor(x\land\bar{\bar x})\\
&=x\land(\bar x\lor\bar{\bar x})\\
&=x\land\top\\
&=x.
 \end{aligned}
\end{align*}
In showing \eqref{eqn:double-neg},
what we show is that 
the two complements of $\bar x$,
namely $\bar{\bar x}$ and $x$, are equal.
In the same way, 
any two complements of an element of a Boolean algebra must be equal.
We shall use this to establish \eqref{eqn:De-M}.
First we establish a special case of associativity:
\begin{align*}
  x\lor(\bar x\lor y)
&=\top\land((x\lor(\bar x\lor y)))\\
&=(x\lor\bar x)\land((x\lor(\bar x\lor y)))\\
&=x\lor(\bar x\land(\bar x\lor y))\\
&=x\lor\bar x\\
&=\top,
\end{align*}
and likewise $x\land(\bar x\land y)=\bot$.
Then
\begin{gather*}
  \begin{aligned}
(x\lor y)\lor(\bar x\land\bar y)
&=((x\lor y)\lor\bar x)\land((x\lor y)\lor\bar y)\\
&=\top\land\top\\
&=\top,
  \end{aligned}\\
  \begin{aligned}
    (x\lor y)\land(\bar x\land\bar y)
&=(x\land(\bar x\land\bar y))\lor(y\land(\bar x\land\bar y))\\
&=\bot\lor\bot\\
&=\bot,
  \end{aligned}
\end{gather*}
so by uniqueness of complements we must have \eqref{eqn:De-M}.
Finally, let $T$ stand for $(x\lor y)\lor z$,
and $U$ for $x\lor(y\lor z)$.
Then
\begin{equation*}
  U\lor\bar T
=U\lor((\bar x\land\bar y)\land\bar z)
=((U\lor\bar x)\land(U\lor\bar y))\land(U\lor\bar z).
\end{equation*}
But the factors here are all $\top$, since
\begin{gather*}
  \begin{aligned}
U\lor\bar x
&=(x\lor(y\lor z))\lor\bar x
=\top,
  \end{aligned}\\
  \begin{aligned}
U\lor\bar y
&=(U\lor\bar y)\land\top\\
&=(U\lor\bar y)\land(y\lor\bar y)\\
&=(U\land y)\lor\bar y\\
&=((x\lor(y\lor z))\land y)\lor\bar y\\
&=((x\land y)\lor((y\lor z)\land y))\lor\bar y\\
&=((x\land y)\lor y)\lor\bar y\\
&=y\lor\bar y\\
&=\top,
  \end{aligned}
\end{gather*}
and likewise $U\lor\bar z=\top$.
Thus
\begin{equation*}
 U\lor\bar T=(\top\land\top)\land\top=\top\land\top=\top. 
\end{equation*}
Dually, $U\land\bar T=\bot$.
Then $U=\bar{\bar T}=T$, that is, \eqref{eqn:assoc}.
\end{proof}

Huntington shows also that each of the eight axioms for Boolean algebras
is logically independent from the others.
He does this by exhibiting, for each of the axioms,
a structure in which the axiom is false,
but the remaining seven axioms are true.
In each case, the universe of the structure has two elements.%%%%%
\footnote{Huntington treats our two axioms of the complement
as a single axiom, but with the hypothesis that the identities are unique.
This hypothesis can itself be proved 
by $\bot'=\bot'\lor\bot=\bot$ and so forth.
In our formalism, the universe of a Boolean algebra
is automatically closed under the operations $\lor$ and $\land$;
but Huntington treats this closure as two separate axioms.
Finally, Huntington requires Boolean algebras to have two distinct elements.
Thus Huntington has ten axioms for Boolean algebras,
and he shows them to be logically independent.}

\subsection{Boolean operations}

\begin{theorem}\label{thm:B-alg-ring}
Boolean algebras and Boolean rings are the same thing in the following sense:
  \begin{compactenum}
  \item 
If $\str A$ is a Boolean algebra $(B,\bot,\top,\bar{\ },\lor,\land)$, 
and we define
\begin{equation}\label{eqn:B-add}
  x+y=(x\land\bar y)\lor(\bar x\land y),
\end{equation}
then $(B,\bot,\top,+,\land)$ is a Boolean ring $R(\str A)$.
  \item 
If $\str R$ is a Boolean ring $(B,0,1,+,\cdot)$, and we define
\begin{align}\label{eqn:join}
x\lor y&=x+y+xy,&
\bar x&=1+x,
\end{align}
then $(B,0,1,\bar{\ },\lor,\cdot)$ is a Boolean algebra $A(\str R)$.
\item
$R(A(\str R))=\str R$ and $A(R(\str A))=\str A$.
  \end{compactenum}
\end{theorem}

\begin{proof}
  \begin{asparaenum}
    \item
If the Boolean algebra $(B,\bot,\top,\bar{\ },\lor,\land)$ is given,
then the addition defined by \eqref{eqn:B-add} is obviously commutative.
For associativity, we compute
\begin{multline*}
  (x+y)+z\\
=(((x\land\bar y)\lor(\bar x\land y))\land\bar z)
\lor(((\bar x\lor y)\land(x\lor\bar y))\land z)\\
=(x\land\bar y\land\bar z)\lor(\bar x\land y\land\bar z)
\lor(\bar x\land\bar y\land z)\lor(x\land y\land z),
\end{multline*}
which is symmetric in $x$, $y$, and $z$.
Also $x+x=\bot$.  We already know $x\land x=x$.
Then $(B,\bot,\top,+,\land)$ is a Boolean ring.
\item
If the Boolean ring $(B,0,1,+,\cdot)$ is given,
then the joining operation defined in \eqref{eqn:join} 
is obviously commutative.
Also $x\lor 0=x$.
For distributivity, we have
\begin{multline*}
  (x\lor y)(x\lor z)
=(x+y+xy)(x+z+xz)\\
\begin{aligned}
&=x^2+xz+x^2z+xy+yz+xyz+x^2y+xyz+x^2yz\\
&=x+xz+xz+xy+yz+xyz+xy+xyz+xyz\\
&=x+yz+xyz\\
&=x\lor(yz),
\end{aligned}
\end{multline*}
while
\begin{align*}
  xy\lor xz
&=xy+xz+x^2yz\\
&=xy+xz+xyz\\
&=x(y+z+yz)\\
&=x(y\lor z).
\end{align*}
Finally,
\begin{gather*}
x\land\bar x=x(1+x)=x+x^2=x+x=0,\\
x\lor\bar x=x+1+x+x(1+x)=1.
\end{gather*}
Thus $(B,0,1,\bar{\ },\lor,\cdot)$ is a Boolean algebra.
\item
In $A(\str R)$, we compute
\begin{align*}
  (x\cdot\bar y)\lor(\bar x\cdot y)
&=(x\cdot(1+y))\lor((1+x)\cdot y)\\
&=(x+xy)\lor(y+xy)\\
&=x+xy+y+xy+(x+xy)(y+xy)\\
&=x+y;
  \end{align*}
so this is the sum of $x$ and $y$ in $R(A(\str R))$ as well.
In $R(\str A)$,
\begin{multline*}
  x+y+(x\land y)\\
\begin{aligned}
&=((x\land\bar y)\lor(\bar x\land y))+(x\land y)\\
&=(((x\land\bar y)\lor(\bar x\land y))\land\overline{x\land y})
\lor(\overline{(x\land\bar y)\lor(\bar x\land y)}\land x\land y)\\
&=
\begin{aligned}[t]
&(((x\land\bar y)\lor(\bar x\land y))\land(\bar x\lor\bar y))\\
&\qquad\qquad\qquad\qquad\qquad
\lor((\bar x\lor y)\land(x\lor\bar y)\land x\land y)
\end{aligned}
\\
&=(((x\land\bar y)\lor(\bar x\land y))\land(\bar x\lor\bar y))
\lor(x\land y)\\
&=((x\land\bar y\land(\bar x\lor\bar y))
\lor(\bar x\land y\land(\bar x\lor\bar y)))
\lor(x\land y)\\
&=(x\land\bar y)
\lor(\bar x\land y)
\lor(x\land y)\\
&=x\lor y;
\end{aligned}
\end{multline*}
so this is the join of $x$ and $y$ in $A(R(\str A))$ as well.
We finish by noting
\begin{equation*}
  \top+x=(\top\land\bar x)\lor(\bar{\top}\land x)
=\bar x\lor(\bot\land x)=\bar x\lor\bot=\bar x.\qedhere
\end{equation*}
  \end{asparaenum}
\end{proof}

We now have,
by the Stone Representation Theorem (page \pageref{thm:Stone}),
that every Boolean algebra 
embeds in the Boolean algebra $\pow{\Omega}$ for some set $\Omega$.
A \textbf{Boolean operation} on $\pow{\Omega}$ 
is just an operation on $\pow{\Omega}$ 
that is the interpretation of a term 
in the signature of rings or Boolean algebras.  
\begin{comment}
  


Every operation on $\pow{\Omega}$ 
that can be defined without reference to elements of $\Omega$ 
is a Boolean operation.  
Indeed, suppose we have $n$ subsets $X^0$, \dots, $X^{n-1}$ of $\Omega$.  
For each element $\sigma$ of $2^n$, 
there is a subset $X_{\sigma}$ of $\Omega$ given by
\begin{equation*}
X_{\sigma}=X^0_{\sigma}\cap\dots\cap X^{n-1}_{\sigma},
\end{equation*}
where
\begin{equation*}
X^i_{\sigma}=
\begin{cases}
	X^i,&\text{ if }\sigma(i)=1,\\
	(X^i)\comp,&\text{ if }\sigma(i)=0.
\end{cases}
\end{equation*}
See Figure \ref{fig:X2} for the cases $n=2$ and $n=3$ 
(here each set $X_{\sigma}$ is labelled with $\sigma$).
\begin{figure}[ht]
\psset{unit=8mm,linewidth=1pt}
\mbox{}\hfill
\begin{pspicture}(-3,-2.866)(3,2)
\pscircle(1,0)2
\pscircle(-1,0)2
\rput(0,0){\makebox[0pt][c]{$(1,1)$}}
\rput(-2,0){\makebox[0pt][c]{$(1,0)$}}
\rput(2,0){\makebox[0pt][c]{$(0,1)$}}
\rput(-3,2.4){\makebox[0pt][l]{$(0,0)$}}
\end{pspicture}
\hfill
\begin{pspicture}(-3,-2)(3,3.73)
%\psgrid
\pscircle(-1,0){2}
\pscircle(1,0)2
\pscircle(0,1.73)2
\rput(0,0.58){\makebox[0pt][c]{$(1,1,1)$}}
\rput(0,2.7){\makebox[0pt][c]{$(1,0,0)$}}
\rput(-2,-0.5){\makebox[0pt][c]{$(0,1,0)$}}
\rput(2,-0.5){\makebox[0pt][c]{$(0,0,1)$}}
\rput(0,-0.6){\makebox[0pt][c]{$(0,1,1)$}}
\rput(1.2,1.5){\makebox[0pt][c]{$(1,0,1)$}}
\rput(-1.2,1.5){\makebox[0pt][c]{$(1,1,0)$}}
\rput(3,3.68){\makebox[0pt][r]{$(0,0,0)$}}
\end{pspicture}
\hfill\mbox{}
\caption{Boolean combinations}\label{fig:X2}
\end{figure}
In case $n=0$, the set $2^n$ has the unique element $0$ (the empty function), 
and then $X_{\sigma}$ should be understood as $\Omega$.  
In any case, the sets $X_{\sigma}$ partition $\Omega$ into at most $2^n$ subsets%
---or $\card{2^n}$ subsets, 
if we consider $2^n$ as the set of functions from $n$ to $2$.
For every subset $S$ of $2^n$ in this sense, 
the subset $\bigcup_{\sigma\in S}X_{\sigma}$ of $\Omega$ 
is a Boolean combination of the sets $X^i$; 
and every Boolean combination of these sets is of this form.  
Thus the number of Boolean combinations of the $X^i$ is at most $2^{2^n}$.  
(It is less, if one of them is included in the union of the others.)


\end{comment}

\subsection{Filters}

In $\pow{\Omega}$ we have
\begin{equation*}
X\cap Y=X
\iff X\included Y
\iff X\cup Y=Y.
\end{equation*}
Then in an abstract Boolean algebra 
we can define an ordering $<$ by either of the equivalences
\begin{equation*}
x\land y=x\iff x\leq y\iff x\lor y=y.
\end{equation*}
By Corollary \ref{cor:pow} (page \pageref{cor:pow}),
a subset $I$ of a Boolean algebra $A$ 
is an ideal of the corresponding Boolean ring
if and only if
\begin{gather*}
  \bot\in I,\\
x\in I\And y\in I\implies x\lor y\in I,\\
y\in I\And x\leq y\implies x\in I.
\end{gather*}
By Theorem~\ref{thm:Boole} (page \pageref{thm:Boole}),
an ideal $I$ of $A$ is maximal if and only if
\begin{equation*}
  x\in A\setminus I\iff\bar x\in I.
\end{equation*}
By the \textbf{De Morgan Laws} \eqref{eqn:De-M}, 
the operation $x\mapsto\bar x$
is an isomorphism from a Boolean algebra to its dual.
A subset of a Boolean algebra is called a \textbf{filter}
if it is an ideal of the ring corresponding to the dual algebra,
or equivalently if its image under $x\mapsto\bar x$ 
is an ideal of the ring corresponding to the original algebra.
Thus a subset $F$ of a Boolean algebra $A$ is a filter if and only if
\begin{gather*}
  \top\in F,\\
x\in F\And y\in F\implies x\land y\in F,\\
x\in F\And x\leq y\implies y\in F.
\end{gather*}
See Figure \ref{fig:filter}.
\begin{figure}[ht]
\centering
\psset{unit=15mm}
\begin{pspicture}(-1,-2)(1,2)
%\psgrid
\psarc(-1,0)2{-60}{60}
\psarc(1,0)2{120}{240}
\pscustom[fillstyle=solid,fillcolor=gray]{%
\psline(0.6,0.2)(0,-0.4)(-0.6,0.2)
\psset{linewidth=0pt}  % doesn't have the effect I want
\psarcn(1,0)2{150}{120}
\psarcn(-1,0)2{60}{30}
}
\psdots(0,-1.73)(0,1.73)(0.6,0.2)(0,-0.4)(-0.6,0.2)
\uput[dr](0.6,0.2){$y$}
\uput[d](0,-0.4){$x\land y$}
\uput[dl](-0.6,0.2){$x$}
\uput[d](0,-1.73){$\bot$}
\uput[u](0,1.73){$\top$}
\end{pspicture}
\caption{A filter of a Boolean algebra}\label{fig:filter}
\end{figure}

A maximal proper filter is called an \textbf{ultrafilter.}

\begin{theorem}\label{thm:uf}
  A subset $U$ of a Boolean algebra $A$ is an ultrafilter if and only if
  \begin{align*}
    x\in U\And y\in U&\implies x\land y\in U,&
x\in A\setminus U&\iff\bar x\in U.
  \end{align*}
\end{theorem}
We may denote the set of all ultrafilters of $A$ by
\begin{equation*}
  \Stone A.
\end{equation*}
This is called the \textbf{Stone space} of $A$,
because of the following,
which is closer than Theorem~\ref{thm:Stone} (page \pageref{thm:Stone}) is
to the original form of Stone's theorem \cite{MR1501865}.
Given $x$ in $A$, we shall use the notation\label{[x]}
\begin{equation*}
  [x]=\{U\in\Stone A\colon x\in U\}
\end{equation*}
(but this is \emph{not} an equivalence class as on page \pageref{eqc-a}).

\begin{theorem}[Stone Representation Theorem for Boolean Algebras]%
\label{thm:Stone2}
Let $A$ be a Boolean algebra.
\begin{compactenum}
\item 
The subset $\{[x]\colon x\in A\}$ of $\pow{\Stone A}$ is a basis 
for a compact Hausdorff topology on $\Stone A$.
\item
The set $\{[x]\colon x\in A\}$ 
is precisely the set of clopen subsets of $\Stone A$ in this topology.
\item
The map $x\mapsto[x]$ is an embedding of $A$ in $\pow{\Stone A}$.
\end{compactenum}
\end{theorem}



%\section{Lindenbaum algebras of sentences}
\section{Logical equivalence}

Given a signature $\sig$, in $\Sn$ we define
\begin{equation*}
  \sigma\sim\tau\iff\Mod{\sigma}=\Mod{\tau}.
\end{equation*}
The relation $\sim$ is called \textbf{logical equivalence.}
Logically equivalent sentences are just sentences with the same models.
We may use the notation
\begin{equation*}
  \sigma\simcl=\{\tau\in\Sn\colon\sigma\sim\tau\}
\end{equation*}
as on page \pageref{a-simcl}.
We also define
\begin{equation*}
  \Lin{\sig}=\Sn\modsim;
\end{equation*}
this is the set of logical equivalence classes $\sigma\simcl$ 
of sentences $\sigma$ of $\sig$. 
(Here Lin stands for Lindenbaum; see below.)
In model theory,
while we are interested in the distinction between non-isomorphic structures
that are elementarily equivalent,
we are not interested in the distinction between sentences
that are different as strings of symbols,
but are logically equivalent.
However, the distinction is essential to logic as such.
In any case, 
we can enlarge Figure \ref{fig:kol-mod} (page \pageref{fig:kol-mod})
to Figure \ref{fig:lin}.
\begin{figure}
  \begin{equation*}
    \xymatrix@!{
\Str\ar@{>>}[d]_{\str A\mapsto\Th{\str A}}
&\Sn\ar@{<~>}[l]_{\models}\ar@{>>}[d]^{\sigma\mapsto\sigma\simcl}\\
\St[0]{\sig}\ar@{<~>}[ur]_{\ni}\ar@{<~>}[r]&\Lin{\sig}
}
  \end{equation*}
  \caption{Lindenbaum algebra}\label{fig:lin}
  
\end{figure}
We have not got a symbol for the induced relation 
\begin{equation*}
  \{(\Th{\str A},\sigma\simcl)\colon
(\str A,\sigma)\in\Str\times\Sn\And\str A\models\sigma\},
\end{equation*}
which is
\begin{equation*}
  \{(T,\sigma\simcl)\colon(T,\sigma)\in\St[0]{\sig}\times\Sn\And\sigma\in T\},
\end{equation*}
between $\St[0]{\sig}$ and $\Lin{\sig}$.

A sentence like $\Exists xx\neq x$ with no models
is a \textbf{contradiction.}\label{valid}
A sentence like $\Forall xx=x$
of which every structure is a model is a \textbf{validity.}
In the next theorem,
we use the symbols
\begin{align*}
  &\bot,&&\top
\end{align*}
to denote a contradiction and a validity, respectively.
The notion of a \emph{congruence} on an algebra
was defined on page \pageref{congruence}.

\begin{theorem}\label{thm:Lin}
For every signature $\sig$,
the relation $\sim$ of logical equivalence
is a congruence on the algebra
\begin{equation*}
  (\Sn,\bot,\top,\lnot,\lor,\land).
\end{equation*}
The corresponding quotient algebra is a Boolean algebra.
\end{theorem}

\begin{proof}
  Suppose $\sigma\sim\sigma_1$ and $\tau\sim\tau_1$.  
Then $\sigma\lor\tau\sim\sigma_1\lor\tau_1$, because
  \begin{align*}
    \str A\models\sigma\lor\tau
&\iff\str A\models\sigma\Or\str A\models\tau\\
&\iff\str A\models\sigma_1\Or\str A\models\tau_1\\
&\iff\str A\models\sigma_1\lor\tau_1.
  \end{align*}
Similarly $\lnot\sigma\sim\lnot\sigma_1$ 
and $\sigma\land\tau\sim\sigma_1\land\tau_1$.
Thus $\sim$ is a congruence-relation.
The quotient algebra is a Boolean algebra 
because $\sigma\lor\tau\sim\tau\lor\sigma$ and so forth.
\end{proof}

The Boolean algebra of the theorem
is called the \textbf{Lindenbaum algebra} of sentences of $\sig$,
\enquote{in memory of a close colleague of Tarski 
who died at the hands of the Nazis} \cite[p.~319]{MR94e:03002}.
In $\Lin{\sig}$, 
we now have $\sigma\simcl\leq\tau\simcl$ if and only if 
the sentence $\sigma\lto\tau$ is a validity,
or equivalently
\begin{equation*}
  \str A\models\sigma\implies\str A\models\tau.
\end{equation*}
If $T$ is a theory of $\sig$, and $\sigma\in T$, and $\sigma\sim\tau$,
then $\tau\in T$; thus
\begin{equation*}
  \{\tau\in\Sn\colon\sigma\sim\tau\}=\{\tau\in T\colon\sigma\sim\tau\}.
\end{equation*}
In particular, the quotient $T\modsim$ is a subset of $\Sn\modsim$.

If now $\tau$ is a topology on a set $B$,
and $A\included B$,
then $\{A\cap F\colon F\in\tau\}$ is a topology on $A$,
namely the \textbf{subspace topology,}
and as equipped with this topology,
$A$ is a subspace of $B$.
In this case, $A$ is \textbf{dense} in $B$
if every nonempty open subset of $B$ contains a point of $A$.

\begin{theorem}\label{thm:Lin-S}
For every signature $\sig$,
%with respect to the relation $\in$ between $\Lin{\sig}$ and $\St[0]{\sig}$,
\begin{compactenum}[1)]
\item 
for every theory $T$ of $\sig$, 
the quotient $T\modsim$ is a filter of $\Lin{\sig}$;
\item 
for every complete theory $T$ of $\sig$, 
the quotient $T\modsim$ is an ultrafilter of $\Lin{\sig}$;
\item
the map $T\mapsto T\modsim$ from $\St[0]{\sig}$ to $\Stone{\Lin{\sig}}$
is a homeomorphism onto its image;
\item
this image is dense in $\Stone{\Lin{\sig}}$.
\end{compactenum}
\end{theorem}

\begin{proof}
\sloppy
To establish density of the image of $\St[0]{\sig}$ in $\Stone{\Lin{\sig}}$, 
we note that
every nonempty open subset of $\Stone{\Lin{\sig}}$ includes $[\sigma\simcl]$
for some $\sigma$ that is not a contradiction;
but then $\sigma$ has a model $\str A$,
and so $[\sigma\simcl]$ contains $\Th{\str A}$.
\end{proof}

We can enlarge Figure \ref{fig:lin}
to Figure \ref{fig:sto}.
\begin{figure}
  \begin{equation*}
    \xymatrix@!0@=2.7cm{
\Str\ar@{>>}[d]_{\str A\mapsto\Th{\str A}}
&\Sn\ar@{<~>}[l]_{\models}\ar@{>>}[d]^{\sigma\mapsto\sigma\simcl}\\
\St[0]{\sig}\rule[-1.7ex]{0ex}{2ex}
\ar@{<~>}[ur]_{\ni}\ar@{<~>}[r]\ar@{>->}[d]_{T\mapsto T\modsim}
&\Lin{\sig}\\
\Stone{\Lin{\sig}}\ar@{<~>}[ur]_{\ni}&
}
  \end{equation*}
  \caption{Stone space of Lindenbaum algebra}\label{fig:sto}
  
\end{figure}

\begin{corollary}\label{cor:comp-eq}
For every signature $\sig$,
the following statements are equivalent:
\begin{itemize}
\item 
$\St[0]{\sig}$ is compact.
\item
The image of $\St[0]{\sig}$ under $T\mapsto T\modsim$ is $\Stone{\Lin{\sig}}$.
\item
This image is a closed subspace of $\Stone{\Lin{\sig}}$.
\end{itemize}  
\end{corollary}

\begin{proof}
All closed subspaces of a compact space are compact.
The only dense closed subspace of a topological space is the whole space.
In a Hausdorff space,
all compact subspaces are closed.
\end{proof}

\begin{sloppypar}
We shall therefore be able to understand the Compactness Theorem 
as any one of these three equivalent statements.
However, we cannot prove the Compactness Theorem itself without more work.
So far, all we have used for our theorems 
is that $\Str$ is a class $\bm M$,
and $\Sn$ is the universe of an algebra $(S,\bot,\top,\lnot,\lor,\land)$,
and $\models$ is a relation from $\bm M$ to $S$, 
where for all $A$ in $\bm M$, and all $s$ and $t$ in $S$,
\begin{gather*}
  A\nmodels\bot,\qquad\qquad A\models\top,\\
A\models\lnot s\iff A\nmodels s,\\
A\models s\lor t\iff A\models s\Or A\models t,\\
A\models s\land t\iff A\models s\And A\models t.
\end{gather*}
Hence for example we can replace $\Str$ with an arbitrary subclass.
In particular, for each non-contradictory $\sigma$ in $\Sn$,
we can choose (using the Axiom of Choice) a model $\str A_{\sigma}$ of $\sigma$,
and then we can replace $\Str$ with $\{\str A_{\sigma}\colon\sigma\in\Sn\}$.
The relation $\sim$ of logical equivalence will be unchanged;
but possibly not every element of $\Stone{\Lin{\sig}}$ 
is $\Th{\str A_{\sigma}}$ for some $\sigma$.
\end{sloppypar}

\section{Definable relations}

We are usually interested in $\Mod T$ 
for particular theories $T$ of a signature $\sig$.  
One way to study this is to study the \emph{definable relations} 
of models of $T$.  
Suppose $\str A\models T$, 
and $\phi$ is an $n$-ary formula of $\sig$.  
Then the subset
\begin{equation*}
\{\vec a\in A^n\colon\str A\models\phi(\vec a)\}
\end{equation*}
of $A^n$ is said to be \textbf{defined} by $\phi$
and can be denoted by one of
\begin{align*}
&\phi^{\str A},&&\phi(\str A).
\end{align*}
This set is then a \textbf{$0$-definable relation} of $\str A$.  
If $B\included A$, and $\phi$ is a formula of $\sig(B)$, 
then $\phi^{\str A}$ is a \textbf{$B$-definable relation} of $\str A$.

If $\sigma$ is a sentence, then $\sigma^{\str A}\in\{0,1\}$, and
\begin{equation*}
  \sigma^{\str A}=1\iff\str A\models\sigma.
\end{equation*}
We can then extend the notion of logical equivalence 
to arbitrary formulas $\phi$ and $\psi$ of $\sig$ 
having the same free variables:
these two formulas are \textbf{logically equivalent,} 
and we write
\begin{equation*}
  \phi\sim\psi,
\end{equation*}
if for all $\str A$ in $\Str$,
\begin{equation}\label{eqn:log-eq}
  \phi^{\str A}=\psi^{\str A}.
\end{equation}
If $V$ is a finite set of variables,
we may denote by
\begin{equation*}
  \Fm[V]{\sig}
\end{equation*}
the set of formulas $\phi$ of $\sig$ such that $\fv{\phi}=V$;
then we let
\begin{equation*}
  \Lin[V]{\sig}=\Fm[V]{\sig}\modsim.
\end{equation*}
Then $\Lin[V]{\sig}$ is the universe of a Boolean algebra,
just as in Theorem~\ref{thm:Lin} (page \pageref{thm:Lin}).
Alternatively,
if a bijection $i\mapsto v_i$ from $n$ in $\upomega$ to $V$
is understood to have been chosen,
we may replace the subscript $V$ with $n$.

Further modifications are possible.
If $T$ is some theory of $\sig$,
we say that $\phi$ and $\psi$ are \textbf{equivalent in} $T$
(or \emph{modulo} $T$, or \emph{with respect to} $T$)
if \eqref{eqn:log-eq} holds for all $\str A$ in $\Mod T$.
Then we obtain the algebras $\Lin[n]T$.

\section{Substructures}

A formula is \textbf{quantifier-free} 
if neither of the symbols $\exists$ and $\forall$ occurs in it.
There is a recursive definition of the quantifier-free formulas:
just delete condition \ref{item:q} 
from the recursive definition of formulas on page \pageref{subsect:formulas}.
If $\str A$ is a structure of signature $\sig$, 
then the \textbf{diagram} of $\str A$ is the set\label{diag}
\begin{equation*}
  \diag{\str A}
\end{equation*}
of quantifier-free sentences of $\sig(A)$ 
that are true in $\str A$.
Now we can give a variation of Theorem~\ref{thm:hom-emb} 
(page \pageref{thm:hom-emb}):

\begin{theorem}\label{thm:emb-diag}
    Suppose $\str A$ and $\str B$ are in $\Str$,
and $h\colon A\to B$.
The following are equivalent:
\begin{compactenum}
\item 
$h$ is an embedding of $\str A$ in $\str B$.
\item
For all quantifier-free formulas $\phi$ of $\sig$,
for all $\bm a$ in $A^{\vrbl{\phi}}$,
\begin{equation*}
  \str A\models\phi(\bm a)\implies\str B\models\phi(h(\bm a)).
\end{equation*}
\item
For all quantifier-free formulas $\phi$ of $\sig$,
for all $\bm a$ in $A^{\vrbl{\phi}}$,
\begin{equation}\label{eqn:sub}
  \str A\models\phi(\bm a)\iff\str B\models\phi(h(\bm a)).
\end{equation}
\item
When $\str B^*$ is the expansion of $\str B$ to $\sig(A)$ such that,
for each $a$ in $A$,
\begin{equation*}
  a^{\str B^*}=h(a),
\end{equation*}
then
\begin{equation*}
 \str B^*\models\diag{\str A}. 
\end{equation*}
\end{compactenum}
\end{theorem}

For the theorem, 
it would be enough to define $\diag{\str A}$ 
to consist of the atomic and negated atomic sentences of $\sig(A)$ 
that are true in $\str A$;
and indeed sometimes this is the definition used.
We shall use the following in proving the Compactness Theorem
by Henkin's method (page \pageref{sect:Henkin}).

\begin{corollary}\label{cor:diag}
    Suppose $\str A$ and $\str B$ are in $\Str$.
If $\str B$ expands to a model $\str B^*$ of $\diag{\str A}$,
and every element of $B$ is $a^{\str B^*}$ for some $a$ in $A$,
then
\begin{equation*}
  \str A\cong\str B,
\end{equation*}
and indeed $a\mapsto a^{\str B^*}$ is an isomorphism from $\str A$ to $\str B$.
\end{corollary}

A theory $T$ of $\sig$ is \textbf{axiomatized} by a subset $\Gamma$ of $\Sn$ 
if $T$ is the closure of $\Gamma$, that is,
\begin{equation*}
T=\Th{\Mod{\Gamma}};
\end{equation*}
equivalently, every model of $\Gamma$ is a model of $T$.

If $\str A$ is a structure of signature $\sig$,
then, by the last theorem,
the class of structures of $\sig(A)$ in which $\str A_A$ embeds is elementary,
and its theory is axiomatized by $\diag{\str A}$.
However, the class of structures of $\sig$ in which $\str A$ embeds
is not generally elementary.


A \textbf{universal} formula is a formula of the form
\begin{equation}\label{eqn:univ-form}
  \Forall{x_0}\cdots\Forall{x_{n-1}}\phi,
\end{equation}
where $\phi$ is quantifier-free.
The universal formula in \eqref{eqn:univ-form} might be abbreviated as
\begin{equation*}
  \Forall{\vec x}\phi.
\end{equation*}
If $T$ is a theory, then we denote by
\begin{equation*}
T_{\forall}
\end{equation*}
the theory axiomatized by the universal sentences in $T$.

\begin{lemma}\label{lem:TA}
For every theory $T$,
the theory $T_{\forall}$ is included 
in the theory of substructures of models of $T$, 
that is,
\begin{equation*}
\str A\included\str B\And\str B\models T\implies\str A\models T_{\forall}.
\end{equation*}
\end{lemma}

\begin{proof}
Suppose $\str A\included\str B$, and $\str B\models T$,
and $\phi$ is quantifier-free, and $\Forall{\vec x}\phi$ is in $T$.  
For every $\vec a$ in $A^{\fv{\phi}}$, 
we have $\vec a\in B^{\fv{\phi}}$, 
so $\str B\models\phi(\vec a)$ and therefore, by the last theorem,
$\str A\models\phi(\vec a)$.  
Thus $\str A\models\Forall{\vec x}\phi$.
\end{proof}

The converse is given in Theorem \ref{thm:TA} on page \pageref{thm:TA} below.  

In Theorem~\ref{thm:emb-diag},
if \eqref{eqn:sub} holds for \emph{all} formulas $\phi$ of $\sig$
and all $\bm a$ in $A^{\vrbl{\phi}}$,
then $h$ is called an \textbf{elementary embedding} of $\str A$ in $\str B$.
In this case, if $\str A\included\str B$, 
and $h$ is the inclusion of $A$ in $B$,
then $\str A$ is called an \textbf{elementary substructure} of $\str B$, 
and we write
\begin{equation*}
\str A\preccurlyeq\str B.
\end{equation*}
The structures in which $\str A$ embeds elementarily 
are precisely the reducts to $\sig$ of the models of $\Th{\str A_A}$.

A theory $T$ of a signature $\sig$ is called \textbf{model-complete}\label{mc} 
if for all models $\str A$ of $T$, 
the theory of $\sig(A)$ axiomatized by $T\cup\diag{\str A}$ is complete.

\begin{theorem}
A theory $T$ is model-complete if and only if, 
for all $\str A$ and $\str B$ in $\Mod T$,
\begin{equation*}
\str A\included\str B\implies\str A\preccurlyeq\str B.
\end{equation*}
\end{theorem}

\begin{proof}
Each condition is equivalent to the condition that, for all models $\str A$ of $T$, $T\cup\diag{\str A}$ axiomatizes $\Th{\str A_A}$.
\end{proof}



The L\"owenheim--Skolem Theorem below 
is a generalization of the theorem 
published by L\"owenheim in 1915 \cite{Lowenheim} 
and improved by Skolem in 1920 \cite{Skolem-LS}: 
a sentence with a model has a countable model.
Skolem's argument uses what we shall call the \emph{Skolem normal form} 
of the given sentence; we shall discuss this in
\S\ref{sect:arb} (page \pageref{sect:arb}).  
Meanwhile, an example of a sentence in Skolem normal form is
\begin{equation*}
 \Forall x\Exists yx\mathrel Ry, 
\end{equation*}
where $R$ is a binary predicate.
If this sentence has a model $\str A$, then,
by the Axiom of Choice, 
there is a singulary operation
$x\mapsto x^*$ on $A$ such that, for all $b$ in $A$,
\begin{equation*}
\str A\models b\mathrel Rb^*.
\end{equation*}
Given $b$ in $A$, we can define $(b_k\colon k\in\upomega)$ recursively by
\begin{align*}
b_0&=b,&b_{k+1}&=b_k{}^*.
\end{align*}
Then $\{b_k\colon k\in\upomega\}$ is countable 
and is the universe of a substructure of $\str A$ 
in which $\Forall x\Exists yx\mathrel Ry$ is true.  
Our own proof of the general result will follow the lines of Skolem's idea.
But we shall use the following theorem
in order to be able to work with arbitrary sentences.
We shall use the \emph{idea} of the theorem
in proving the Compactness Theorem by Henkin's method 
(page \pageref{sect:Henkin}).

\begin{theorem}[Tarski--Vaught Test]\label{thm:TV}
Suppose $\str A\included\str B$, both having signature $\sig$.  
Then $\str A\preccurlyeq\str B$, provided that, 
for all singulary formulas $\phi$ of $\sig(A)$,
\begin{equation*}
\str B\models\Exists x\phi
\implies\text{ for some $c$ in $A$, } \str B\models\phi(c),
\end{equation*}
that is,
\begin{equation*}
\phi^{\str B}\neq\emptyset\implies\phi^{\str B}\cap A\neq\emptyset.
\end{equation*}
\end{theorem}

\begin{proof}
Under the given condition, 
we show by induction that for all formulas $\phi$ of $\sig$, 
if $\vec a\in A^{\fv{\phi}}$, then
\begin{equation*}
\str A\models\phi(\vec a)\iff\str B\models\phi(\vec a).
\end{equation*}
This is given to be the case when $\phi$ is atomic 
(or more generally quantifier-free), 
and it is easily preserved under negation and conjunction.  
Suppose it holds when $\phi$ is a formula $\psi$.  
By hypothesis, for all $\vec a$ in $A^{\fv{\Exists y\psi}}$, 
the following are equivalent:
\begin{gather*}
	\str B\models(\Exists y\psi)(\vec a),\\
%	\text{for some $b$ in $B$, } \str B\models\psi^y_b(\vec a),\\
	\text{for some $b$ in $A$, } \str B\models\psi^y_b(\vec a),\\
	\text{for some $b$ in $A$, } \str A\models\psi^y_b(\vec a),\\
	\str A\models(\Exists y\psi)(\vec a).
\end{gather*}
This completes the induction.
\end{proof}

  

\chapter{Compactness and \L o\'s's Theorem}

In a signature $\sig$,
if $\Gamma$ is a set of sentences
whose every finite subset has a model,
we shall show that $\Gamma$ itself has a model.
This will be the \textbf{Compactness Theorem.}

The Compactness Theorem for countable signatures
was obtained by G\"odel in his doctoral dissertation
and published in 1930
as a kind of corollary \cite[Thm X, p.~590]{Goedel-compl} 
of his \emph{Completeness Theorem,}
which we shall take up
in Chapter \ref{ch:complete} (page \pageref{ch:complete}).
According to Chang and Keisler \cite[p.~604]{MR91c:03026},
Malcev established the Compactness Theorem for arbitrary signatures in 1936;
but in Hodges's estimation \cite[p.~318]{MR94e:03002},
the statement and proof had \enquote{shortcomings.}

Henkin gave a new proof of the Compactness Theorem
in his own doctoral dissertation
and published it in 1949
\cite{MR0033781,MR1396852}.
An alternative proof by means of \emph{ultraproducts}
was published in 1962/3 by Frayne, Morel, and Tarski 
\cite[Thm 2.10, p.~216]{MR0142459}.%%%%%
\footnote{Apparently this proof was announced in 1958.
For this and other historical notes on the ultraproduct method,
see the introduction to \cite{MR0142459}
and its correction \cite{MR0154807}.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
It is these two proofs that will interest us here.

\section{Construction of elementary substructures}

First we establish a result that does not rely on the Compactness Theorem,
but does use the Axiom of Choice.

\begin{theorem}[Downward L\"owenheim--Skolem]\label{thm:dLST}
By the Axiom of Choice\ac,
for every signature $\sig$,
for every structure $\str B$ of $\sig$,
for every subset $X$ of $B$,
there is a structure $\str A$ such that
\begin{align*}
	\str A&\preccurlyeq\str B,&
	X&\included A,&
	\card A&\leq\max(\card X,\card{\sig},\upomega).
\end{align*}
\end{theorem}

\begin{proof}
Suppose $Y\included B$.
By the Axiom of Choice\ac,
there is a function $\phi\mapsto b_{\phi}$ from $\Fm[\{x\}]{\sig(Y)}$ to $B$
such that, for all $\phi$ in $\Fm[\{x\}]{\sig(Y)}$,
\begin{equation*}
\str B\models
\Exists x\phi\lto\phi(b_{\phi}).
\end{equation*}
If $a\in B$, and $\phi$ is the formula $x=a$, then $b_{\phi}=a$.
Thus, when we define
\begin{equation*}
  Y'=\{b_{\phi}\colon\phi\in\Fm[\{x\}]{\sig(Y)}\},
\end{equation*}
we have $Y\included Y'$.
Then
\begin{equation*}
\card{Y'}\leq\max(\card Y,\card{\sig},\upomega).  
\end{equation*}
Now we can define $(X_n\colon n\in\upomega)$ recursively by
\begin{align*}
  X_0&=X,&X_{k+1}&=X_k{}',
\end{align*}
and we can let
\begin{equation*}
  A=\bigcup_{n\in\upomega}X_n.
\end{equation*}
By considering formulas $F\vec x=y$, 
we see that $A$ is the universe of a substructure $\str A$ of $\str B$.  
It is of the required cardinality, 
and by the Tarski--Vaught Test, 
it is an elementary substructure of $\str B$.
\end{proof}

In the theorem,
if $\max(\card{\sig},\upomega)\leq\card X$, then $\card A=\card X$.
The \emph{proof} of the theorem does not use cardinalities as such,
but makes essential use of the Axiom of Choice,
and by this, all sets have cardinalities anyway.
The \enquote{upward} version of the theorem
occurs on page \pageref{thm:uLST}.

\section{Models from theories}

Recall from page \pageref{theory}
that a \emph{theory} of a signature $\sig$
is just the set $T$ of sentences of $\sig$
that are true in each of some given class of structures of $\sig$.
In this case, by Theorem \ref{thm:Lin-S} (page \pageref{thm:Lin-S}),
the set $\{\sigma\simcl\colon\sigma\in T\}$ 
of logical equivalence classes of elements of $T$
is a filter of the Lindenbaum algebra $\Lin{\sig}$.

If $\Gamma\included\Sn$,
and every finite subset of $\Gamma$ has a model,
then the set $\{\sigma\simcl\colon\sigma\in\Gamma\}$ 
generates a \emph{proper} filter of $\Lin{\sig}$.
In this setting, the Compactness Theorem
is that every such filter is 
$\{\sigma\simcl\colon\sigma\in\Th{\mathcal K}\}$
for some class $\mathcal K$ of structures of $\sig$.

Thus the Compactness Theorem\label{comp-alg} 
is the converse of the theorem
that $T\modsim$ is a filter when $T$ is a theory
(Theorem \ref{thm:Lin-S}, page \pageref{thm:Lin-S}).
However, we shall not use this formulation in our first proofs.
Given a set of sentences whose every finite subset has a model,
we just want to show that the whole set has a model.

Suppose $\Gamma\included\Sn$, and $\Gamma$ does have a model.
By the Downward L\"owenheim--Skolem Theorem,
$\Gamma$ has a model of size no greater than $\max(\card{\sig},\upomega)$.
Let $A$ be a set of new constants of size $\max(\card{\sig},\upomega)$.
Then we can find a structure $\str B$ of $\sig(A)$
that is a model of $\Gamma$
and whose every element is the interpretation of a closed term of $\sig(A)$.
(For example, every element of $B$ 
could be $a^{\str B}$ for some $a$ in $A$.)
Let $T=\Th{\str B}$.
If $C$ is the set of closed terms of $\sig(A)$,
and $E$ is the equivalence relation on $C$ given by
\begin{equation}\label{eqn:tE}
  t\mathrel Eu\iff(t=u)\in T,
\end{equation}
then there is a well-defined bijection $t/E\mapsto t^{\str B}$ 
from $C/E$ onto $B$.
Then $\str B$ is determined up to isomorphism by $T$;
we may say $\str B$ is a \textbf{canonical model} of $T$.

Thus, if we are going to be able to prove the Compactness Theorem at all,
then, given a subset $\Gamma$ of $\Sn$
whose every finite subset has a model,
we must be able to embed $\Gamma$ in a theory with a canonical model;
and the signature of that theory can be $\sig(A)$,
where $\card A=\max(\card{\sig},\upomega)$.

An arbitrary complete theory need not have a canonical model.

\begin{theorem}
  A complete theory $T$ of a signature $\sig$ has a canonical model
if and only if, for every singulary formula $\phi(x)$ of $\sig$,
for some closed term $t$ of $\sig$,
\begin{equation}\label{eqn:Ex}
  (\Exists x\phi)\in T\implies \phi(t)\in T.
\end{equation}
\end{theorem}

\begin{proof}
Suppose $T$ has a canonical model $\str B$.
If $(\Exists x\phi)\in T$, then $\str B\models\Exists x\phi$,
so for some $b$ in $B$, $\str B\models\phi(b)$.
But then $b=t^{\str B}$ for some closed term $t$ of $\sig$,
so $\str B\models\phi(t)$ and therefore $\phi(t)\in T$.

Suppose conversely \eqref{eqn:Ex} holds for all singulary $\phi(x)$ of $\sig$.
Since $T$ is a complete theory, 
it is $\Th{\str M}$ for some structure $\str M$ of $\sig$.
Let $C$ be the set of closed terms of $\sig$
and let $B=\{t^{\str M}\colon t\in C\}$.
Then $B$ is the universe of a substructure $\str B$ of $\str M$,
and moreover $\str B\preccurlyeq\str M$ by the Tarski--Vaught Test 
(page \pageref{thm:TV}).
Then $\str B$ is a canonical model of $T$.
\end{proof}

The following theorem can be seen 
as a combination of Hodges's 
\cite[Thms 2.3.3 \&\ 2.3.4, pp.~44--6]{MR94e:03002}.
Hodges refers to the set $T$ of sentences in the theorem
as a \emph{Hintikka set,}
because of a 1955 paper of Hintikka.
According to Hodges, \enquote{equivalent ideas appear in} a 1955 paper by Beth
and a 1956 paper by Sch\"utte.

\begin{theorem}\label{thm:T}
Let $\sig$ be a signature,
and suppose $T$ is a subset of $\Sn$ such that
\begin{compactenum}[1)]
\item 
every finite subset of $T$ has a model;
\item
for all $\sigma$ in $\Sn$,
either $\sigma$ or $\lnot\sigma$ is in $T$;
\item
for all singulary formulas $\phi(x)$ of $\sig$,
for some closed term $t$ of $\sig$, \eqref{eqn:Ex} holds.
\end{compactenum}
Then $T$ is a complete theory with a canonical model.
\end{theorem}

\begin{proof}
Let $T$ be as in the hypothesis.
It suffices to show that $T$ has a model $\str B$,
since in this case $T$ must be the complete theory $\Th{\str B}$,
and $T$ will have a canonical model by the previous theorem.
In fact the model $\str B$ that we find will be a canonical model.

If, for some $n$ in $\upomega$, the sentence
\begin{equation*}
 \sigma_0\land\dots\land\sigma_{n-1}\lto\sigma_n
\end{equation*}
of $\sig$ is a validity, then the finite subset
$\{\sigma_0,\dots,\sigma_{n-1},\lnot\sigma_n\}$
of $\Sn$ has no model,
and therefore
\begin{equation*}
  \{\sigma_0,\dots,\sigma_{n-1}\}\included T\implies\sigma_n\in T.
\end{equation*}
In case $n=0$, this means $T$ contains all validities.
For instance, for all closed terms $t$ of $\sig$,
\begin{equation}\label{eqn:t=t}
  (t=t)\in T.
\end{equation}
Also, for all formulas $\phi$ of $\sig$,
for all closed terms $s_x$ and $t_x$ of $\sig$
(where $x$ ranges over $\fv{\phi}$),
\begin{multline}\label{eqn:txux}
  \bigl\{s_x=t_x\colon x\in\fv{\phi}\bigr\}
\cup\bigl\{\phi\bigl(s_x\colon x\in\fv{\phi}\bigr)\bigr\}
\included T\\
\implies\phi\bigl(t_x\colon x\in\fv{\phi}\bigr)\in T.
\end{multline}
In particular, for all closed terms $s$, $t$, and $u$ of $\sig$,
\begin{gather}\label{eqn:T-sym}
  (s=t)\in T\implies(t=s)\in T,\\\label{eqn:T-trans}
\{s=t,\;t=u\}\included T\implies(s=u)\in T.
\end{gather}
We now construct the desired model $\str B$ of $T$.
The argument will have these parts:
\begin{compactenum}
  \item
The definition of $B$.
\item
The definition of $F^{\str B}$ for operation symbols $F$ of $\sig$.
\item
The definition of $R^{\str B}$ for predicates $R$ of $\sig$.
\item
The proof that $\str B\models\sigma$ for all atomic sentences $\sigma$ in $T$.
\item
The proof that $\str B\models T$.
\end{compactenum}
\begin{asparaenum}
  \item
By \eqref{eqn:t=t}, \eqref{eqn:T-sym}, and \eqref{eqn:T-trans},
the relation $E$ given by
\begin{equation*}
  t\mathrel E u\iff(t=u)\in T
\end{equation*}
is an equivalence relation on the set $C$ of closed terms of $\sig$.
Now we may define $B=C/E$.
In general, if $\vec t$ is an element $(t_0,\dots,t_{n-1})$ of $C^n$, 
we may use the notation
\begin{equation*}
  \vec t/E=(t_0/E,\dots,t_{n-1}/E).
\end{equation*}
\item
Given an $n$-ary operation symbol $F$ of $\sig$ for some $n$ in $\upomega$,
given $\vec t$ in $C^n$,
we want to define
\begin{equation*}
  F^{\str B}(\vec t/E)=(Ft_0\cdots t_{n-1})/E.
\end{equation*}
This is a valid definition, 
since if $\vec s/E=\vec t/E$,
then $T$ contains the equations
\begin{align*}
  t_0&=s_0,&&\dots,&t_{n-1}&=s_{n-1},&Ft_0\cdots t_{n-1}&=Ft_0\cdots t_{n-1},
\end{align*}
so that, by $n$ applications of \eqref{eqn:txux}, $T$ contains the equation
\begin{equation*}
  Ft_0\cdots t_{n-1}=Fs_0\cdots s_{n-1}.
\end{equation*}
\item
Next, given an $n$-ary predicate $R$ in $\sig$ for some $n$ in $\upomega$,
we want to define $R^{\str B}$ by the rule
\begin{equation}\label{eqn:aER}
  \vec t/E\in R^{\str B}\iff(Rt_0\cdots t_{n-1})\in T;
\end{equation}
but again we must check that this definition is good.
We are free to make the definition
\begin{equation*}
  R^{\str B}=\{\vec t/E\colon(Rt_0\cdots t_{n-1})\in T\};
\end{equation*}
but to have \eqref{eqn:aER},
we must have
\begin{equation*}
  \vec t/E=\vec s/E\And Rt_0\cdots t_{n-1}\in T\implies(Rs_0\cdots s_{n-1})\in T.
\end{equation*}
We do have this by \eqref{eqn:txux}.
\item
For all atomic sentences $\sigma$ of $\sig$, we show
\begin{equation}\label{eqn:Bs}
  \str B\models\sigma\iff\sigma\in T.
\end{equation}
If $\sigma$ is an equation $s=t$,
then
\begin{equation*}
  \str B\models\sigma
\iff s^{\str B}=t^{\str B}
%\iff s/E=t/E
\iff s\mathrel Et
\iff\sigma\in T,
\end{equation*}
while
if $\sigma$ is $Rt_0\cdots t_{n-1}$, then
\begin{equation*}
  \str B\models\sigma
\iff\vec t^{\str B}\in R^{\str B}
\iff\vec t/E\in R^{\str B}
\iff\sigma\in T.
\end{equation*}
\item
Since for all sentences $\sigma$ and $\tau$ of $\sig$,
\begin{gather*}
  \sigma\in T\iff\lnot\sigma\notin T,\\
\{\sigma,\tau\}\included T\implies\sigma\land\tau\in T,
\end{gather*}
and for all singulary formulas $\phi$ of $\sig$,
\begin{equation*}
  \Exists x\phi\in T\iff\text{ for some $t$ in $C$, }\phi(t)\in T,
\end{equation*}
we can conclude that \eqref{eqn:Bs} holds
for arbitrary sentences $\sigma$ of $\sig$.
In particular, $\str B\models T$.\qedhere
\end{asparaenum}
\end{proof}

Given a set $\Gamma$ of sentences of $\sig$ 
whose every finite subset has a model,
we shall embed $\Gamma$ in a set $T$ as in the last theorem
in two different ways,
by the Henkin method and the ultraproduct method.
These methods differ specifically as follows.
\begin{asparadesc}
  \item[The Henkin method.]\sloppy
If $A$ is a set of new constants,
then by Zorn's Lemma,\ac\
there will be a maximal subset $T$ of $\Sn[\sig(A)]$ such that
\begin{compactenum}[i)]
  \item
$\Gamma\included T$,
\item
every finite subset of $T$ has a model, and
\item
For every singulary $\phi(x)$ of $\sig(A)$,
for some closed term $t$ of $\sig(A)$, \eqref{eqn:Ex} holds.
\end{compactenum}
If, further, $\card A=\max(\card{\sig},\upomega)$, 
then $T$ will satisfy the remaining hypothesis of the last theorem.
In case $\sig$ is countable, then $T$ can be found,
without using the Axiom of Choice,
by listing the sentences of $\sig(A)$ and deciding, one by one, 
whether a sentence or its negation should belong to $T$.
Alternatively, $T\modsim$ can be found
through the compactness of the Stone space of the Lindenbaum algebra
of (logical equivalence classes of) sentences of an appropriate signature.

\item[The ultraproduct method.]
For every finite subset $\Delta$ of $\Gamma$,
using the Axiom of Choice\ac\ if necessary,
we pick a model $\str A_{\Delta}$ of $\Delta$.
The universe of each $\str A_{\Delta}$ being $A_{\Delta}$,
we let
\begin{equation*}
  A=\prod_{\Delta\in\powf{\Gamma}}A_{\Delta}.
\end{equation*}
Considering $A$ as a set of new constants,
we expand each $\str A_{\Delta}$ to a structure $\str A_{\Delta}{}^*$ of $\sig(A)$
by defining, for each $a$ in $A$,
\begin{equation*}
  a^{\str A_{\Delta}{}^*}=a_{\Delta}.
\end{equation*}
\begin{compactenum}[i)]
\item  
Letting $\mathscr U$ be an ultrafilter of $\pow{\powf{\Gamma}}$,
we define
\begin{equation*}
  T
=\bigl\{\sigma\in\Sn[\sig(A)]\colon
\{\Delta\in\powf{\Gamma}\colon\str A_{\Delta}{}^*\models\sigma\}
\in\mathscr U\bigr\}.
\end{equation*}
Then $T$ will be a complete theory with a canonical model;
such a model
is called an \textbf{ultraproduct} of the structures $\str A_{\Delta}{}^*$.
\item
If, further, $\mathscr U$ is chosen so as to contain every subset
\begin{equation*}
  \{\Delta\in\powf{\Gamma}\colon\sigma\in\Delta\}
\end{equation*}
of $\powf{\Gamma}$,
where $\sigma\in\Gamma$,
then we shall have $\Gamma\included T$.
\end{compactenum}
\end{asparadesc}

We now work out the details.

\section{Henkin's method}\label{sect:Henkin}

The following does not require the Axiom of Choice.

\begin{theorem}[Countable Compactness]
  Suppose $\sig$ is a countable signature,
and $\Gamma$ is a set of sentences of $\sig$
whose every finite subset has a model.
Then $\Gamma$ has a model.
\end{theorem}

\begin{proof}\sloppy
Let $A$ be a set $\{a_n\colon n\in\upomega\}$ of constants 
not belonging to $\sig$.
It is possible to define a surjective function $n\mapsto\sigma_n$
from $\upomega$ to $\Sn[\sig(A)]$.
We shall recursively define a function $n\mapsto\Gamma_n$ 
from $\upomega$ to $\pow{\Sn[\sig(A)]}$
such that the union $\bigcup_{n\in\upomega}\Gamma_n$
is a theory $T$ as in Theorem \ref{thm:T}.

We start by letting
\begin{equation*}
  \Gamma_0=\Gamma.
\end{equation*}
Then every finite subset of $\Gamma_0$ has a model.
Suppose $\Gamma_n$ has been defined so that
\begin{compactenum}[1)]
\item
it is the union of $\Gamma_0$ with a finite set, and
\item
its every finite subset has a model.
\end{compactenum}
Note that this is indeed the case when $n=0$.
If it is the case for some $n$,
then one of $\Gamma_n\cup\{\sigma_n\}$ and $\Gamma_n\cup\{\lnot\sigma_n\}$ 
has the same properties.
Indeed, only the second property could fail.
If $\Gamma_n\cup\{\sigma_n\}$ does not have the property,
then for some finite subset $\Delta$ of $\Gamma_n$, 
there is no model of $\Delta\cup\{\sigma_n\}$.
Thus in every model of $\Delta$, the sentence $\lnot\sigma_n$ is true.
If $\Theta$ is another finite subset of $\Gamma_n$,
then $\Delta\cup\Theta$ has a model,
and this will also be a model of $\Theta\cup\{\lnot\sigma_n\}$.
Thus $\Gamma_n\cup\{\lnot\sigma_n\}$ is as desired.
In any case, we define $\Gamma_{n+1}$ as follows.
\begin{compactitem}
  \item
If the set $\Gamma_n\cup\{\lnot\sigma_n\}$ 
has the two desired properties, we let $\Gamma_{n+1}$ be this set.
\item
Suppose $\Gamma_n\cup\{\lnot\sigma_n\}$ 
does not have the properties,
so that $\Gamma_n\cup\{\sigma_n\}$ must have them.
\begin{compactitem}
  \item
If $\sigma_n$ is not existential,
we let $\Gamma_{n+1}=\Gamma_n\cup\{\sigma_n\}$.
\item
If $\sigma_n$ is $\Exists x\phi$ for some formula $\phi$, we let
\begin{equation}\label{eqn:Gn+1}
  \Gamma_{n+1}=\Gamma_n\cup\{\Exists x\phi\}\cup\{\phi^x_{a_k}\},
\end{equation}
where $k$ is the least $\ell$ such that 
$a_{\ell}$ does not occur in any sentence 
in $\Gamma_n\cup\{\Exists x\phi\}$.
Since $\Gamma_n\setminus\Gamma_0$ is finite,
and no sentence of $\Gamma_0$ contains a constant from $A$,
such an $\ell$ exists.
\end{compactitem}
\end{compactitem}
Then $\Gamma_{n+1}$ has the desired properties 
that $\Gamma_n$ is assumed to have.

By induction, all $\Gamma_n$ do have the properties.
We can now let
\begin{equation*}
 T=\bigcup_{n\in\upomega}\Gamma_n. 
\end{equation*}
If $\{\tau_0,\dots,\tau_{n-1}\}$ is a finite subset of $T$,
then each $\tau_k$ belongs to some $\Gamma_{f(k)}$,
and so they all belong to $\Gamma_{\max\{f(k)\colon k<n\}}$,
and therefore they have a common model.
In short, every finite subset of $T$ has a model.
Also, for all sentences $\sigma$ of $\sig(A)$,
either $\sigma$ or $\lnot\sigma$ is in $T$.
Finally, by construction, if $\Exists x\phi$ is in $T$,
then $\phi^x_c$ is in $T$ for some constant $c$.
Thus Theorem \ref{thm:T} applies,
and so $T$ has a model.
In particular, this model is a model of $\Gamma$.
\end{proof}

In a variant of the foregoing proof,
Theorem \ref{thm:T} is not used as it is.
We first assume that there is no finite bound
on the sizes of models of finite subsets of $\Gamma$.
Then we let $\Gamma_0=\Gamma\cup\{a_i\neq a_j\colon i<j<\upomega\}$.
We obtain $\Gamma_{n+1}$ from $\Gamma_n$ as before,
except that, if we make the definition \eqref{eqn:Gn+1},
then we let $k$ be the least $\ell$ 
such that $a_{\ell}$ does not occur 
in any sentence in $(\Gamma_n\cup\{\Exists x\phi\})\setminus\Gamma_0$.
We define $T$ as before,
but now we can obtain a model of $T$ whose universe is just $A$.

In this alternative approach,
the remaining case is handled differently.
Suppose $\Gamma$ has a finite subset $\Delta_0$
such that there is a finite upper bound on the size of models of $\Delta_0$.
Since $\Gamma$ is countable,
we can form a chain
\begin{equation*}
  \Delta_0\included\Delta_1\included\Delta_2\included\cdots
\end{equation*}
of finite subsets of $\Gamma$ whose union is $\Gamma$.
Then there is a corresponding chain
\begin{equation*}
  \sig_0\included\sig_1\included\sig_2\included\cdots
\end{equation*}
of finite signatures such that $\Delta_n\included\Sn[\sig_n]$
for each $n$ in $\upomega$.
For each $n$ in $\upomega$ then,
there are only finitely many nonisomorphic structures of $\sig_n$
that are models of $\Delta_n$.
We may assume that the universe of each of them
is a von Neumann natural number.
As $n$ varies,
these models of $\Delta_n$ are (partially) ordered 
by the relation of being a reduct.
That is, if $m<n$ and $\str A\models\Delta_m$, while $\str B\models\Delta_n$,
\begin{equation*}
  \str A<\str B\iff\str A=\str B\restriction\sig_m.
\end{equation*}
With this ordering,
these structures compose a \emph{tree} in the sense of page \pageref{tree}.
The tree is an infinite $\upomega$-tree,
so by K\"onig's Lemma (Theorem~\ref{thm:Koenig}, page \pageref{thm:Koenig}) 
it has an infinite branch;
the union of this branch is a model of $\Gamma$.



Still without using the Axiom of Choice,
we can obtain Compactness for an uncountable signature,
provided the signature itself is given to us as being well ordered
(otherwise we can apply the Well Ordering Theorem, page \pageref{thm:woth},
which uses the Axiom of Choice).

\begin{theorem}[Well Ordered Compactness]
Suppose $\sig$ is a signature $\{s_{\alpha}\colon\alpha<\kappa\}$
for some cardinal $\kappa$,
and $\Gamma$ is a set of sentences of $\sig$
whose every finite subset has a model.
Then $\Gamma$ has a model.
\end{theorem}

\begin{proof}
Let $A$ be a set $\{a_{\alpha}\colon\alpha<\kappa\}$ 
of constants not belonging to $\sig$.
It is possible to define a surjective function $\alpha\mapsto\sigma_{\alpha}$
from $\kappa$ to $\Sn[\sig(A)]$.
We shall recursively define a function $\alpha\mapsto\Gamma_{\alpha}$ 
from $\kappa$ to $\pow{\Sn[\sig(A)]}$
such that the union $\bigcup_{\alpha<\kappa}\Gamma_{\alpha}$
is a set $T$ as in Theorem \ref{thm:T}.
Suppose, for some $\alpha$ such that $\alpha<\kappa$,
a subset $\Gamma_{\beta}$ of $\Sn[\sig(A)]$ has been defined 
whenever $\beta<\alpha$
so that
\begin{equation*}
  \gamma<\beta<\alpha\implies\Gamma_{\gamma}\included\Gamma_{\beta},
\end{equation*}
and also,
whenever $\beta<\alpha$,
the set $\Gamma_{\beta}$ is a subset $\Delta$ of $\Sn[\sig(A)]$ such that
\begin{compactenum}[1)]
\item
$\card{\Delta\setminus\Gamma_0}<\kappa$, and
\item
every finite subset of $\Delta$ has a model.
\end{compactenum}
Then one of the two sets
\begin{align*}
  &\bigcup_{\beta<\alpha}\Gamma_{\beta}\cup\{\sigma_{\alpha}\},&
  &\bigcup_{\beta<\alpha}\Gamma_{\beta}\cup\{\lnot\sigma_{\alpha}\}
\end{align*}
is also such a subset $\Delta$ of $\Sn[\sig(A)]$.
Now we can obtain $\Gamma_{\alpha}$ from $\bigcup_{\beta<\alpha}\Gamma_{\beta}$,
just as before we obtained $\Gamma_{n+1}$ from $\Gamma_n$
in the previous proof.
\end{proof}

As we observed above (page \pageref{comp-alg}),
a more algebraic formulation of the Compactness Theorem
is that every filter of $\Lin{\sig}$ is $T\modsim$ for some theory $T$.
To prove this,
by the Boolean Prime Ideal Theorem\PI,
it is enough to show that every ultrafilter is $T\modsim$ 
for some complete theory $T$.
We can proceed as follows,
using Theorem~\ref{thm:Lin-S} (page \pageref{thm:Lin-S})
and its corollary.

\begin{theorem}[Compactness]\label{thm:comp-alg}
By the Boolean Prime Ideal Theorem,
for all signatures $\sig$,
the injective map $T\mapsto T\modsim$
from $\St[0]{\sig}$ to $\Stone{\Lin{\sig}}$ is surjective.
\end{theorem}

\begin{proof}
Suppose $\Gamma$ is a subset of $\Sn$
such that
\begin{equation*}
 \{\sigma\simcl\colon\sigma\in\Gamma\}\in\Stone{\Lin{\sig}}. 
\end{equation*}
We want to show $\Gamma$ has a model $\str A$,
since in that case
\begin{equation*}
  \Th{\str A}\modsim=\{\sigma\simcl\colon\sigma\in\Gamma\}.
\end{equation*}
Let $A$ be a set of constants not in $\sig$.
It will be enough to embed $\Gamma$ in a subset of $\Sn[\sig(A)]$
that satisfies the hypothesis of Theorem \ref{thm:T} (page \pageref{thm:T}).
Such a subset of $\Sn[\sig(A)]$
is precisely a set $\{\sigma\in\Sn[\sig(A)]\colon\sigma\simcl\in\mathscr U\}$,
where $\mathscr U$ is an element of $\Stone{\Lin{\sig}}$ 
that belongs to the intersection
\begin{equation}\label{eqn:bigcap}
\bigcap_{\phi\in\Fm[\{x\}]{\sig(A)}}\left([(\lnot\Exists x\phi)\simcl]
\cup\bigcup_{c\in A}[\phi(c)\simcl]\right)
\cap
\bigcap_{\sigma\in\Gamma}[\sigma\simcl].
\end{equation}
Suppose there is an embedding $\phi\mapsto c_{\phi}$
of $\Fm[\{x\}]{\sig(A)}$ in $A$.
Then by the compactness of $\Stone{\Lin{\sig(A)}}$,
we have
\begin{equation*}
\bigcap_{\phi\in\Fm[\{x\}]{\sig(A)}}[(\Exists x\phi\lto\phi(c_{\phi}))\simcl]
\cap
\bigcap_{\sigma\in\Gamma}[\sigma\simcl]
\neq\emptyset;
\end{equation*}
but the intersection here is a subset of the intersection in \eqref{eqn:bigcap}.
Thus it is enough to define $A$ as $\bigcup_{k\in\upomega}A_k$, where
\begin{align*}
  A_0&=\emptyset,&
A_1&=\left\{c_{\phi}\colon\phi\in\Fm[\{x\}]{\sig}\right\},
\end{align*}
and
\begin{equation*}
A_{k+2}=\left\{c_{\phi}\colon\phi\in\Fm[\{x\}]{\sig(A_{k+1})}
\setminus\Fm[\{x\}]{\sig(A_k)}\right\}.\qedhere
\end{equation*}
\end{proof}

Thus the Compactness Theorem as such 
needs only the Boolean Prime Ideal Theorem.
We shall prove the converse as Theorem \ref{thm:comp-PI} 
(page \pageref{thm:comp-PI}).

\section{Products}

The following can be proved as a consequence of Theorem \ref{thm:comp-alg}
and the Tarski--Vaught Test (page \pageref{thm:TV}).
But it is also just a reformulation of Theorem \ref{thm:T} 
(page \pageref{thm:T}).

\begin{theorem}
Let $\sig$ be a signature,
and suppose $T$ is a subset of $\Sn$ such that
\begin{compactenum}[1)]
\item 
$\{\sigma\simcl\colon\sigma\in T\}\in\Stone{\Lin{\sig}}$;
\item
for all sentences $\sigma$ and $\tau$ of $\sig$,
if $\sigma\in T$ and $\sigma\sim\tau$, then $\tau\in T$;
\item
for all singulary formulas $\phi(x)$ of $\sig$,
for some closed term $t$ of $\sig$, \eqref{eqn:Ex} holds.
\end{compactenum}
Then $T$ is a complete theory with a canonical model.
\end{theorem}

We now establish what amounts to a special case of this.
Supposing $\mathscr A$
is an indexed family $(\str A_i\colon i\in {\Omega})$  
of structures with a common signature $\sig$,
we let
\begin{equation}\label{eqn:A=}
  A=\prod_{i\in\Omega}A_i.
\end{equation}
An element $(a_i\colon i\in\Omega)$ of $A$ may be written just as $a$.
Understanding $A$ as a set of new constants,
we can expand each $\str A_i$ to a structure $\str A_i{}^*$
in the signature $\sig(A)$
so that, for each $a$ in $A$,
\begin{equation}\label{eqn:A*}
  a^{\str A_i{}^*}=a_i.
\end{equation}
Given $\sigma$ in $\Sn[\sig(A)]$, we define
\begin{equation*}
  \bv{\sigma}=\{i\in\Omega\colon\str A_i{}^*\models\sigma\}.
\end{equation*}
We combine the $\str A_i$ into a single structure as follows.
The usual reference for the theorem 
is \L o\'s's 1955 paper \cite{MR0075156},
although the theorem is not given clearly there.

\begin{theorem}[\L o\'s's Theorem]\label{thm:Los}
Suppose $\mathscr A$
is an indexed family
\begin{equation*}
(\str A_i\colon i\in {\Omega})
\end{equation*}
of nonempty structures of $\sig$, 
and $A$ is as in \eqref{eqn:A=}.
Let $\mathscr U$ be an ultrafilter of $\pow{\Omega}$.
There is an equivalence relation $E$ on $A$ given by
\begin{equation*}
  a\mathrel E b\iff\bv{a=b}\in\mathscr U.
\end{equation*}
By the Axiom of Choice\ac,
there is a structure $\str B$ of $\sig(A)$ with universe $A/E$, 
such that, for all $a$ in $A$,
\begin{equation*}
  a^{\str B}=\{b\in A\colon a\mathrel Eb\},
\end{equation*}
and for all $\sigma$ in $\Sn[\sig(A)]$,
\begin{equation}\label{eqn:Bsbv}
  \str B\models\sigma\iff\bv{\sigma}\in \mathscr U.
\end{equation}
\end{theorem}


\begin{proof}
We have
  \begin{gather}\label{eqn:cap}
    \bv{\sigma\land\tau}=\bv{\sigma}\cap\bv{\tau},\\\label{eqn:comp}
\bv{\lnot\sigma}=\Omega\setminus\bv{\sigma}.
  \end{gather}
Let
\begin{equation}\label{eqn:T}
  T=\{\sigma\in\Sn[\sig(A)]\colon\bv{\sigma}\in \mathscr U\}.
\end{equation}\sloppy
By Theorem~\ref{thm:uf} (page \pageref{thm:uf}),
$T\modsim$ is an ultrafilter of $\Lin{\sig(A)}$, since
\begin{align*}
\sigma\in T\And\tau\in T
&\implies\bv{\sigma}\in\mathscr U\land\bv{\tau}\in\mathscr U
&&\text{[by \eqref{eqn:T}]}\\
&\implies\bv{\sigma}\cap\bv{\tau}\in\mathscr U
&&\text{[Thm \ref{thm:uf}]}\\
&\implies\bv{\sigma\land\tau}\in\mathscr U
&&\text{[by \eqref{eqn:cap}]}\\
&\implies\sigma\land\tau\in T
&&\text{[by \eqref{eqn:T}]}  
\end{align*}
and
\begin{align*}
  \sigma\in\Sn[\sig(A)]\setminus T
&\iff\bv{\sigma}\in\pow{\Omega}\setminus\mathscr U
&&\text{[by \eqref{eqn:T}]}\\
&\iff\Omega\setminus\bv{\sigma}\in\mathscr U
&&\text{[Thm \ref{thm:uf}]}\\
&\iff\bv{\lnot\sigma}\in\mathscr U
&&\text{[by \eqref{eqn:comp}]}\\
&\iff\lnot\sigma\in T
&&\text{[by \eqref{eqn:T}].}
\end{align*}
Moreover, for every $\psi$ in $\Fm[\{x\}]{\sig(A)}$,
by the Axiom of Choice,\ac\
we can find $a$ in $A$ such that,
for each $i$ in $\Omega$, 
\begin{equation}\label{eqn:A_i}
  \str A_i\models\Exists x\psi\iff\str A_i\models\psi(a_i).
\end{equation}
Then
\begin{equation*}
  \bv{\Exists x\psi}=\bv{\psi(a)},
\end{equation*}
so $T$ is as in the previous theorem.
\end{proof}

The structure $\str B$ found in the theorem 
is an \textbf{ultraproduct} of the indexed family 
$\mathscr A$ or $(\str A_i\colon i\in\Omega)$
and can be denoted by one of
\begin{align}\label{eqn:up}
&\prod\mathscr A/\mathscr U,&
&\prod_{i\in\Omega}\str A_i/\mathscr U.
\end{align}
We may also denote an equivalence class $\{b\in A\colon a\mathrel Eb\}$ by
  \begin{equation*}
    a/\mathscr U.
  \end{equation*}
If $\mathscr U$ is merely a filter of $\pow{\Omega}$,
the quotient in \eqref{eqn:up} is still defined,
but is called a \textbf{reduced product} of the indexed family.

In the proof of \L o\'s's Theorem, we need the Axiom of Choice
only in the last step, involving quantifiers.
If the ultrafilter $\mathscr U$ is principal,
namely $\{X\in\pow{\Omega}\colon i\in X\}$ for some $i$ in $\Omega$,
then $\str B\cong\str A_i{}^*$.
Thus \L o\'s's Theorem by itself
does not imply even the Boolean Prime Ideal Theorem.
However, these two theorems together
imply the Axiom of Choice 
(Theorem \ref{thm:PI-L-AC}, page \pageref{thm:PI-L-AC}).

Meanwhile, we can formulate the Compactness Theorem
as a weaker version of \L o\'s's Theorem
(with the Boolean Prime Ideal Theorem):

\begin{theorem}[Compactness]\label{thm:Los-comp}
Suppose $\Gamma$ is a set of sentences of a signature $\sig$,
and every finite subset $\Delta$ of $\Gamma$ has a model $\str A_{\Delta}$.
Let
\begin{align*}
  \mathscr A&=(\str A_{\Delta}\colon\Delta\in\powf{\Gamma}),&
A&=\prod_{\Delta\in\powf{\Gamma}}A_{\Delta}.
\end{align*}
There is a complete theory of $\sig(A)$ that includes $\Gamma$,
namely
\begin{equation*}
  \{\sigma\in\Sn[\sig(A)]\colon\bv{\sigma}\in\mathscr U\},
\end{equation*}
where $\mathscr U$ is an ultrafilter of $\pow{\powf{\Gamma}}$
that contains each of the sets
\begin{equation*}
 \{\Delta\in\powf{\Gamma}\colon\sigma\in\Delta\},
\end{equation*}
where $\sigma\in\Gamma$.
\end{theorem}

\begin{proof}
  The indicated theory is the theory of the ultraproduct
$\prod\mathscr A/\mathscr U$.
\end{proof}




\section{Cardinality}

By the theorem below, a non-principal ultrapower $\str C$ of a
countably infinite structure $\str A$ is uncountable.  By the Downward
L\"owenheim--Skolem Theorem (page \pageref{thm:dLST}), 
in a countable signature, 
there will then be a countable structure $\str B$ such that 
\begin{equation*}
\str A\prec\str B\prec\str C.
\end{equation*}
Indeed, $\str B$ may be chosen to include $A\cup\{x\}$ 
for some $x$ in $C\setminus A$.  
Even though $\str A$ is then a proper substructure of $\str B$, 
these two may be isomorphic.  
However, this is not the case when $\str A$ is $(\N,+,\cdot)$.  
Thus \emph{countable
non-standard models of arithmetic} exist.  
A more illuminating
construction of such models is given in \S\ref{sect:Skolem} below.   

The following is a special case of \cite[Thm 9.5.4(a)]{MR94e:03002} 
(and is said to be found in Frayne, Morel, and Scott \cite{MR0142459}%%%%%
\footnote{I have a printout of this article, 
but have not sorted through all of its many basic results to find this one.  
It should be noted that the article has a \enquote{correction} \cite{MR0154807}, 
which merely refines the account of Tarski's contribution to the subject 
(as well as taking some of the credit away from Frayne).}%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
).  

\begin{theorem}\label{thm:card}
For all signatures $\sig$, for all $\str A$ in $\Str$, 
for all singulary formulas $\phi$ of $\sig(A)$, 
for all non-principal ultrafilters $\mathscr U$ of $\pow{\upomega}$,  
\begin{equation*}
\upomega\leq\card{\phi(\str A)}
\implies\card{\phi(\str A^{\upomega}/\mathscr U)}=\card{\phi(\str A)}^{\upomega}.
\end{equation*}
In particular, if $\str A$ is countable, then all infinite definable
relations of $\str A^{\upomega}/\mathscr U$ have the cardinality of the
continuum. 
\end{theorem}

\begin{proof}
Suppose $a\in A^{\upomega}$
and $a/\mathscr U\in\phi(\str A^{\upomega}/\mathscr U)$.
Then by \L o\'s's Theorem 
\begin{equation*}
\bv{\phi(a)}\in\mathscr U.
\end{equation*}
Then we may assume $\bv{\phi(a)}=\upomega$.
That is, we can find $a'$ in $\phi(\str A)^{\upomega}$ 
such that $a/\mathscr U=a'/\mathscr U$.  
By the Axiom of Choice\ac\ then,
there is an injection $a/\mathscr U\mapsto a'$ 
from $\phi(\str A^{\upomega}/\mathscr U)$ to $\phi(\str A)^{\upomega}$.  
This shows 
\begin{equation*}
\card{\phi(\str A^{\upomega}/\mathscr U)}\leq\card{\phi(\str A)}^{\upomega}.
\end{equation*}
For the reverse inequality when $\phi(\str A)$ is infinite, 
it is enough to find a function $a\mapsto a^*$ 
from $\phi({\str A})^{\upomega}$ to itself such that 
\begin{equation*}
a\neq b\implies a^*/\mathscr U\neq b^*/\mathscr U,
\end{equation*}\sloppy
so that $a\mapsto a^*/\mathscr U$ 
will be an embedding 
of $\phi(\str A)^{\upomega}$ in $\phi(\str A^{\upomega}/\mathscr U)$.
We want
\begin{equation*}
a\neq b\implies\bv{a^*\neq b^*}\in\mathscr U.
\end{equation*}
Now, $a\neq b$ means $a_m\neq b_m$ for some $m$ in $\upomega$.  
For each $m$ in $\upomega$, we have $\upomega\setminus m\in\mathscr U$.
Thus it is enough if
\begin{equation*}
  a_m\neq b_m\implies\upomega\setminus m\included\bv{a^*\neq b^*},
\end{equation*}
that is,
\begin{equation}\label{eqn:ileqj}
  a_m\neq b_m\And m\leq n\implies a^*{}_n\neq b^*{}_n.
\end{equation}
For this, it is enough if, for each $n$ in $\upomega$,
$a^*{}_n$ is an injective function of $(a_0,\dots,a_n)$.
So let $\mu_n$ be an injection from $\phi(\str A)^{n+1}$ to $\phi(\str A)$
(which exists because $\phi(\str A)$ is infinite),  
and define
\begin{equation*}
a^*{}_n=\mu_n(a_0,\dots,a_n).\qedhere
\end{equation*}
\end{proof}

Let us try to generalize this argument, replacing $\upomega$ with an
arbitrary infinite index-set $\Omega$.  
Instead of elements $m$ and $n$ of $\upomega$,
we work with elements $i$ and $j$ of $\Omega$.
We replace the element $\upomega\setminus m$ 
of the ultrafilter of $\pow{\upomega}$
with some element $X_i$ of the ultrafilter of $\pow{\Omega}$.
The old condition $m\leq n$ is now $j\in X_i$,
so that \eqref{eqn:ileqj} becomes 
\begin{equation*}
a_i\neq b_i\And j\in X_i\implies a^*{}_j\neq b^*{}_j,
\end{equation*}
and $(a_0,\dots,a_n)$, which is $(a_m\colon m\leq n)$,
becomes $(a_i\colon j\in X_i)$.
So $a^*{}_j$ should be an injective function of this.
As before, it is enough if the sets 
\begin{equation*}
\{i\in\Omega\colon j\in X_i\}
\end{equation*}
are finite.  
An ultrafilter of $\pow{\Omega}$
is called \textbf{regular} if it has such elements
$X_i$ for all $i$ in $\Omega$. 

\begin{theorem}
  There are regular ultrafilters of $\pow{\Omega}$ 
for every infinite set $\Omega$.
\end{theorem}

\begin{proof}
Let $\Omega$ be an infinite set.
Then $\Omega$ is equipollent with $\powf{\Omega}$.
So it is enough to show that there are regular ultrafilters 
of $\pow{\powf{\Omega}}$.  
To do this, if $i\in\powf{\Omega}$, we need only define 
\begin{equation*}
X_i=\{j\in\powf{\Omega}\colon i\included j\}.
\end{equation*}
Since $X_i\cap X_j=X_{i\cup j}$, the $X_i$ do generate a filter
of $\pow{\powf{\Omega}}$.  
The filter is proper, since $i\in X_i$, so none
of the $X_i$ is empty.  
Moreover, 
\begin{equation*}
\{i\in\powf{\Omega}\colon j\in X_i\}=\{i\in\powf{\Omega}\colon
i\included j\}=\pow j, 
\end{equation*}
which is finite.  
So there are regular proper filters, and hence
regular ultrafilters, of $\pow{\powf{\Omega}}$.  
\end{proof}

Hence, in Theorem \ref{thm:card}, 
$\upomega$ can be replaced with an arbitrary infinite set.

\section{Convergence of ultrafilters}

There are a number of equivalent formulations
of the definition of compactness of a topological space.%%%%%
\footnote{See for example  Willard \cite[Thm 17.4, p.~118]{MR0264581}.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We shall use one of them to understand \L o\'s's Theorem
(Theorem \ref{thm:Los}, page \pageref{thm:Los}) 
more clearly as being a refinement of the Compactness Theorem
(as Theorem \ref{thm:Los-comp}, page \pageref{thm:Los-comp}),
given the model theory of the previous chapter
(and in particular the Tarski--Vaught Test (page \pageref{thm:TV})).

In a topological space,
an \textbf{open neighborhood} of a point
is an open set that contains the point.
Then a \textbf{neighborhood} of the point
is a set that includes an open neighborhood of the point.
For an arbitrary set $\Omega$, a \textbf{filter on} the set $\Omega$
is just a \textbf{filter of} the Boolean algebra $\pow{\Omega}$.


\begin{lemma}
The set of all neighborhoods of a point of a topological space
is a proper filter on the space.
\end{lemma}

A filter on a topological space 
\begin{itemize}
\item 
\textbf{clusters} at a point of the space
if the union of this filter with the filter of neighborhoods of the point
generates a proper filter
(that is, every set in the former filter
has nonempty intersection with every set of the latter filter);
\item
\textbf{converges} to a point of the space
if the filter includes the filter of neighborhoods of the point.
\end{itemize}
A point where a filter clusters is a \textbf{cluster point} of the filter.

A cluster point of a filter need not belong to the intersection of the filter.
For example, in $\R$, every point of the interval $[0,1]$
is a cluster point of the filter generated by $(0,1)$.

The \textbf{closure} of a subset of a topological space
is the smallest closed set that includes the subset.

\begin{lemma}
  A cluster point of a filter
belongs to the closure of every set in the filter.
\end{lemma}

\begin{theorem}
A topological space is compact if and only if
every proper filter on it has a cluster point.
\end{theorem}

\begin{proof}
  Let $(A,\tau)$ be a topological space.
Suppose first the space is compact, 
and let $\mathscr F$ be a proper filter on $A$.
Then $\mathscr F\cap\tau$ is closed under taking finite intersections,
and in particular all such intersections are nonempty,
so $\bigcap(\mathscr F\cap\tau)$ must contain a point $p$,
by the compactness of $\tau$.
In particular, if $F\in\tau$ and $p\notin F$, 
that is, if $A\setminus F$ is an open neighborhood of $p$,
then $F\notin\mathscr F$,
so $\mathscr F\cup\{A\setminus F\}$ generates a proper filter.

Now suppose conversely that every proper filter on $A$ has a cluster point,
and let $\mathscr F$ be a subset of $\tau$ 
whose every finite subset has nonempty intersection.
Then $\mathscr F$ generates a filter on $A$,
and this filter clusters at some point $p$.
In this case, by the lemma, $p\in\bigcap\mathscr F$.
\end{proof}

An ultrafilter with a cluster point converges to that point.
Thus, on a compact space, every ultrafilter converges.

\begin{theorem}
  By the Prime Ideal Theorem\PI,
if every ultrafilter on a topological space converges,
the space is compact.
\end{theorem}

We shall want to allow the possibility
that an ultrafilter on a \emph{subspace} of a topological space
converges to a point of the larger space.
For this, we can use the following observation.

\begin{lemma}
If $A$ and $B$ are sets, and $A\included B$,
and $\mathscr U$ is an ultrafilter on $A$,
then the filter on $B$ that $\mathscr U$ generates is
\begin{equation*}
  \{X\included B\colon X\cap A\in\mathscr U\},
\end{equation*}
and this is an ultrafilter on $B$.
\end{lemma}

In the situation of the lemma,
if $B$ is actually a topological space,
we may say that $\mathscr U$ \textbf{converges} to a point of $B$
if the ultrafilter on $B$ that $\mathscr U$ generates converges to the point.

\begin{theorem}\label{thm:BA-hom}
  Suppose $A$ and $B$ are Boolean algebras,
$f$ is a homomorphism from $A$ to $B$,
and $\mathscr U$ is an ultrafilter of $B$.
Then $f\inv[\mathscr U]$
is an ultrafilter of $A$.
\end{theorem}

\begin{proof}
  If $x$ and $y$ are in $f\inv[\mathscr U]$,
then $f(x\land y)=f(x)\land f(y)$, which is in $\mathscr U$,
so $x\land y\in f\inv[\mathscr U]$.
If $z\in A$, then, since $f(\lnot z)=\lnot f(z)$, we have
\begin{multline*}
  \lnot z\in f\inv[\mathscr U]
\iff\lnot f(z)\in\mathscr U
\iff f(z)\notin\mathscr U\\
\iff z\notin f\inv[\mathscr U].\qedhere
\end{multline*}
\end{proof}

\begin{sloppypar}
Now we can expand the theorem 
that Stone spaces of Boolean algebras are compact
by considering also \emph{subspaces} of Stone spaces.
\end{sloppypar}

\begin{theorem}
Suppose $A$ is a Boolean algebra,
$U\in\Stone A$,
$\Omega\included\Stone A$, and
$\mathscr U$ is an ultrafilter on $\Omega$.
Then $\mathscr U$ converges to $U$ if and only if, for all $x$ in $A$,
\begin{equation*}
  U\in[x]\iff[x]\cap\Omega\in\mathscr U.
\end{equation*}
The set
\begin{equation*}
  \{x\in A\colon[x]\cap\Omega\in\mathscr U\}
\end{equation*}
is an element of $\Stone A$,
and therefore $\mathscr U$ converges to this point.
\end{theorem}

\begin{proof}
  By the Stone Representation Theorem for Algebras (page \pageref{thm:Stone2}),
the map $x\mapsto[x]\cap\Omega$ from $A$ to $\pow{\Omega}$ 
is a homomorphism of Boolean algebras,
so by the last theorem, the given set is an ultrafilter $U$ of $A$.
Then
\begin{equation*}
  U\in[x]\iff x\in U\iff[x]\cap\Omega\in\mathscr U,
\end{equation*}
so $\mathscr U$ converges to $U$.
\end{proof}

Letting $\Omega$ be $\Stone A$ itself
(and assuming the Prime Ideal Theorem\PI),
we obtain a neat proof that $\Stone A$ is compact.
Similarly,
we shall obtain the Compactness Theorem from \L o\'s's Theorem.
In this context,
we take $A$ to be $\Lin{\sig}$
and $\Omega$ to be the image of $\St[0]{\sig}$ under $T\mapsto T\modsim$.

\begin{corollary}
By the Prime Ideal Theorem\PI,  
$\St[0]{\sig}$ is compact if and only if,
for every ultrafilter $\mathscr U$ on this space,
there is $\str B$ in $\Str$ such that,
for every $\sigma$ in $\Sn$,
\begin{equation}\label{eqn:BssU}
  \str B\models\sigma\iff[\sigma\simcl]\in\mathscr U.
\end{equation}
\end{corollary}

\begin{proof}
  $\str B\models\sigma\iff\Th{\str B}\modsim\in[\sigma\simcl]$.
\end{proof}

Now, each $T$ in $\St[0]{\sig}$ has a model $\str A_T$.
Thus we obtain an indexed family $(\str A_T\colon T\in\St[0]{\sig})$ 
of structures of $\sig$, and then we have
\begin{align*}
  [\sigma\simcl]
&=\{T\in\St[0]{\sig}\colon\sigma\in T\}\\
&=\{T\in\St[0]{\sig}\colon\str A_T\models\sigma\}\\
&=\bv{\sigma}.
\end{align*}
We now have that \eqref{eqn:BssU} 
is equivalent to \eqref{eqn:Bsbv} in \L o\'s's Theorem.
Therefore the compactness of $\St[0]{\sig}$ follows from this theorem
if we let
\begin{equation*}
  \str B=\prod_{T\in\St[0]{\sig}}\str A_T/\mathscr U.
\end{equation*}

Conversely, we shall derive \L o\'s's Theorem from the Compactness Theorem
and the Tarski--Vaught Test---and the Axiom of Choice.
Suppose $(\str A_i\colon i\in\Omega)$ 
is an indexed family
of nonempty structures of $\sig$.
We may assume that the map $i\mapsto\Th{\str A_i}$
from $\Omega$ to $\St[0]{\sig}$ is injective.
(Otherwise we could enlarge $\sig$ to contain a nullary predicate $P_i$ 
for each $i$ in $\Omega$,
and we could define $P_i$ to be true in $\str A_j$ if and only if $i=j$.)
Then we may assume $\Omega$ is a subset of $\St[0]{\sig}$.

We can define $A$ as in \eqref{eqn:A=} (on page \pageref{eqn:A=})
and expand each $\str A_i$ to a structure $\str A_i{}^*$ of $\sig(A)$ 
as in \eqref{eqn:A*}.
Now using the map $i\mapsto\Th{\str A_i{}^*}$,
we may assume $\Omega$ is a subset of $\St[0]{\sig(A)}$.
Suppose $\mathscr U$ is an ultrafilter on $\Omega$.
By the Compactness Theorem,
$\mathscr U$ 
converges to some point $\Th{\str C}$ of $\St[0]{\sig(A)}$.
This means \eqref{eqn:BssU} holds, when $\str B$ is $\str C$,
for all $\sigma$ in $\Sn[\sig(A)]$.

But the structure $\str C$ has a substructure $\str B$ 
whose universe $B$ is $\{a^{\str C}\colon a\in A\}$.
Indeed, for every positive integer $n$,
if $F$ is an $n$-ary operation symbol of $\sig$,
and $(a^i\colon i<n)\in A^n$, let
\begin{equation*}
  b=\bigl(F^{\str A_i}(a^j\colon j<n)\colon i\in\Omega\bigr).
\end{equation*}
Then $\str C\models Fa^0\cdots a^{n-1}=b$.
Thus $\str B$ is well defined and $\str B\included\str C$.

Let $\psi(x)$ be a singulary formula of $\sig(A)$,
and as in the proof of \L o\'s's Theorem,
using the Axiom of Choice\ac,
let $a$ in $A$ be such that, for each $i$ in $\Omega$, \eqref{eqn:A_i} holds.
Then
\begin{equation*}
  \str C\models\Exists x\psi\iff\str C\models\psi(a).
\end{equation*}
By the Tarski--Vaught Test (page \pageref{thm:TV}),
$\str B\preccurlyeq\str C$.
Then \eqref{eqn:BssU} holds as it is, which means \L o\'s's Theorem holds.

\section{Closed sets}

Another way to think about \L o\'s's Theorem and the Compactness Theorem
is as follows.
First note that,
by Corollary \ref{cor:comp-eq} (page \pageref{cor:comp-eq}),
for all signatures $\sig$,
the space $\St[0]{\sig}$ of complete theories of $\sig$ is compact
if and only if its image under $T\mapsto T\modsim$ 
is a closed subspace of $\Stone{\Lin{\sig}}$.

\begin{lemma}
  If $A$ and $B$ are sets, and $A\included B$,
and $\mathscr U$ is an ultrafilter on $B$,
then the set
\begin{equation*}
  \{X\cap A\colon X\in\mathscr U\}
\end{equation*}
is a filter on $A$,
and if it is a proper filter, it is an ultrafilter.
\end{lemma}

\begin{lemma}
  Suppose $(B,\tau)$ is a topological space, and $A\included B$.
  \begin{compactenum}
  \item 
$A\in\tau$ if and only if 
no ultrafilter on $A$ converges to a point of $B\setminus A$.
\item
Suppose further that $(B,\tau)$ is Hausdorff.
Then $A\in\tau$ if and only if every convergent ultrafilter on $A$
converges to a point of $A$.
  \end{compactenum}
\end{lemma}

\begin{proof}
  Suppose $\mathscr U$ is an ultrafilter on $A$
that converges to a point $p$ of $B\setminus A$.
For every open neighborhood $U$ of $p$,
we must have $U\cap A\in\mathscr U$,
and in particular $U\cap A\neq\emptyset$.
Thus $B\setminus A$ cannot be an open neighborhood of $p$,
so $A$ is not closed.

Conversely, if $A$ is not closed,
then $B\setminus A$ has a point $p$
whose every open neighborhood contains a point of $A$.
Let $\mathscr U$ be an ultrafilter on $B$
that includes the filter of neighborhoods of $p$.
Then $\{X\cap A\colon X\in\mathscr U\}$
is a proper filter on $A$ and therefore an ultrafilter,
but it converges to $p$.
\end{proof}

Thus $\St[0]{\sig}$ is compact
if and only if,
on its image under $T\mapsto T\modsim$,
every ultrafilter converges to an element of this image.
But \L o\'s's Theorem establishes this convergence,
as before.

\chapter{Applications}

\section{The Prime Ideal Theorem}

We establish now the mutual equivalence of the following.
\begin{compactenum}
\item 
The Boolean Prime Ideal Theorem (page \pageref{thm:BPI}).
\item 
The Prime Ideal Theorem (page \pageref{thm:PI}).
\item
The Tychonoff Theorem (page \pageref{thm:Tychonoff})
restricted to Hausdorff spaces (page \pageref{Hausdorff}).
\item
The Compactness Theorem (page \pageref{thm:comp-alg}).
\end{compactenum}

In 1954, Dana Scott \cite{Scott-1954}
announced that the Boolean Prime Ideal Theorem
implies the Prime Ideal Theorem.%%%%%
\footnote{Scott spoke at 
\enquote{the five hundred third meeting 
of the American Mathematical Society\dots 
held at Yosemite National Park on Saturday, May 1, 1954.}
Thanks to Wilfrid Hodges for giving me the reference,
which is not available on MathSciNet.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
It is not clear what proof he had in mind.
Since the Boolean Prime Ideal Theorem implies the Compactness Theorem 
(\pageref{thm:comp-alg}),
we can establish the result as follows.
The \emph{diagram} of a structure was defined on page \pageref{diag}.

\begin{theorem}\label{thm:comp-PI}
The Compactness Theorem implies the Prime Ideal Theorem.
%and in particular the Boolean Prime Ideal Theorem.
\end{theorem}

\begin{proof}
Let $\sig$ be the signature of commutative rings,
let $T$ be the theory of nontrivial commutative rings in this signature,
and let $\str R\models T$.
Let $P$ be a new singulary predicate.
Every finite subset $\Gamma$ of the collection
\begin{multline*}
T\cup\diag{\str R}\cup\{P0,\;\lnot P1\}\\
\cup\{Pa\land Pb\lto P(a-b)\colon a\in R\And b\in R\}\\
%\cup\{Pa\lto P(ab)\colon a\in R\And b\in R\}\\
\cup\{P(ab)\liff Pa\lor Pb\colon a\in R\And b\in R\}
\end{multline*}
of sentences of $\sig(R)\cup\{P\}$ has a model.
Indeed, suppose $A$ is the set of elements of $R$ appearing in $\Gamma$.
Then $A$ generates a finite sub-ring $\str B$ of $\str R$,
and by Theorem \ref{thm:fin-gen} (page \pageref{thm:fin-gen}),
$\str B$ has a maximal ideal $\mathfrak m$, 
which is prime by Corollary \ref{cor:max-prime} (page \pageref{cor:max-prime}).
Then $\str B$ expands to the model $(\str B_A,\mathfrak m)$ of $\Gamma$.
By Compactness,
the whole collection above has a model $(\str S_R,\mathfrak p)$,
where $\str S$ is a ring with prime ideal $\mathfrak p$,
and (by Theorem \ref{thm:emb-diag}, page \pageref{thm:emb-diag}),
$\str R$ is a sub-ring of $\str S$.
Then $R\cap\mathfrak p$ is a prime ideal of $\str R$.
\end{proof}

By Theorems \ref{mith} and \ref{thm:mit-ac} (page \pageref{mith}), 
the Axiom of Choice and the Maximal Ideal Theorem are equivalent.
By Theorems \ref{thm:Tychonoff} and \ref{thm:tych-ac} 
(page \pageref{thm:Tychonoff}),
the Axiom of Choice and the Tychonoff Theorem are equivalent.
We shall now establish that a weaker form of the Maximal Ideal Theorem,
namely the Prime Ideal Theorem,
is equivalent to a weaker form of the Tychonoff Theorem.
The remaining theorems of this section
are due to \L o\'s and Ryll-Nardzewski \cite{MR0048795,MR0065527}.

\begin{theorem}
  The Boolean Prime Ideal Theorem
implies the Tychonoff Theorem for Hausdorff spaces.
\end{theorem}

\begin{proof}
  \begin{sloppypar}
  Suppose $\mathscr A$
is an indexed family $(A_i\colon i\in\Omega)$ of nonempty Hausdorff spaces.
We first show that its product is nonempty.
Let
\begin{equation*}
  B=\bigcup_{\Gamma\included\Omega}\prod_{i\in\Gamma}A_i,
\end{equation*}
and if $j\in\Omega$, let
\begin{equation*}
  B_j=\bigcup_{\{j\}\included\Gamma\included\Omega}\prod_{i\in\Gamma}A_i.
\end{equation*}
The $B_j$ generate a proper filter on $B$,
since for all $n$ in $\upomega$, 
if $\sigma$ is an injection from $n$ into $\Omega$, then
\begin{equation*}
\emptyset\pincluded\prod_{i<n}A_{\sigma(i)}
\included B_{\sigma(0)}\cap\dots\cap B_{\sigma(n-1)}.
\end{equation*}
Using the Boolean Prime Ideal Theorem,
we let $\mathscr U$ be an ultrafilter that includes this filter.
We shall derive from this an ultrafilter on each $A_i$.
If $p\in A_i$, let
\begin{equation*}
  C_i(p)=\{a\in B_i\colon a_i=p\}.
\end{equation*}
Then for all $p$ and $q$ in $A_i$,
\begin{equation*}
  p\neq q\implies C_i(p)\cap C_i(q)=\emptyset.
\end{equation*}
Thus the map $X\mapsto\bigcup_{p\in X}C_i(p)$ 
from $\pow{A_i}$ to $\pow B$ is a homomorphism $h_i$ of Boolean algebras.
By Theorem \ref{thm:BA-hom} (page \pageref{thm:BA-hom}),
$h_i{}\inv[\mathscr U]$ is an ultrafilter $\mathscr U_i$ on $A_i$.
Since $A_i$ is compact,
$\mathscr U_i$ converges to a point of $A_i$;
since $A_i$ is also Hausdorff,
$\mathscr U_i$ converges to a \emph{unique} point $a_i$ of $A_i$.
Then $(a_i\colon i\in\Omega)\in\prod\mathscr A$.
  \end{sloppypar}

We finished our proof of the general Tychonoff Theorem
by noting that 
the product of nonempty closed subsets of the $A_i$ is nonempty.
To reach this point, we used Zorn's Lemma.
But when the $A_i$ are Hausdorff,
we need only the Boolean Prime Ideal Theorem.
Indeed, 
suppose now $\mathscr X$ is a family of closed subsets of $\prod\mathscr A$
with the finite intersection property.
Then $\mathscr X$ generates a proper filter on $\prod\mathscr A$,
and by Theorem \ref{thm:MI} (page \pageref{thm:MI}),
this filter is included in an ultrafilter $\mathscr U$.
(Note that this conclusion requires $\prod\mathscr A$ to be nonempty,
so that $\pow{\prod\mathscr A}$ is a nontrivial ring.)
For each $i$ in $\Omega$, the set
\begin{equation*}
  \{\uppi_i[X]\colon X\in\mathscr U\}
\end{equation*}
is an ultrafilter on $A_i$ (why?),
so it converges to some $a_i$, which is unique since $A_i$ is Hausdorff.
Then $(a_i\colon i\in\Omega)\in\bigcap\mathscr X$.
Therefore $\prod\mathscr A$ is compact.
\end{proof}

\begin{lemma}
  The Tychonoff Theorem for Hausdorff spaces
implies that, 
whenever $\mathscr A$ is a family $(A_i\colon i\in\Omega)$ 
of nonempty compact Hausdorff spaces,
and moreover there is a symmetric binary relation $E$ on $\bigcup_{i\in\Omega}A_i$
such that
\begin{itemize}
\item 
for all distinct $i$ and $j$ in $\Omega$,
the subset $\{(x,y)\in A_i\times A_j\colon x\mathrel Ey\}$ 
of $A_i\times A_j$ is closed,
and also, 
\item
for every finite subset $\Omega_0$ of $\Omega$,
for some $x$ in $\prod\mathscr A$,
for all distinct $i$ and $j$ in $\Omega_0$,
$x_i\mathrel Ex_j$,
\end{itemize}
then the latter condition holds when $\Omega_0=\Omega$.
\end{lemma}

\begin{proof}
  If $X\included\Omega$, let
  \begin{equation*}
    T(X)=\left\{x\in\prod\mathscr A\colon
\bigwedge_{\substack{\{i,j\}\included X\\i\neq j}}
x_i\mathrel Ex_j\right\}.
  \end{equation*}
By hypothesis, when $X$ is finite, then $T(X)$ is nonempty.
Moreover,
\begin{equation*}
  T(X)=\bigcap_{\substack{\{i,j\}\included X\\i\neq j}}T(\{i,j\}),
\end{equation*}
so this is closed.
An element of the intersection
\begin{equation*}
  \bigcap_{\substack{X\included\Omega\\\card X<\upomega}}T(X)
\end{equation*}
would be the desired element of $\prod\mathscr A$;
since this product is compact, the desired element exists.
\end{proof}

\begin{theorem}
  The Tychonoff Theorem for Hausdorff spaces
implies the Boolean Prime Ideal Theorem.
\end{theorem}

\begin{proof}
Let $R$ be a Boolean ring,
and let $\Omega$ be the set of finitely generated nontrivial sub-rings of $R$.
These will be just the nontrivial \emph{finite} sub-rings of $R$.
Then $(\spec[B]\colon B\in\Omega)$ 
is a family of nonempty compact Hausdorff spaces:
we have this without any special assumption,
by Theorem \ref{thm:fin-gen}, page \pageref{thm:fin-gen}.
Then $E$ is as in the hypothesis of the lemma when,
if $B$ and $C$ are distinct elements of $\Omega$,
and $\mathfrak p\in\spec[B]$ and $\mathfrak q\in\spec[C]$,
\begin{equation*}
  \mathfrak p\mathrel E\mathfrak q\iff\mathfrak p\cap C=\mathfrak q\cap B.
\end{equation*}
Let $(\mathfrak p_B\colon B\in\Omega)$ be as guaranteed by the lemma.
Then $\bigcup_{B\in\Omega}\mathfrak p_B$ is a prime ideal of $R$.
\end{proof}

\section{The Axiom of Choice}

A function $f$ on a set $A$ of nonempty sets
is a \textbf{choice function} if for all $b$ in $A$, $f(b)\in b$.
Then the Axiom of Choice is equivalent to the statement
that every set of nonempty sets has a choice function.

The following result was published by Howard in 1975 \cite{MR0384548}.

\begin{theorem}\label{thm:PI-L-AC}
The Boolean Prime Ideal Theorem
and \L o\'s's Theorem together imply the Axiom of Choice.
\end{theorem}

\begin{proof}
  Let $A$ be a set of nonempty sets that does not have a choice function.
Let $\Omega=\bigcup A\cup A$,%%%%%
\footnote{Howard notes 
that we may assume the elements of $A$ pairwise disjoint, 
and that we may assume $A$ and $\bigcup A$ are disjoint.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
and let
\begin{equation*}
R=\left\{(x,y)\in\bigcup A\times A\colon x\in y\right\}
\cup\left\{(x,x)\colon x\in\bigcup A\right\}.
\end{equation*}
Then
\begin{equation*}
  (\Omega,R)\models\Forall y\Exists xx\mathrel Ry.
\end{equation*}
The subsets of $A$ on which there \emph{is} a choice function
constitute a proper ideal $\mathscr I$ on $A$.
Let $\mathscr U$ be an ultrafilter on $\Omega$
that includes the dual filter $\{\Omega\setminus X\colon X\in\mathscr I\}$
on $A$.
Then
\begin{equation*}
  \prod_{i\in\Omega}(\Omega,R)/\mathscr U\models\Forall y\Exists xx\mathrel Ry.
\end{equation*}
In particular, 
for the element $\bigl((i,i)\colon i\in\Omega\bigr)$ of $\Omega^{\Omega}$,
there exists an element $(a_i\colon i\in\Omega)$ such that
\begin{equation*}
  \{i\in\Omega\colon a_i\mathrel R i\}\in\mathscr U.
\end{equation*}
Let $B=\{i\in A\colon a_i\mathrel Ri\}$.
Then $\{(i,a_i)\colon i\in B\}$ is a choice function on $B$.
However, by assumption, there is a choice function also on $A\setminus B$.
Hence there is a choice function on $A$.
\end{proof}

\section{Arrow's Theorem}

This section is inspired by Sasha Borovik's article
\cite{Borovik-infinitesimals}.  
We
consider an index-set $\Omega$ as a set of \emph{voters.}
Each voter
$i$ in $\Omega$
is called on to assign a linear ordering $<_i$ to a set $A$ of
\emph{candidates.}
These orderings are to be used to assign a linear
ordering $<$ to $A$.  
This ordering $<$ should be a kind of average of
the orderings $<_i$.  
This suggests that we should take an
ultraproduct of the structures $(A,<_i)$.  
We shall see that, on some
reasonable assumptions, we \emph{must} do this.

We want to determine $<$ by first selecting a subset $D$ of
$\pow{\Omega}$ such that, for all $x$ and $y$ in $A$, we shall be able
to require
\begin{equation*}
  \{i\colon x<_i y\}\in D\implies x<y.
\end{equation*}
So $D$ will be, so to speak, a collection of `winning coalitions'.  
If
$X\in D$, then the members of $X$ can determine how the candidates in
$A$ shall be ordered (if all members of $X$ agree).
Then we must have, first of all,
\begin{gather*}
  D\neq\emptyset,\\
  X\in D\implies X\comp\notin D.
\end{gather*}
We also require that additional votes for a
particular ordering can only help that ordering:
\begin{equation*}
  X\in D\And X\included Y\included\Omega\implies Y\in D.
\end{equation*}
Hence in particular $\Omega\in D$.
We require voting to be decisive:
\begin{equation*}
  X\notin D\implies X\comp\in D.
\end{equation*}
If $A$ consists of just two candidates, this is all we need.  
Then $D$
is not necessarily an ultrafilter on $\Omega$; for it need not be
closed under intersections.  
Indeed, in the `democratic' case, if
$\Omega$ has a finite number $2n-1$ of members, then $D$ will be
$\{X\in\pow{\Omega}\colon\card X\geq n\}$; this is definitely not
closed under intersections unless $n=1$.

But now suppose $A$ contains
three distinct candidates, $a$, $b$, and $c$; and let
\begin{align*}
  \{i\colon a<_ib\}&=X,&\{i\colon b<_ic\}&=Y.
\end{align*}
Suppose both $X$ and $Y$ are in $D$.
Then we must conclude $a<b$ and $b<c$ and therefore $a<c$.  
We have now
\begin{align*}
  X\cap Y&\included\{i\colon a<_ic\}, & \{i\colon a<_ic\}&\in D.
\end{align*}
  However, possibly 
\begin{equation*}
  X\cap Y=\{i\colon a<_ic\};
\end{equation*}
this is the case when---as is possible---
\begin{gather*}
  \{i\colon c<_ia<_ib\}=X\setminus Y,\\
\{i\colon b<_ic<_ia\}=Y\setminus X,\\
\{i\colon c<_ib<_ia\}=(X\cup Y)\comp.
\end{gather*}
See Figure~\ref{fig:vote}.
\begin{figure}
\centering
%\psset{xunit=10mm,yunit=5mm}
  \begin{pspicture}(-3,-1)(3,1.5)
    \psellipse(-1,0)(2,1)
\psellipse(1,0)(2,1)
\rput(0,0){\makebox[0pt][c]{$a<b<c$}}
\rput(-2,0){\makebox[0pt][c]{$c<a<b$}}
\rput(2,0){\makebox[0pt][c]{$b<c<a$}}
\rput(0,1.3){\makebox[0pt][c]{$c<b<a$}}
  \end{pspicture}
  \caption{An election with three candidates}\label{fig:vote}
\end{figure}
Thus we must have $X\cap Y\in D$.  
Therefore $D$ is an ultrafilter on
$\Omega$.  
If $\Omega$ is finite, then $D$ must be a principal
ultrafilter: that is, one voter decides everything, and the system is
a dictatorship.

\section{Completeness of theories}

Using the Compactness Theorem, 
we can establish a complement to Theorem \ref{thm:dLST} 
(page \pageref{thm:dLST}):

\begin{theorem}[Upward L\"owenheim--Skolem]\label{thm:uLST}
If $\str A$ is an infinite structure with signature $\sig$, 
and $\max(\card A,\card{\sig})\leq\kappa$, 
then there is a structure $\str B$ such that
\begin{align*}
\str A&\preccurlyeq\str B,&\card B=\kappa.
\end{align*}
\end{theorem}

\begin{proof}
Let $C$ be the set $\{c_{\alpha}\colon\alpha<\kappa\}$ of new constants, all distinct.  
By Compactness, the set
\begin{equation*}
\Th{\str A_A}\cup\{c_{\alpha}\neq c_{\beta}\colon \alpha<\beta<\kappa\}
\end{equation*}
of sentences has a model $\str D_{A\cup C}$.  
By construction, this model has cardinality at least $\kappa$.  
By the downward version of the theorem, $\str D$ has an elementary substructure $\str B$ of size $\kappa$ such that $A\included B$.  
Since also $\str A\preccurlyeq\str D$, the structure $\str B$ is as desired.
\end{proof}

This theorem yields an easy test for completeness of theories.  
For an infinite cardinal $\kappa$, a theory is \textbf{$\kappa$-categorical} if all of its models of size $\kappa$ are isomorphic to one another.

\begin{theorem}[\L o\'s--Vaught Test]
If a theory $T$ of a signature $\sig$ has models, but no finite models; $\card{\sig}\leq\kappa$; and $T$ is $\kappa$-categorical; then $T$ is complete.
\end{theorem}

\begin{proof}
If $T$ contains neither $\sigma$ nor $\lnot\sigma$, then both $T\cup\{\lnot\sigma\}$ and $T\cup\{\sigma\}$ have models, which must be infinite.  
Then by the L\"owenheim--Skolem--Tarski theorems (both upward and downward forms may be needed), each of the two sets has a model of cardinality $\kappa$; but these two models cannot be isomorphic to one another.
\end{proof}

Algebraically closed fields are defined on page \pageref{alg}.

\begin{theorem}\label{thm:ACF_0}
\mbox{}
\begin{itemize}
\item
The theory of algebraically closed fields of characteristic $0$ is complete.
\item
For all primes $p$, the theory of algebraically closed fields of characteristic $p$ is complete.
\end{itemize}
\end{theorem}

\begin{proof}
None of these theories has a finite model.  
Every algebraically closed field is determined up to isomorphism by its characteristic and its transcendence-degree.  
If $\kappa$ is uncountable, 
then a field with tran\-scendence-degree $\kappa$ has cardinality $\kappa$.  
Now the \L o\'s--Vaught Test applies.
\end{proof}

Similarly we have the following (see page \pageref{mc} above):

\begin{theorem}\label{thm:ACF}
The theory of algebraically closed fields is model-com\-plete.
\end{theorem}

\begin{proof}
If $T$ is this theory, $K\models T$, and $\card K<\kappa$, then $T\cup\diag K$ is $\kappa$-categorical, but has no finite models.
\end{proof}

We can also now prove the converse of the lemma on page \pageref{lem:TA} above. 

\begin{theorem}\label{thm:TA}
For all theories $T$, the models of $T_{\forall}$ are precisely the substructures of models of $T$.
\end{theorem}

\begin{proof}
Assuming $\str A\models T_{\forall}$, we want to show $T\cup\diag{\str A}$ has a model.  
By Compactness, and since $\diag{\str A}$ is closed under conjunction, it is enough to show $T\cup\{\theta(\vec a)\}$ has a model whenever $\theta$ is a quantifier-free formula of $\sig$ and $\str A\models\theta(\vec a)$.  
If it has no model, then $T\proves\lnot\theta(\vec a)$, so (since no entry of $\vec a$ is in $\sig$) $T\proves\Forall{\vec x}\lnot\theta(\vec x)$, and therefore $\str A\models\Forall{\vec x}\lnot\theta(\vec x)$, which is absurd.
\end{proof}

In particular, when $T$ is just field-theory,\label{id} 
then $T_{\forall}$ is the theory of integral domains,
by Corollary \ref{cor:ID} (page \pageref{cor:ID}).

\section{Elementary classes}

In \cite{MR0075156} \L o\'s defined ultraproducts (but not by that
name) in order to state the following algebraic test for being an
elementary class of structures.

\begin{theorem}
A subclass of $\Str$ is elementary if and only if it contains:
\begin{compactitem}
\item
 every structure that is elementarily equivalent to a member, and
 \item
 every ultraproduct of members.
 \end{compactitem}
\end{theorem}

\begin{proof}
The `only if' direction is the easier.  
An elementary class is the
class of models of some theory $T$.  
If the class is $\mathcal K$, and
$\str A\in\mathcal K$, and $\str A\equiv\str B$, then $\str B\models
T$, so $\str B\in\mathcal K$.  
If $\{\str A_i\colon
i\in\Omega\}\included\mathcal K$, then $\str A_i\models T$ in each
case, so every ultraproduct of the $\str A_i$ is a model of $T$, by \L
o\'s's Theorem.

The more difficult direction is `if'.  
Suppose $\mathcal K$ is a
non-elementary subclass of $\Str$.  
Then there is a model $\str B$ of
$\Th{\mathcal K}$ that does not belong to $\mathcal K$.  
However,
every element $\sigma$ of $\Th{\str B}$ has a model in $\mathcal K$,
since otherwise $\lnot\sigma$ would be in $\Th{\mathcal K}$.
Therefore every finite subset $\Delta$ of $\Th{\str B}$ has a model
$\str A_{\Delta}$ in $\mathcal K$ (since otherwise the negation of the
conjunction of the members of $\Delta$ would be in $\Th{\mathcal K}$).
By (the proof of) the Compactness Theorem, some ultraproduct of $(\str
A_{\Delta}\colon\Delta\in\powf{\Th{\str B}})$ is elementarily
equivalent to $\str B$. 
\end{proof}


\section{Saturation}

If $V$ is a finite set of variables,
a \textbf{$V$-type} is just a subset of $\Fm[V]{\sig}$.
A $V$-type is \textbf{complete}
if its image in $\Lin[V]{\sig}$ under $\phi\mapsto\phi\simcl$ is an ultrafilter.
Usually $V=\{x_0,\dots,x_{n-1}\}$,
and then $V$-types are called $n$-types.
In this case,
a subset $\Gamma$ of $\Fm[V]{\sig}$ is a complete type
if and only if
\begin{itemize}
\item
for all $\phi$ in $\Fm[V]{\sig}$, 
exactly one of $\phi$ and $\lnot\phi$ is in $\Gamma$, and
\item
for all finite subsets $\{\phi_0,\dots,\phi_{m-1}\}$ of $\Gamma$,
there is a model of
\begin{equation*}
  \Exists{x_0}\cdots\Exists{x_{n-1}}(\phi_0\land\dots\land\phi_{m-1}).
\end{equation*}
\end{itemize}
If $\str M\in\Str$, and $A$ is a subset of $M$, 
then,
slightly generalizing the notation introduced on page \pageref{A_A},
we denote by
\begin{equation*}
  \str M_A
\end{equation*}
the structure $\str M$, expanded in the obvious way to the signature $\sig(A)$.
An $n$-type $\Gamma$ of $\sig(A)$ is \textbf{consistent with} $\str M$ if,
for all finite subsets $\{\phi_0,\dots,\phi_{m-1}\}$ of $\Gamma$,
\begin{equation*}
\str M\models\Exists{x_0}\cdots\Exists{x_{n-1}}(\phi_0\land\dots\land\phi_{m-1}),
\end{equation*}
that is,
\begin{equation*}
(\Exists{x_0}\cdots\Exists{x_{n-1}}(\phi_0\land\dots\land\phi_{m-1}))
\in\Th{\str M_A}.
\end{equation*}
In this case, by Compactness,
if $\vec c$ is an $n$-tuple $(c_i\colon i<n)$ of new constants,
then there is a model $\str N$ 
of $\Th{\str M_M}\cup\{\phi(\vec c)\colon\phi\in\Gamma\}$.
Then we may assume $\str M_M\included\str N\restriction\sig(M)$,
and then
\begin{equation*}
  \str M\preccurlyeq\str N\restriction\sig.
\end{equation*}
We say $\Gamma$ is \textbf{realized} in $\str N\restriction\sig$
by $(c_0{}^{\str N},\dots,c_{n-1}{}^{\str N})$.

If $\str M$ is considered as fixed,
we may denote by
\begin{equation*}
\St[n]A
\end{equation*}
the set of all complete $n$-types of $\sig(A)$ 
that are consistent with $\str M$.
The elements of $A$ are the \textbf{parameters} of elements of $\St[n]A$.

\begin{sloppypar}
For every infinite cardinal $\kappa$, 
a structure is called \textbf{$\kappa$-saturated} 
if it realizes every type that is consistent with it
and that has fewer than $\kappa$-many parameters.  
In particular, a structure is \textbf{$\upomega_1$-saturated} 
or \textbf{$\aleph_1$-saturated} 
if it realizes all types in countably many parameters.
\end{sloppypar}

\begin{theorem}\label{thm:sat}\sloppy
For every structure $\str A$ with a countable signature, every non-principal ultrapower $\str A^{\upomega}/P$ of $\str A$ is $\upomega_1$-saturated.
\end{theorem}

\begin{proof}
If $\Phi$ is a type in countably many parameters, then $\Phi$ itself is countable, so we can write it as $\{\phi_n\colon n\in\upomega\}$.  
Let $\vec a_n$ satisfy $\phi_0\land\dots\land\phi_n$ in $\str A$.  
Then
\begin{equation*}
k\leq n\implies\str A\models\phi_k(\vec a_n).
\end{equation*}
Therefore, if $P$ is a non-principal prime ideal of $\pow{\upomega}$, then $(\vec a_n\colon n\in\upomega)/P$ realizes $\Phi$ in $\str A^{\upomega}/P$.
\end{proof}

There is a version \cite[Thm 6.1.1, p.\ 384]{MR91c:03026} of the foregoing for uncountable index-sets (or exponents) $\Omega$; but then $P$ must have a countable subset whose union is $\Omega$ (so one should show that such prime ideals can be found).


\section{A countable non-standard model of arithmetic}\label{sect:Skolem}

\begin{sloppypar}
By \textbf{arithmetic} we mean the theory of $(\upomega,+,\cdot)$ or
of $(\upomega,+,\cdot,0,1,\leq)$; it makes little difference, since
\begin{compactenum}[1)]
\item  
$\leq$ is definable in $(\upomega,+,\cdot)$ by the formula $\Exists
  zx+z=y$,
\item
$\{0\}$ is definable by $\Forall yy+x=y$,
\item
$\{1\}$ is definable by $0<x\land\Forall y(0=y\lor x\leq y)$.
\end{compactenum}
Similarly $\{n\}$ is definable in $(\upomega,+,\cdot)$ for all $n$ in
$\upomega$.
\end{sloppypar}

Every ultrapower of $(\upomega,+,\cdot)$ is a model of arithmetic.
Every \emph{non-prin\-cipal} ultrapower $\str B$ (determined by a
non-principal ultrafilter $F$ on $\upomega$) is a
\emph{non-standard} model of arithmetic,
in the sense that it is not isomorphic to $(\upomega,+,\cdot)$, but
contains an infinite element $c$.  
However, $\str B$ here must be
uncountable by Theorem~\ref{thm:card}.  
As we noted before this
theorem, by the Downward L\"owenheim--Skolem--Tarski Theorem (Theorem
\ref{thm:dLST}), we can obtain a countable elementary substructure
$\str A$ of $\str B$ that includes $\upomega\cup\{c\}$, and then $\str
A$ will be an elementary extension of $(\upomega,+,\cdot)$. 

We can construct such a structure $\str A$ more directly as follows.
Let $A$ be the set of \emph{$0$-definable} singulary operations of
$(\upomega,+,\cdot)$.  
This means $f\in A$ if and only if the relation
$\{(x,f(x))\colon x\in\upomega\}$ is $0$-definable (that is, definable
without parameters).  
We can consider
$A$ as a subset of $\upomega^{\upomega}$.  
Then a constant sequence
$(x,x,x,\dots)$ should be understood as the constant function
$\{(n,x)\colon n\in\upomega\}$ or $n\mapsto x$, which is in $A$.  
Thus
the diagonal map 
embeds $\upomega$ in $A$.  
Also $A$ is closed under $+$ and $\cdot$.
Therefore $A$ is the universe of a substructure $\str A$ of $\str B$.  
Also,
if $n\in\upomega$, and $\phi$ is an $(n+1)$-ary formula, and $\vec
f$ is an element $(f^0,\dots,f^{n-1})$ of $A^n$, then $A$ has an
element $g$ such that for all $i$ in 
$\upomega$, 
\begin{equation*}
(\upomega,+,\cdot)\models\Exists y\phi(\vec f(i),y)\iff (\upomega,+,\cdot)\models \phi(\vec f(i),g(i)).
\end{equation*}
Indeed, $g$ can be such that $g(i)$ is the \emph{least} $b$ such that
\begin{equation*}
(\upomega,+,\cdot)\models\phi(\vec f(i),b), 
\end{equation*}
if such $b$ exist; and
otherwise $g(i)=0$.  
Then $g$ is defined by the formula 
\begin{multline*}
(\phi(\vec f(x),y)\land(\Forall z(\phi(\vec f(x),z)\lto y\leq
  z)))\\
\lor(\Forall z\lnot\phi(\vec f(x),z)\land y=0). 
\end{multline*}
It follows by the Tarski--Vaught Test (page \pageref{thm:TV}) that
\begin{equation*}
  \str A\preccurlyeq\str B;
\end{equation*}
therefore, since $(\upomega,+,\cdot)\included\str A$, we have
\begin{equation*}
  (\upomega,+,\cdot)\prec\str A.
\end{equation*}
Indeed, we now have that the following are equivalent:
\begin{gather*}
  \str B\models\Exists y\phi(\vec f,y),\\
\{i\colon(\upomega,+,\cdot)\models\Exists y\phi(\vec f(i),y)\}\in F,\\
\{i\colon(\upomega,+,\cdot)\models\phi(\vec f(i),g(i))\}\in F,\\
  \str B\models\phi(\vec f,g).
\end{gather*}
Now the Tarski--Vaught Test applies.  
This construction of $\str A$ is
apparently due to Skolem.%%%%%
\footnote{I take it from Bell and Slomson \cite[Ch.\ 12, \S2]{MR0269486}.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\chapter{Completeness of proof systems}\label{ch:complete}

%SOME DETAILS IN THIS CHAPTER HAVE NOT BEEN SATISFACTORILY WORKED OUT!!!
Recall from page \pageref{valid}
that a sentence true in all structures of its signature 
is called \emph{valid.}
It is easy in principle to show that a sentence is \emph{not} valid: 
just exhibit a model of its negation.
But if a sentence \emph{is} valid, how can we show this?
We cannot simply verify the sentence in each structure of its signature, 
since there will be infinitely many of these structures,
and even to verify a universal sentence $\Forall x\phi$ 
in \emph{one} infinite structure
requires checking infinitely many individual cases.  

The method of \emph{formal proof} 
is a way to establish the validity of sentences.

We shall develop a \emph{proof-system}
in which every provable sentence is valid.
That is the easy part.
The harder part is to show that, 
if a sentence is not provable in our system,
then its negation has a model.
Equivalently, every validity will have a formal proof in the proof-system.
This result is \emph{G\"odel's Completeness Theorem.} 
A model of the negation of an unprovable sentence
can be obtained as an ultraproduct,
which is why we consider the whole subject here.

Recall again from page \pageref{valid} 
that a sentence with no models is a \emph{contradiction.}
In our proof-system, there will be a notion of proving some sentences
with the use of other sentences as hypotheses.
A set of sentences from which a contradiction cannot be proved
is \textbf{consistent} (with respect to the proof-system).
G\"odel's methods generalize to show that,
at least in countable signatures,
every consistent set of sentences has a model.
The Compactness Theorem for countable signatures 
is a corollary of this result.
Indeed, if a set $\Gamma$ of sentences has no model,
then by G\"odel's Completeness Theorem
a contradiction can be proved from $\Gamma$.
But proofs are finite,
and so a contradiction can be proved 
from a finite subset $\Gamma_0$ of $\Gamma$,
and therefore $\Gamma_0$ has no model.

\section{Formal proofs}

It will be convenient to work,
not only with sentences,
but with arbitrary formulas.
A \textbf{formal proof} is just a (finite) list of formulas 
such that each formula on the list is either
\begin{compactenum}[1)]
\item
an \emph{axiom,} or
\item
derivable from formulas earlier in the list 
by means of a \emph{rule of inference.}
\end{compactenum}
We choose the axioms and rules of inference to serve our needs; 
taken all together, they constitute a \textbf{proof-system.}  
In a formal proof in such a system,
the last formula is then said to be \textbf{provable} in the system,
or to be a \textbf{theorem} of the system.  
Note that in fact \emph{every} formula in a formal proof is provable, 
because every initial segment of a formal proof is still a formal proof.

A proof-system is \textbf{sound}
if each of its theorems that is a sentence is valid;
\textbf{complete,} if each validity is a theorem.
In his doctoral dissertation of 1930, 
G\"odel \cite{Goedel-compl} defined a sound proof-system, 
obtained from the \emph{Principia Mathematica} \cite{PM} 
of Russell and Whitehead,
and showed that it was complete.
 
In formulas as defined on page \pageref{subsect:formulas},
the logical symbols that can appear are 
$=$, $\lnot$, $\lor$, $\land$, $\lto$, $\liff$, $\Exists$, $\Forall$, 
variables, and parentheses.
(The other symbols come from the signature being used.)
In fact we do not need $\land$, $\lto$, $\liff$ and $\Exists$,
but can understand them as abbreviations:
\begin{align*}
  \phi\land\psi&\text{ for }\lnot(\lnot\phi\lor\lnot\psi),\\
  \phi\lto\psi&\text{ for }\lnot\phi\lor\psi,\\
  \phi\liff\psi&
\text{ for }\lnot(\lnot\phi\lor\lnot\psi)\lor\lnot(\phi\lor\psi),\\
  \Exists x\phi&\text{ for }\lnot\Forall x\lnot\phi.
\end{align*}

The first four of G\"odel's axioms, or rather \emph{schemes} of axioms, 
are found on page 13, Chapter 1, of the \emph{Principia Mathematica.}
Recall that, by our convention on symbolic precedence 
given on page \pageref{precedence}, $\lor$ takes precedence over $\lto$, 
and of two instances of $\lto$, the one on the right takes precedence.%%%%%
\footnote{For Russell and Whitehead,
the \emph{primitive} Boolean connectives are $\lor$ and $\lnot$; 
the expression $\phi\lto\psi$ can then be understood 
as an abbreviation of $\lnot\phi\lor\psi$.
As G\"odel notes, after the first four axioms,
there was a fifth, 
namely $\phi\lor(\psi\lor\chi)\lto\psi\lor(\phi\lor\chi)$,
but Bernays showed it to be redundant.  
For us, each of the four axioms 
represents infinitely many axioms, 
since $\phi$, $\psi$, and $\chi$ can be any formulas.  
It should be noted that Russell and Whitehead 
were involved in \emph{creating} formal logic; 
in their time,
our way of understanding formulas was not yet fully developed.  
For an amusing fictionalized account of Russell's interactions with G\"odel, 
see \emph{Logicomix} \cite{Logicomix}.}  
%%%%%%%%%%%%%%%%%%%%%%%%%%
By this convention then, the four axiom schemes are as follows.
\begin{compactenum}[1)]\label{4axioms}
\item
$\phi\lor\phi\lto\phi$,
\item
$\phi\lto\phi\lor\psi$,
\item
$\phi\lor\psi\lto\psi\lor\phi$,
\item
$(\phi\lto\psi)\lto\chi\lor\phi\lto\chi\lor\psi$.
\end{compactenum}
\sloppy
The remaining axiom schemes involve variables explicitly.
Given a formula $\phi$ and variables $x$ and $y$,
we use the expression
\begin{equation*}
  \phi^x_y
\end{equation*}
\fussy
to denote the result of replacing 
every free occurrence of $x$ in $\phi$ with $y$.
We say that $y$ is \textbf{substitutable} for $x$ in $\phi$
if there is no subformula $\Forall y\psi$ of $\phi$
in which there is an occurrence of $x$ 
that is free as an occurrence in $\phi$.
For example, suppose $\phi$ is $\Forall y(x\neq y\lto x=y)$,
which is false when $x\neq y$.
Then $\phi^x_y$ is $\Forall y(y\neq y\lto y=y)$, which is valid;
but $y$ is not substitutable for $x$ in $\phi$.

Two of G\"odel's remaining axioms are found
in Chapter 9 of the \emph{Principia Mathematica} 
(at $*$9.2 and $*$9.25, pp.~138--40).%%%%%
\footnote{G\"odel's own reference 
is to the \emph{Principia Mathematica's} Chapter 10,
where the axioms are repeated,
at $*$10.1 and $*$10.12, pp.~145--6.
G\"odel's six axioms 
used \emph{propositional variables} 
where I put $\phi$, $\psi$, and $\chi$, 
and they used a \emph{functional variable} where I put $\theta$.  
Then in addition to the rules of inference given below, 
there was a rule
allowing propositional and functional variables 
to be replaced by \emph{formulas} in our sense.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
	\begin{compactenum}[1)]\setcounter{enumi}4
	\item\label{ax:spec}
$\Forall x\phi\lto\phi$.%%%%%
\footnote{G\"odel gives a stronger form: 
$\Forall x\phi\lto\phi^x_y$, if $y$ is substitutable for $x$ in $\phi$,
but we do not need it: see Theorem \ref{thm:AE}.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item\label{ax:a-or}
$\Forall x(\theta\lor\phi)\lto\theta\lor\Forall x\phi$, 
if $x$ does not occur freely in $\theta$.
\end{compactenum}
Another axiom scheme involves a \textbf{change of bound variable:}
\begin{compactenum}[1)]\setcounter{enumi}6
\item 
$\phi\lto\phi'$, where $\phi$ has a subformula $\psi$
in which a variable $x$ does not occur freely,
and there is a variable $y$ not occurring in $\psi$ at all,
and $\phi'$ is the result 
of replacing each occurrence of $x$ in $\psi$ with $y$.
\end{compactenum}
Equality is treated in two axiom schemes, 
found in Chapter 13 of the \emph{Principia Mathematica} 
(at $*$13.15 and $*$13.101, pp.\ 177--8):
\begin{compactenum}[1)]\setcounter{enumi}7
\item
$x=x$,
\item
$x=y\lto\phi\lto\phi^x_y$, if $y$ is substitutable for $x$ in $\phi$.
\end{compactenum}
The rules of inference are three:%%%%%
\footnote{See the previous footnote
on G\"odel's additional rule of inference.
G\"odel apparently expressed the axiom of change of bound variable
together with the rule of change of free variable as one rule, 
stated simply as, 
`Individual variables (free or bound) may be replaced by others, 
so long as this does not cause overlapping of the scopes of variables
denoted by the same sign' \cite[p.\ 584]{Goedel-compl}.
Concerning all of his rules of inference, G\"odel notes, 
`Although Whitehead and Russell use these rules throughout their derivations, 
they do not formulate all of them explicitly.'}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{description}
\item[Detachment:]
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
From $\phi$ and $\phi\lto\psi$ may be inferred $\psi$.%%%%%
\footnote{Detachment is not G\"odel's name for this rule; 
he (or more precisely his translator) calls it the Inferential Schema.}
\item[Generalization:]
From $\theta$ may be inferred $\Forall x\theta$.
\item[Change of Free Variable:]
From $\phi$ may be inferred $\phi^x_y$, 
provided $y$ is substitutable for $x$ in $\phi$.
\end{description}

In the Rule of Change of Free Variable as stated above,
$y$ is \textbf{substituted for} $x$;
in the Rule of Generalization, $x$ is \textbf{generalized on.}
A \textbf{generalization} of a formula $\phi$ 
is a sentence $\Forall{\vec x}\phi$ 
in which all free variables of $\phi$ are generalized on.
Then we can generalize the notion of validity 
by saying that an arbitrary formula is \textbf{valid} 
if some (and hence every) generalization
of it is true in every structure of its signature. 

\begin{theorem}[Soundness]
Every provable formula is valid.   
\end{theorem}

\begin{proof}
Induction.  
The axioms are valid, and the rules of inference preserve validity.
\end{proof}

We shall want to avoid writing down actual proofs,
being content to recognize that they must exist,
because of results like the following.

\begin{theorem}[Detachment]
  If $\phi$ and $\phi\lto\psi$ are provable,
then so is $\psi$.
\end{theorem}

\begin{proof}\sloppy
  If $\chi_0,\dots,\chi_{n-1},\phi$ and $\chi_n,\dots,\chi_{n+m-1},\phi\lto\psi$
are proofs, then so is
\begin{equation*}
  \chi_0,\dots,\chi_{n-1},\chi_n,\dots,\chi_{n+m-1},\phi,\phi\lto\psi,\psi.
\qedhere
\end{equation*}
\end{proof}

\section{Propositional logic}

A completeness theorem for \emph{propositional logic}
was already known before G\"odel's completeness theorem.%%%%%
\footnote{G\"odel's reference for this is Bernays from 1926; but
the theorem can be found in Post's 1921 article \cite{Post}.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\textbf{Propositional formulas} are, strictly, 
not formulas as defined in \S\ref{subsect:formulas} 
(page \pageref{subsect:formulas}) above; 
but they can be understood as formulas in which:
\begin{compactenum}[1)]
  \item
the place of atomic formulas is taken by \textbf{propositional
  variables;}
\item
no quantification symbol $\exists$ or $\forall$ is used.
\end{compactenum}
That is, for us, since we treat $\land$, $\lto$, and $\liff$ as abbreviations,
\begin{compactenum}[1)]
\item 
every propositional variable is a formula;
\item
if $F$ is a propositional formula, so is $\lnot F$;
\item
if $F$ and $G$ are propositional formulas, so is $(F\lor G)$.
\end{compactenum}
There are no \emph{individual} variables in a propositional formula, 
but only \emph{propositional} variables.  
A \emph{structure} for propositional logic 
assigns a truth-value to each of these propositional variables.  
Then a propositional formula is true or false in the structure, 
according to the relevant parts of the definition of truth of sentences 
(on page \pageref{truth}),
which we can express symbolically now as:
\begin{gather*}
\str A\models\lnot\sigma\iff \str A\nmodels\sigma,\\
\str A\models\sigma\lor\tau\iff \str A\models\sigma\Or\str A\models\tau.
\end{gather*}
We may treat the truth-value \emph{true} as $1$, and \emph{false} as $0$.  
Then a propositional formula $F$,
in an $n$-tuple $(P_0,\dots,P_{n-1})$ of propositional variables,
determines an $n$-ary operation $\hat F$ on $2$ (a map from $2^n$ to $2$), 
where if $\vec e\in 2^n$, 
then $\hat F(\vec e)$ is the truth-value of $F$ 
in any propositional structure 
that assigns the value $e_i$ to $P_i$ when $i<n$.  
This operation $\hat F$ can be described completely in a \emph{truth-table.}  
%If the operation is identically $1$, then $F$ is a \textbf{propositional tautology.}  

\begin{theorem}[Propositional Completeness]
The first four axiom-schemes above (page \pageref{4axioms}), 
along with the inference-rule of Detachment,
constitute a (sound and) complete proof-system for propositional logic.  
\end{theorem}

We are not going to prove this,
since we already have an algorithm 
for determining whether a formula is a propositional validity: 
just write out its truth-table.%%%%%
\footnote{This is not a practical algorithm
for long formulas;
it may be more efficient to check a proposed formal proof of a formula
than to write out the truth table of the formula.
On the other hand, we have no algorithm for finding formal proofs.
Then again, the proof of the completeness theorem would supply an algorithm.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

Let us for the moment refer to atomic formulas and generalizations
as \textbf{elementary formulas} \cite[p.~26]{MR1809685}.
In a formula, 
if every elementary subformula is replaced with a propositional variable, 
the result is a propositional formula.
Then we may refer to one formula $\phi$ as a
\textbf{tautological consequence} of a finite set $\Gamma$ of formulas
if, when all of these formulas are converted to propositional formulas,
$\phi$ becomes true in every structure in which
the formulas of $\Gamma$ become true.
A formula is a \textbf{tautology,} simply,
if it is a tautological consequence of the empty set of formulas.

\begin{theorem}[Tautology]
  If every formula in a finite set $\Gamma$ of formulas is a theorem,
and $\phi$ is a tautological consequence of $\Gamma$,
then $\phi$ is a theorem.
\end{theorem}

\begin{proof}
  Write $\Gamma$ as $\{\psi_0,\dots,\psi_{n-1}\}$.
Under the hypothesis, the formula
\begin{equation*}
  \psi_0\lto\dots\lto\psi_{n-1}\lto\phi
\end{equation*}
is a propositional validity,
so it is a theorem.
By the Detachment Theorem, $\phi$ must be a theorem.
\end{proof}


\begin{theorem}
  The formula
  \begin{equation*}
    \Forall x(\phi\lto\psi)\lto\Forall x\phi\lto\Forall x\psi
  \end{equation*}
is always provable.
\end{theorem}

\begin{proof}
  Suppose $y$ does not occur in $\phi$ or $\psi$.
Then the following formulas are provable:
\begin{align*}
&\begin{gathered}
  \Forall x(\phi\lto\psi)\lto\phi\lto\psi,\\
\Forall x\phi\lto\phi,\\
  \Forall x(\phi\lto\psi)\lto\Forall x\phi\lto\psi,\\
  \Forall x(\phi\lto\psi)\lto\Forall x\phi\lto\psi^x_y,\\
\Forall y(\Forall x(\phi\lto\psi)\lto\Forall x\phi\lto\psi^x_y),\\
\Forall x(\phi\lto\psi)\lto\Forall y(\Forall x\phi\lto\psi^x_y),\\
\Forall x(\phi\lto\psi)\lto\Forall x\phi\lto\Forall y\psi^x_y,\\
\Forall x(\phi\lto\psi)\lto\Forall x\phi\lto\Forall x\psi.
\end{gathered}&
&\begin{aligned}
    &\text{[Axiom \ref{ax:spec}]}\\
    &\text{[Axiom \ref{ax:spec}]}\\
    &\text{[Tautology Theorem]}\\
    &\text{[Change of Free Variable]}\\
    &\text{[Generalization]}\\
    &\text{[Axiom \ref{ax:a-or}]}\\
    &\text{[Axiom \ref{ax:a-or}]}\\
    &\text{[Change of Bound Var.]}\qedhere
  \end{aligned}
\end{align*}
\end{proof}

\begin{theorem}\label{thm:dist}
If $x_0$, \dots, $x_{n-1}$ are not free in $\theta$, then the formula
\begin{equation*}%\label{eqn:rule}
\Forall{x_0}\cdots\;\Forall{x_{n-1}}(\theta\lor\phi)
\lto\theta\lor\Forall{x_0}\cdots\;\Forall{x_{n-1}}\phi
\end{equation*}
is provable.  
\end{theorem}

\begin{theorem}\label{thm:AE}
If each variable $y_i$ is substitutable for $x_i$ in $\phi$, then the formulas
\begin{gather*}
  \Forall{x_0}\dots\;\Forall{x_{n-1}}\phi
\lto\phi^{x_0\cdots x_{n-1}}_{y_0\cdots y_{n-1}},\\
\phi^{x_0\cdots x_{n-1}}_{y_0\cdots y_{n-1}}
\lto\Exists{x_0}\dots\;\Exists{x_{n-1}}\phi
\end{gather*}
are provable.
\end{theorem}

\begin{proof}
  \begin{asparaenum}
    \item
The following are instances of Axiom \ref{ax:spec}:
\begin{gather*}
\Forall{x_0}\dots\;\Forall{x_{n-1}}\phi
\lto\Forall{x_1}\dots\;\Forall{x_{n-1}}\phi,\\
\Forall{x_1}\dots\;\Forall{x_{n-1}}\phi
\lto\Forall{x_2}\dots\;\Forall{x_{n-1}}\phi,\\
\makebox[3cm]{\dotfill},\\
\Forall{x_{n-2}}\Forall{x_{n-1}}\phi
\lto\Forall{x_{n-1}}\phi,\\
\Forall{x_0}\phi
\lto\phi.
\end{gather*}
Then $\Forall{x_0}\dots\;\Forall{x_{n-1}}\phi\lto\phi$ is provable
by the Tautology Theorem.
Since no $x_i$ is free in the subformula 
$\Forall{x_0}\dots\;\Forall{x_{n-1}}\phi$,
by the Rule of Change of Free Variable we can now prove
\begin{equation*}
  (\Forall{x_0}\dots\;\Forall{x_{n-1}}\phi\lto\phi)^{x_0\cdots x_{n-1}}_{y_0\cdots y_{n-1}},
\end{equation*}
which is 
$\Forall{x_0}\dots\;\Forall{x_{n-1}}\phi\lto\phi^{x_0\cdots x_{n-1}}_{y_0\cdots y_{n-1}}$,
as desired.
\item
From Axiom \ref{ax:spec}, using the tautology
$(\psi\lto\lnot\chi)\lto\chi\lto\lnot\psi$ in the form
\begin{equation*}
  (\Forall x\lnot\phi\lto\lnot\phi)\lto\phi\lto\Exists x\phi
\end{equation*}
(that is, 
$(\Forall x\lnot\phi\lto\lnot\phi)\lto\phi\lto\lnot\Forall x\lnot\phi$),
we obtain the axiom
\begin{equation*}
  \phi\lto\Exists x\phi.
\end{equation*}
Now an argument like the previous one yields the claim.\qedhere
  \end{asparaenum}
\end{proof}



\section{Sequents}


If $\phi$ is a formula, 
then a formal proof \textbf{from} $\phi$ as a \textbf{hypothesis}
is a formal proof in the earlier sense, 
except
\begin{itemize}
\item 
$\phi$, like an axiom, may be introduced into the proof, but 
\item
\emph{no free variable of $\phi$ may be substituted for or generalized on.}
\end{itemize}
If $\psi$ is provable from $\phi$ in this sense, we may write\label{proves}
\begin{equation}\label{eqn:sequent}
\phi\proves\psi.
\end{equation}
If $\phi$ is provable, simply, then we may express this by
\begin{equation*}
  \proves\phi.
\end{equation*}
The restriction on the use of Generalization and Change of Variables
ensures that the following is true.

\begin{theorem}
If $\phi\proves\psi$, then the formula $\phi\lto\psi$ is valid.  
\end{theorem}

\begin{proof}
Induction on $\psi$.
\end{proof}

We may call an expression as in \eqref{eqn:sequent} a \textbf{sequent.}  
It will be useful to note the following,
so that we can work with sequents
rather than formal proofs themselves.

\begin{theorem}\label{thm:trans-seq}\mbox{}
  \begin{compactenum}
  \item 
If $\proves\phi$ and $\phi\proves\psi$, then $\proves\psi$.
  \item 
If $\chi\proves\phi$ and $\phi\proves\psi$, then $\chi\proves\psi$.
  \end{compactenum}
\end{theorem}

\begin{proof}
The first claim is easily obtained by concatenating two formal proofs.
Thus, if
\begin{equation*}
\theta_0,\dots,\theta_{n-1},\phi
\end{equation*}
is a formal proof of $\phi$, and
\begin{equation*}
\theta_n,\dots,\theta_{n+m-1},\psi
\end{equation*}
is a formal proof of $\psi$ from $\phi$,
then
\begin{equation*}
  \theta_0,\dots,\theta_{n-1},\theta_n,\dots,\theta_{n+m-1},\psi
\end{equation*}
is a formal proof of $\psi$.

For the second claim, 
a similar concatenation may not be a formal proof from $\chi$,
if $\chi$ has free variables that are not free in $\phi$.
For, in this case, a particular proof of $\psi$ from $\phi$
might have involved substitution for, or generalization on,
some of these variables.
But suppose
\begin{equation*}
\theta_0,\dots,\theta_{n-1},\phi
\end{equation*}
is a formal proof of $\phi$ from $\chi$, and
\begin{equation*}
\theta_n,\dots,\theta_{n+m-1},\psi
\end{equation*}
is a formal proof of $\psi$ from $\phi$,
and the free variables of $\chi$ that are not free in $\phi$ 
are $x_0,\dots,x_{n-1}$.
Let $y_0,\dots,y_{n-1}$ be distinct variables 
not appearing at all in any of the formulas in the two formal proofs above,
and then, if $k<m$, let
\begin{align*}
  \theta_{n+k}{}'&\text{ be }(\theta_{n+k})^{x_0\cdots x_{n-1}}_{y_0\cdots y_{n-1}},&
\psi_k&\text{ be }\psi^{x_0\cdots x_{n-1-k}}_{y_0\cdots y_{n-1-k}}.
\end{align*}
The sequence
\begin{equation*}
  \theta_n{}',\dots,\theta_{n+m-1}{}',\psi_0
\end{equation*}
is a formal proof of $\psi_0$ from $\phi$
in which none of the $x_i$ appear.
In particular, none of these variables is substituted for
or generalized on in the proof.
Therefore
\begin{equation*}
  \theta_0,\dots,\theta_{n-1},\theta_n{}',\dots,\theta_{n+m-1}{}',
\psi_0
\end{equation*}
is a formal proof of $\psi_0$ from $\chi$.
Finally
\begin{equation*}
  \theta_0,\dots,\theta_{n-1},\theta_n{}',\dots,\theta_{n+m-1}{}',
\psi_0,\dots,\psi_{m-1},\psi
\end{equation*}
is a formal proof of $\psi$ from $\chi$.
\end{proof}

\section{Completeness by ultraproducts}\label{sect:c-ultra}

Suppose $\sigma$ is an arbitrary sentence.  
We want to show that either $\sigma$ is provable, 
or else its negation has a model.%%%%%
\footnote{The ensuing argument is based mainly on that 
of Bell and Slomson \cite[Ch.\ 12, \S1]{MR0269486}.  
These writers cite J.N. Crossley 
for the suggestion of introducing ultraproducts 
to G\"odel's original argument.  
Church \cite[\S44]{MR18:631a} 
explicates G\"odel's original argument more faithfully.}  
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
In fact this model will be countable.
For now, we make several simplifying assumptions:
\begin{compactenum}
\item
For some positive integers $p$ and $q$, 
for some $p$-tuple $\vec x$ and $q$-tuple $\vec y$ of variables, 
all distinct from one another,
for some quantifier-free formula $\phi$, 
\begin{equation*}
\sigma\quad\text{ is }\quad
\Exists{\vec x}\Forall{\vec y}\phi.
\end{equation*}
\item
No operation symbols occur in $\phi$.
\item
The sign $=$ of equality does not occur in $\phi$.
\end{compactenum}
The justification of these assumptions does not involve ultraproducts, 
so it is relegated to \S\ref{sect:arb}, page~\pageref{sect:arb}.

We may assume that all variables
come from a countable set $V$,
and that there is a bijection
$k\mapsto v_k$
from $\upomega$ onto $V$.  
The power $V^p$ being also countable,
we may suppose we have a bijection
\begin{equation*}
  k\mapsto\vec x_k
\end{equation*}
from $\upomega$ onto $V^p$.
Then there is an injection
\begin{equation*}
  k\mapsto\vec y_k
\end{equation*}
from $\upomega$ into $V^q$
such that $\vec y_k$ has no entries in common with $\vec x_0\cdots\vec x_k$.
%where if $j\leq k$, then $\vec x_j$ and $\vec y_k$ have no entries in common.  
We now denote 
\begin{align*}
%\phi(\vec x_k,\vec y_k)&\quad\text{ by }\quad\phi_k,\\
\phi^{\vec x\vec y}_{\vec x_k\vec y_k}&\quad\text{ by }\quad\phi_k,\\
\phi_0\lor\dots\lor\phi_k&\quad\text{ by }\quad\theta_k,\\
\Forall{\vec x_k}\Forall{\vec y_k}\cdots\;\Forall{\vec x_0}\Forall{\vec y_0}
\theta_k&\quad\text{ by }\quad\tau_k.
\end{align*}
That is, $\tau_k$ is a generalization of $\theta_k$,
and $\theta_k$ itself is defined recursively in $k$, thus:
\begin{align*}
\theta_0&\text{ is }\phi_0,&
\theta_{k+1}&\text{ is }\theta_k\lor\phi_{k+1}.
\end{align*}

\begin{lemma}
In the notation above, for all $k$ in $\upomega$, the sentence
\begin{equation*}
\tau_k\lto\sigma
\end{equation*}
is provable.
\end{lemma}

\begin{proof}
We shall use induction.
First, the following are provable:
\begin{align*}
  \tau_0&\lto\theta_0,&&
\text{[Theorem \ref{thm:AE}]}\\
\tau_0&\lto\phi_0,
&&\text{[$\theta_0$ is $\phi_0$]}\\
\tau_0&\lto\phi,
&&\text{[Change of Free Variable]}\\
\tau_0&\lto\Forall{\vec y}\phi,
&&\text{[Generalization]}\\
\tau_0&\lto\Exists{\vec x}\Forall{\vec y}\phi,
&&\text{[Theorem \ref{thm:AE}]}\\
\tau_0&\lto\sigma.
&&\text{[$\sigma$ is $\Exists{\vec x}\Forall{\vec y}\phi$]}
\end{align*}
For the inductive step,
we note first that since no entry of $\vec y_{k+1}$ appears in $\theta_k$, 
we have the theorems
\begin{align*}
\tau_{k+1}
&\lto\Forall{\vec y_{k+1}}\theta_{k+1},
&&\text{[Theorem \ref{thm:AE}]}\\
\tau_{k+1}
&\lto\Forall{\vec y_{k+1}}(\theta_k\lor\phi_{k+1}),
&&\text{[$\theta_{k+1}$ is $\theta_k\lor\phi_{k+1}$]}\\
\tau_{k+1}
&\lto\theta_k\lor\Forall{\vec y_{k+1}}\phi_{k+1},
&&\text{[Theorem \ref{thm:dist}]}\\
\tau_{k+1}
&\lto\theta_k\lor\Exists{\vec x_{k+1}}\Forall{\vec y_{k+1}}\phi_{k+1},
&&\text{[Theorem \ref{thm:AE}]}\\
\tau_{k+1}
&\lto\theta_k\lor\Exists{\vec x}\Forall{\vec y}\phi,
&&\text{[Change of Bound Variable]}\\
\tau_{k+1}
&\lto\theta_k\lor\sigma,
&&\text{[$\sigma$ is $\Exists{\vec x}\Forall{\vec y}\phi$]}\\
\tau_{k+1}
&\lto\tau_k\lor\sigma.
&&\text{[Theorem \ref{thm:dist}]}
\end{align*}
Thus if $\tau_k\lto\sigma$ is a theorem, then so is $\tau_{k+1}\lto\sigma$.
This completes the induction.
\end{proof}

\begin{theorem}[Completeness]
In the notation above,
if $\sigma$ is not provable,
then $\lnot\sigma$ has a model.  
\end{theorem}

\begin{proof}
If some $\tau_k$ is provable,
then so is $\sigma$ itself, by the lemma,
and we are done.  
So suppose that no $\tau_k$ is provable.  
Then no $\theta_k$ is provable; so none of them
is a tautology.
Since $\theta_k$ is also quantifier-free, 
there must be a truth-assignment on the set of its atomic subformulas 
that makes $\theta_k$ false.  
We can extend this to a truth-assignment $F_k$ 
on the set of \emph{all} atomic formulas in variables from $V$ 
with predicates occurring in $\sigma$.  
Now, for every $k$ in $\upomega$, 
we can understand $V$ as the universe of a structure $\str A_k$
such that, for each $n$ in $\upomega$, 
for each $n$-ary predicate $R$ occurring in $\sigma$,
\begin{equation*}
R^{\str A_k}
=\{\vec u\in V^n\colon F_k(R\vec u)=1\}.
\end{equation*}
Here we rely on the assumption that none of the predicates $R$ is $=$.
We now have
\begin{equation*}
\str A_k\models R\vec u\iff F_k(R\vec u)=1.
\end{equation*}
Then by construction
\begin{equation*}
  \str A_k\models\lnot\theta_k.
\end{equation*}
If $k\leq\ell$, then,
since $\theta_k\lto\theta_{\ell}$
and hence $\lnot\theta_{\ell}\lto\lnot\theta_k$ are theorems, 
we have
\begin{equation*}
  \str A_{\ell}\models\lnot\theta_k.
\end{equation*}
Thus for all $k$ in $\upomega$,
\begin{equation}\label{eqn:finite}
  \{j\in\upomega\colon\str A_j\models\theta_k\}\included k.
\end{equation}
Now let $\str C$ be a non-principal ultraproduct of the structures $\str A_k$.
Since each of these structures has the same universe, namely $V$,
each $u$ in $V$ can be interpreted in $\str C$ 
as its image $(u\colon k\in\upomega)$ under the diagonal map.
Then for all $k$ in $\upomega$,
\begin{equation*}
  \str C\models\lnot\theta_k,
\end{equation*}
and so
\begin{equation*}
  \str C\models\lnot\phi_k.
\end{equation*}
Since we have no operation symbols in our signature, every subset of
$C$ is the universe of a substructure of $\str C$.  
Let $B$ be the image of $V$ under the diagonal map in $C$.  
Since $\phi$ is quantifier-free, 
and the interpretations of all of the variables are now in $B$, 
we now have
\begin{equation*}
  \str B\models\lnot\phi_k,
\end{equation*}
and so, treating $\vec x$ and $\vec y$ now as tuples of variables again, 
we have
\begin{equation*}
  \str B\models\Exists{\vec y}\lnot\phi^{\vec x}_{\vec x_k}.
\end{equation*}
Since every element of $B^p$ 
is the interpretation of some $\vec x_j$, we conclude
\begin{equation*}
  \str B\models\Forall{\vec x}\Exists{\vec y}\lnot\phi,
\end{equation*}
that is, $\sigma$ is false in $\str B$.
\end{proof}

\section{Completeness by K\"onig's Lemma}

In his proof of the Completeness Theorem,
G\"odel himself does not use an ultraproduct explicitly in his argument, 
but from the structures $\str A_k$, 
he can be understood to create the structure $\str B$ as follows.%%%%%
\footnote{I am guided by Church's version of G\"odel's argument here.  
See below.}  
%%%%%%%%%%%%%%%%%%%%%%%
Let $(\alpha_k\colon k\in\upomega)$ 
be a list of all of the atomic formulas 
appearing in the formulas $\phi_{\ell}$.  
The universe $B$ of $\str B$ 
will be the set $V$ of variables occurring in these formulas.
We define $\str B$ by determining in each case 
whether $\alpha_k$, considered as a sentence, is to be true in $\str B$.  
This determination can be made recursively as follows.  

For an arbitrary structure $\str A$
and sentence $\sigma$ of its signature,
the interpretation $\sigma^{\str A}$ of $\sigma$ in $\str A$
is, formally, a subset of $A^0$, namely the subset
\begin{equation*}
  \{x\in A^0\colon\str A\models\sigma\}.
\end{equation*}
But $A^0$ has a unique element, which is $\emptyset$, also called $0$.
Thus $A^0$ itself is $\{0\}$, which is $1$,
and $\pow{A^0}=\{0,1\}$, which is $2$.
So $\sigma^{\str A}$ is an element of $2$, and
\begin{equation*}
  \str A\models\sigma\iff\sigma^{\str A}=1.
\end{equation*}
Suppose for some $n$ in $\upomega$ 
an element $(e_k\colon k<n)$ of $2^n$ has been chosen such that the set
\begin{equation}\label{eqn:K-inf}
  \left\{i\in\upomega\colon\bigwedge_{k<n}\alpha_k{}^{\str A_i}=e_k\right\}
\end{equation}
is infinite.
This set is the union of the two sets of the form
\begin{equation}\label{eqn:Konig}
  \left\{i\in\upomega\colon\bigwedge_{k<n}\alpha_k{}^{\str A_i}=e_k
\And\alpha_n{}^{\str A_i}=e\right\},
\end{equation}
where $e\in2$.
Hence at least one of these sets is infinite.
If it is infinite when $e=0$, we let $e_n=0$.
Otherwise the set must be infinite when $e=1$, so we let $e_n=1$.
By recursion, we obtain an element $(e_n\colon n\in\upomega)$ of $2^{\upomega}$.
Now we define
\begin{equation*}
  \str B\models\alpha_n\iff e_n=1.
\end{equation*}
It follows by induction that, for each $n$ in $\upomega$,
\begin{equation}\label{eqn:inf}
\left|\left\{i\colon
\bigwedge_{k<n}\alpha_k{}^{\str A_i}=\alpha_k{}^{\str B}\right\}\right|=\upomega.
\end{equation}
The construction ensures $\str B\models\lnot\theta_j$ as before.
Indeed, suppose $\str B\models\theta$ for some formula $\theta$ 
(interpreted in $\str B$ as a sentence).  
Then the atomic subformulas of $\theta$ 
belong to a finite set $\{\alpha_k\colon k<n\}$ for some $n$, so
\begin{equation*}
\left\{i\colon\bigwedge_{k<n}\alpha_k{}^{\str A_i}=\alpha_k{}^{\str B}\right\}
\included\{i\in\upomega\colon\str A_i\models\theta\}.
\end{equation*}
In particular, by \eqref{eqn:inf}, 
the set $\{i\in\upomega\colon\str A_i\models\theta\}$ must be infinite.
However, as in \eqref{eqn:finite} we have also 
$\{i\in\upomega\colon\str A_i\models\theta_j\}\included j$,
and in particular the set $\{i\in\upomega\colon\str A_i\models\theta_j\}$ 
is finite.
Thus $\str B\nmodels\theta_j$.

There is some arbitrariness in our definition of $\str B$.  
If both of the sets of the form in \eqref{eqn:Konig} are infinite,
then $e_n$ could be either element of $2$;
we arbitrarily let it be $1$.
Alternatively, if we had a nonprincipal ultrafilter $D$ on $\upomega$, 
then we could just define
\begin{equation*}
  \str B\models\alpha_k\iff\{i\colon\str A_i\models\alpha_k\}\in D.
\end{equation*}
Thus we would return to the earlier ultraproduct construction.
An advantage of our alternative construction 
is that the Axiom of Choice is not required.

G\"odel himself is not explicit about how he obtains $\str B$.  
His editor van Heijenoort detects an allusion to K\"onig's Lemma.  
There is more than one theorem called by this name, 
but probably what is meant is the next theorem below 
\cite[Lemma II.5.7, p.\ 69]{MR85e:03003}.

A \textbf{tree}\label{tree} is a (partially) ordered set 
such that, for every $a$ in the set, 
the subset $\{x\colon x<a\}$ is well-ordered.  
The ordinal that is isomorphic to this set is then the \textbf{height} of $a$.  
An element of the underlying set of the tree is a \textbf{node} of the tree.
If the height of the node $a$ is $\beta$, then a \textbf{successor} of $a$ 
is a node $b$ with height $\beta+1$ such that $a<b$.  
A \textbf{branch} of the tree is a maximal linearly ordered set of nodes.   
The \textbf{height} of the tree is the supremum of the heights of its nodes.
The tree is an \textbf{$\upomega$-tree}
if its every element has finite height and finitely many successors.  
Then an $\upomega$-tree has height at most $\upomega$.

\begin{theorem}[K\"onig's Lemma]\label{thm:Koenig}
Every infinite $\upomega$-tree has an infinite branch.  
\end{theorem}

\begin{proof}
By the Axiom of Choice,
we may assume that the set of successors of every member of the tree 
is well-ordered.
We select an infinite branch recursively 
by first letting $a_0$ be a node at height $0$ 
such that $\{x\colon a_0<x\}$ is infinite; 
then, assuming $\{x\colon a_k<x\}$ is infinite, 
we let $a_{k+1}$ be the least successor of $a_k$ 
such that $\{x\colon a_{k+1}<x\}$ is infinite.
\end{proof}

This theorem applies to the present situation as follows.  
We start with $2^{<\upomega}$, that is, $\bigcup_{n\in\upomega}2^n$, 
ordered by inclusion, 
so that $\vec a\leq\vec b$ if and only if 
$\vec a$ is an initial segment of $\vec b$.  
In this way we obtain the \textbf{complete binary tree of height} $\upomega$.
See Figure \ref{fig:bin}.
\begin{figure}[ht]
\centering
\pstree[treemode=R]{\TR{$\emptyset$}}{
  \pstree{\TR{$(0)$}}{
    \pstree{\TR{$(0,0)$}}{
      \TR{$(0,0,0)$}
      \TR{$(0,0,1)$}
      }
    \pstree{\TR{$(0,1)$}}{
      \TR{$(0,1,0)$}
      \TR{$(0,1,1)$}
      }
      }
  \pstree{\TR{$(1)$}}{
    \pstree{\TR{$(1,0)$}}{
      \TR{$(1,0,0)$}
      \TR{$(1,0,1)$}
      }
    \pstree{\TR{$(1,1)$}}{
      \TR{$(1,1,0)$}
      \TR{$(1,1,1)$}
      }
      }
      }
\caption{The complete binary tree of height $\upomega$}\label{fig:bin}
\end{figure}
This has a sub-tree $T$ consisting of those $(e_0,\dots,e_{n-1})$ 
such that the set 
$\left\{i\in\upomega\colon\bigwedge_{k<n}\alpha_k{}^{\str A_i}=e_k\right\}$ 
in \eqref{eqn:K-inf} is infinite.
This sub-tree $T$ is infinite because, by induction, 
it has nodes at each finite height.  
Then K\"onig's Lemma applies,
giving us an infinite branch of $T$;
the union of this infinite branch 
is an element $(e_n\colon n\in\upomega)$ of $2^{\upomega}$
giving us $\str B$ as before.

The general form of K\"onig's Lemma uses the Axiom of Choice;
we do not need this here,
since the successors of every node $(e_0,\dots,e_{n-1})$ of $T$
are among $(e_0,\dots,e_{n-1},0)$ and $(e_0,\dots,e_{n-1},1)$,
and the former can be understood to precede the latter.

The present situation is simpler in another way too,
since every branch of $T$ is infinite.


\section{Arbitrary formulas}\label{sect:arb}

We have to justify the assumptions about $\sigma$ 
made at the beginning of \S\ref{sect:c-ultra}.


\subsection{Skolem normal form}

Recall from page~\pageref{quantifier} that a \emph{quantifier}
is an expression $\forall x$ or $\exists x$ in a formula.
These are \emph{universal} and \emph{existential} quantifiers, respectively.
(We currently understand $\exists x$ 
as an abbreviation of $\lnot\forall x\lnot$.)
A formula is in \textbf{prenex normal form} 
if all of its quantifiers are at the front.

\begin{theorem}
For every formula $\phi$ 
there is a formula $\hat{\phi}$ in prenex normal form
such that each of $\phi$ and $\hat{\phi}$ is provable from the other.
\end{theorem}

\begin{proof}
Suppose $\phi$ and $\psi$ are formulas,
and $y$ is a variable not occurring freely in either of them,
but substitutable for $x$ in $\psi$.
Then each of the formulas
\begin{align*}
  \phi&\lor\Forall x\psi,&
\Forall y(\phi&\lor\psi^x_y)
\end{align*}
is provable from the other.  Indeed,
\begin{align*}
&\proves\Forall x\psi\lto\psi^x_y,&&\text{[Ax.]}\\
&\proves(\Forall x\psi\lto\psi^x_y)
        \lto\phi\lor\Forall x\psi\lto\phi\lor\psi^x_y,&&\text{[Taut.]}\\
&\proves\phi\lor\Forall x\psi\lto\phi\lor\psi^x_y,&&\text{[Det.]}\\
\phi\lor\Forall x\psi&\proves\phi\lor\psi^x_y,&&\text{[Det.]}\\
\phi\lor\Forall x\psi&\proves\Forall y(\phi\lor\psi^x_y),&&\text{[Gen.]}
\end{align*}
and conversely
\begin{align*}
&\proves\Forall y(\phi\lor\psi^x_y)\lto\phi\lor\Forall y\psi^x_y,&&\text{[Ax.]}\\
\Forall y(\phi\lor\psi^x_y)&\proves\phi\lor\Forall y\psi^x_y,&&\text{[Det.]}\\
\Forall y(\phi\lor\psi^x_y)&\proves\phi\lor\Forall x\psi.&&\text{[Ch.\ of Var.]}
\end{align*}
Also each of
\begin{align*}
  \phi&\land\Forall x\psi,&
\Forall y(\phi&\land\psi^x_y)
\end{align*}
is provable from the other;
for, the same proof that establishes
$\Forall y(\phi\lor\psi^x_y)\proves\phi\lor\Forall x\psi$ gives us,
\emph{mutatis mutandis,} 
$\Forall y(\phi\land\psi^x_y)\proves\phi\land\Forall x\psi$,
while
\begin{align*}
&\proves\Forall y(\phi\land\psi^x_y)\lto\phi\land\psi^x_y,&&\text{[Ax.]}\\
\Forall y(\phi\land\psi^x_y)&\proves\phi\land\psi^x_y,&&\text{[Det.]}\\
&\proves\phi\land\psi^x_y\lto\phi,&&\text{[Taut.]}\\
\Forall y(\phi\land\psi^x_y)&\proves\phi,&&\text{[Det.]}\\
&\proves\phi\land\psi^x_y\lto\psi^x_y,&&\text{[Taut.]}\\
\Forall y(\phi\land\psi^x_y)&\proves\psi^x_y,&&\text{[Det.]}\\
\Forall y(\phi\land\psi^x_y)&\proves\Forall y\psi^x_y,&&\text{[Gen.]}\\
\Forall y(\phi\land\psi^x_y)&\proves\Forall x\psi,&&\text{[Ch.\ of Var.]}\\
&\proves\phi\lto\Forall x\psi\lto(\phi\land\Forall x\psi),&&\text{[Taut.]}\\
\Forall y(\phi\land\psi^x_y)&\proves\phi\land\Forall x\psi.&&\text{[Det.]}
\end{align*}
NOW WE NEED SOMETHING LIKE 
if $\phi\proves\psi$ then $\lnot\psi\proves\lnot\phi$, with appropriate restrictions.
\end{proof}

A \emph{sentence} is in \textbf{Skolem normal form}
if it is in prenex normal form,
and moreover, no existential quantifier follows a universal quantifier.

\begin{theorem}
 For every formula $\phi$, 
there is a sentence $\sigma$ in Skolem normal form, 
possibly with new predicates, 
such that
\begin{itemize}
\item 
if $\sigma$ is valid, then so is $\phi$,
\item
if $\lnot\sigma$ has a model, 
then $\lnot\phi$ will be satisfied in that model 
(that is, it will define a nonempty subset of that model).
\end{itemize}
\end{theorem}

\begin{proof}
A sentence in prenex normal form can be written as
\begin{equation*}
\Exists{\vec x}\Forall y\mathsf Q\;\theta,
\end{equation*}
where $\mathsf Q$ is a string of quantifiers, 
and $\theta$ is quantifier-free.  
Introduce a new predicate $R$ and form the sentence
\begin{equation*}
\Exists{\vec x}(\Forall y(\mathsf Q\;\theta\lto R\vec xy)\lto\Forall yR\vec xy).
\end{equation*}
This has the desired properties.  CHECK!!!!!!!!!!!!!!!!
It is also equivalent to
\begin{gather*}
\Exists{\vec x}(\Exists y(\mathsf Q\;\theta\land\lnot R\vec xy)\lor\Forall yR\vec xy),\\
\Exists{\vec x}\Exists y((\mathsf Q\;\theta\land\lnot R\vec xy)\lor\Forall zR\vec xz),\\
\Exists{\vec x}\Exists y(\mathsf Q\;(\theta\land\lnot R\vec xy)\lor\Forall zR\vec xz),\\
\Exists{\vec x}\Exists y\mathsf Q\;((\theta\land\lnot R\vec xy)\lor\Forall zR\vec xz),\\
\Exists{\vec x}\Exists y\mathsf Q\;\Forall z((\theta\land\lnot R\vec xy)\lor R\vec xz).
\end{gather*}
This last sentence is in prenex normal form, though perhaps not in Skolem normal form.  
Still, the number of universal quantifiers that precede existential quantifiers has decreased.  
So the process terminates in a sentence that must be in Skolem normal form. 
\end{proof}



\subsection{Operation symbols}

What we call relations, G\"odel calls functions; 
but he has no symbols for what we call operations.  
If we use such symbols, we can deal with them as follows.  
Suppose, for some $n$-ary operation symbol $F$, 
there is an atomic subformula $\alpha$ of $\sigma$ 
featuring a term $Ft_0\cdots t_{n-1}$.  
Introducing a new $(n+1)$-ary predicate $R_F$, 
we can replace the term $Ft_0\cdots t_{n-1}$ in $\alpha$ 
with a new variable $x$, obtaining an atomic formula $\alpha'$.  
We can then replace $\alpha$ in $\sigma$ with the formula
\begin{equation*}
\Exists x(\alpha'\land R_Ft_0\cdots t_{n-1}x),
\end{equation*}
obtaining the formula $\sigma'$.  
Then $\sigma$ is valid if and only if the formula
\begin{equation*}
\sigma'\land\Forall{\vec x}\Exists y\Forall z
\bigl(R_F\vec xy\land(R_F\vec xz\lto y=z)\bigr)
\end{equation*}
is valid.
Now we have to show that $\sigma$ is provable from this last formula.

\subsection{Equality}

Suppose no operation symbol occurs in $\sigma$, 
but the sign $=$ of equality does occur.  
We have to deal with the requirement 
that this sign is interpreted in every structure as equality itself 
(and not merely an equivalence relation).  
We introduce a new binary predicate $\equiv$, 
and we replace each occurrence of $=$ in $\sigma$ 
with this new predicate $\equiv$, obtaining a new sentence $\sigma'$.  
Now let $(R_0,\dots,R_m)$ be a list of all predicates 
(including $\equiv$) occurring in $\sigma'$, 
and let $\sigma''$ be the sentence
\begin{equation*}
\sigma'\land\Forall{\vec x}\Forall{\vec y}
\bigl(\vec x\equiv\vec y\lto
\bigwedge_{j\leq m}(R_j\vec x_j\lto R_j\vec y_j)\bigr).
\end{equation*}
(Here $\vec x_j$ and $\vec y_j$ are initial segments, of appropriate length, 
of $\vec x$ and $\vec y$ respectively; 
and $\vec x$ and $\vec y$ are long enough to make this possible.)  
Then $\sigma$ is valid if and only if $\sigma''$ is valid.
Also, if $\str A\models\sigma''$, 
then $\equiv^{\str A}$ is an equivalence relation on $A$, 
and the set of equivalence classes is the universe of a model of $\sigma$.
Now we have to show that $\sigma$ is provable from $\sigma''$.  


\chapter{Algebraic geometry}\label{ch:ag}

We shall assume the Axiom of Choice throughout this chapter.
Also, $K$ will be a field, 
and $L$ will be a field of which $K$ is a subfield, 
that is,
\begin{equation*}
  K\included L.
\end{equation*}
In short, $L/K$ will be a field-extension.  
For example, $K$ might be $\Q$, and then $L$ might be $\C$.  
For some $n$ in $\upomega$, 
we shall let $\vec X$ denote 
an $n$-tuple $(X^0,\dots,X^{n-1})$ of indeterminates, 
so that we can form the ring $K[\vec X]$ of polynomials
as on page \pageref{poly-ring}.
If $n=1$, we write this ring as $K[X]$;
if $n=2$, as $K[X,Y]$.

\section{The spectrum of a polynomial ring}

Given a signature $\sig$, we have defined
\begin{compactitem}
\item 
the class $\Str$ of structures of $\sig$ (page \pageref{Mod}),
\item
the set $\Sn$ of sentences of $\sig$ (page \pageref{Sn}), and
\item
the relation $\models$ between them (page \pageref{models}).
\end{compactitem}
We shall now consider analogously
\begin{compactitem}
\item 
the set $L^n$ of $n$-tuples of elements of $L$,
\item
the set $K[\vec X]$ of polynomials over $K$, and
\item
the relation $\{(\vec x,f)\in L^n\times K[\vec X]\colon f(\vec x)=0\}$ 
between them.
\end{compactitem}
In particular,
we shall be interested in the Galois correspondence\label{ag-gal}
induced by this relation
as in Theorem~\ref{thm:polarities} (page \pageref{thm:polarities}).
We shall write the polarities constituting the Galois correspondence as
\begin{align*}
A&\mapsto\I A,&F&\mapsto\V F,
\end{align*}
respectively.
As in the case of model theory,
we may use variations of this notation, letting
\begin{align*}
  \I{\vec x}&=\{f\in K[\vec X]\colon f(\vec x)=0\},\\
\V f&=\{\vec x\in L^n\colon f(\vec x)=0\},
\end{align*}
so that,
analogously to
\eqref{eqn:th-mod} on page \pageref{eqn:th-mod},
\begin{align*}
\I A&=\bigcap_{\vec x\in A}\I{\vec x},&
\V F&=\bigcap_{f\in F}\V f.
\end{align*}
The set $\V F$ is the \textbf{zero-locus} of $F$ in $L^n$:
see Figure~\ref{fig:y-x^2}.
\begin{figure}
\mbox{}\hfill
  \begin{pspicture}(-1.4,-0.5)(1.4,2)
\psline{->}(-1.4,0)(1.4,0)
\psline{->}(0,-0.5)(0,2)
    \psplot[linewidth=1.6pt]{-1.4}{1.4}{x x mul}
  \end{pspicture}
\hfill
  \begin{pspicture}(-1.4,-0.5)(1.4,2)
\psline{->}(-1.4,0)(1.4,0)
\psline{->}(0,-0.5)(0,2)
\psdots(0,0)(1,1)
\psset{linestyle=dotted}
    \psplot{-1.4}{1.4}{x x mul}
\psline(-0.5,-0.5)(1.4,1.4)
  \end{pspicture}
\hfill\mbox{}
\caption{The zero-loci of $Y-X^2$ and $\{Y-X^2,Y-X\}$ in $\R^2$}\label{fig:y-x^2}
\end{figure}
The function $A\mapsto\V A$ is the \textbf{zero-locus map.}  
A course in so-called analytic geometry 
is a study of zero-loci in $\R$, in case $n$ is $2$ or $3$, 
so that $K[\vec X]$ can be written as $\R[X,Y]$ or $\R[X,Y,Z]$.

The zero-loci of the various subsets of $K[\vec X]$ 
are also called \textbf{algebraic sets.}%%%%%
\footnote{More precisely, \emph{affine algebraic sets.}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
As the notation is supposed to recall, 
the definition of $\V A$ depends on $L$. 
We intend to overcome this dependence.  

There is an analogue of logical equivalence,
namely the relation
\begin{equation*}
  \{(f,g)\in K[\vec X]\times K[\vec X]\colon\V f=\V g\}.
\end{equation*}
We shall not be interested in this.
The quotient $\Sn\modsim$ is a Boolean algebra
(by Theorem \ref{thm:Lin}, page \pageref{thm:Lin})
and therefore a Boolean ring 
(by Theorem \ref{thm:B-alg-ring}, page \pageref{thm:B-alg-ring});
but $K[\vec X]$ is already a ring,
albeit not a Boolean ring.

The set $\I A$ is the \textbf{ideal of $A$ in} $K[\vec X]$.
This terminology is justified by the following,
which is a partial analogue 
of parts of Theorems \ref{thm:el-class-top} and \ref{thm:Lin-S}
(pages \pageref{thm:el-class-top} and \pageref{thm:Lin-S}):

\begin{theorem}\label{thm:Z}
\mbox{}
\begin{compactenum}
\item 
The zero-loci of subsets of $K[\vec X]$ compose a topology on $L^n$.
\item
  The subsets $\I{\vec x}$ of $K[\vec X]$ are prime ideals.
\item
  The subsets $\I A$ of $K[\vec X]$ are radical ideals.
\end{compactenum}
\end{theorem}

\begin{proof}
  \begin{asparaenum}
    \item
  Because
\begin{align*}
  \emptyset&=\V 1,&\V f\cup\V g&=\V{fg},
\end{align*}
the sets $\V f$ compose a basis of a topology on $L^n$.
\item
The additional observations
\begin{align*}
  \V0&=L^n,&\V f\cap\V g&\included\V{f-g}
\end{align*}
show that each $\I{\vec x}$ is a prime ideal.
Indeed, we can translate the three equations and one inclusion as
\begin{align*}
&\begin{gathered}
    1\not\in\I{\vec x},\\
0\in\I{\vec x},
  \end{gathered}&
&\begin{gathered}
 f\in\I{\vec x}\Or g\in\I{\vec x}\iff fg\in\I{\vec x},\\
f\in\I{\vec x}\And g\in\I{\vec x}\implies f-g\in\I{\vec x}.  
  \end{gathered}
\end{align*}
\item
Prime ideals are radical, 
and the intersection of radical ideals is radical 
by Theorem \ref{thm:rad-M} (page \pageref{thm:rad-M}).\qedhere
  \end{asparaenum}
\end{proof}

Thus in particular the monoid $(K[\vec X],1,{}\cdot{})$
is analogous with the algebra $(\Sn,\bot,\lor)$
and hence with the monoid $(\Sn,\bot,\lor)\modsim$.
In developing model theory,
in place of the relation $\models$,
we could have used $\nmodels$
(thus replacing pairs $(\str A,\sigma)$ with $(\str A,\lnot\sigma)$);
then the monoid $(K[\vec X],1,\cdot)$
would be analogous to the monoid $(\Sn,\top,\land)\modsim$,
and ideals $\I A$ of $K[\vec X]$ as a ring
would be analogous to \emph{ideals} of $\Lin{\sig}$ as a Boolean algebra,
rather than to filters as they are now.

The topology given by the theorem is the \textbf{Zariski topology,}
or more precisely the $K$-Zariski topology.
The closed subsets of $K[\vec X]$, that is, the ideals of subsets of $L^n$,
are radical ideals of $K[\vec X]$.
But we do not know whether \emph{every} radical ideal is closed.
Equivalently 
(since every radical ideal is an intersection of prime ideals
by Theorem \ref{thm:rad}, page \pageref{thm:rad}),
we do not know whether every prime ideal is closed.

Recall that,
as defined on page \pageref{spectrum},
the \emph{spectrum} $\spec$ of a commutative ring $R$
is the set of prime ideals of $R$,
and (by Theorem \ref{thm:spec-top}, page \pageref{thm:spec-top})
it is a compact Kolmogorov space
with basis consisting of the sets 
$\{\mathfrak p\in\spec\colon a\in\mathfrak p\}$,
denoted by $\var a$ (without a subscript), where $a\in R$.
We now have the following partial analogue of part of Theorem \ref{thm:Kol-quo} 
(page \pageref{thm:Kol-quo}):

\begin{theorem}\label{thm:spec}
  The map $\vec x\mapsto\I{\vec x}$ from $L^n$ to $\spec[{K[\vec X]}]$
is continuous,
and the image of $L^n$ under this map
is a Kolmogorov quotient of $L^n$ with respect to the map.
\end{theorem}

\begin{proof}
We use Theorem \ref{thm:Kol-cond} (page \pageref{thm:Kol-cond}).
Let us refer to the map $\vec x\mapsto\I{\vec x}$ as $\Phi$.
If $f\in K[\vec X]$, then
\begin{align*}
  \Phi\inv[\var f]
&=\{\vec x\in L^n\colon\I{\vec x}\in\var f\}\\
&=\{\vec x\in L^n\colon f\in\I{\vec x}\}\\
&=\{\vec x\in L^n\colon f(\vec x)=0\}\\
&=\V f;
\end{align*}
thus $\Phi$ is continuous.
Also
\begin{align*}
  \Phi[\V f]
&=\{\I{\vec x}\colon\vec x\in\V f\}\\
&=\{\I{\vec x}\colon f(\vec x)=0\}\\
&=\{\I{\vec x}\colon\vec x\in L^n\And f\in\I{\vec x}\}\\
&=\Phi[L^n]\cap\{\mathfrak p\in\spec[{K[\vec X]}]\colon f\in\mathfrak p\}\\
&=\Phi[L^n]\cap\var f;
\end{align*}
so $\Phi$ is closed onto its image.
Finally,
$\vec x$ and $\vec y$ in $L^n$ are topologically indistinguishable 
if and only if $\Phi(\vec x)=\Phi(\vec y)$.
\end{proof}

The situation is as in Figure \ref{fig:spec},
a collapsed analogue of Figure \ref{fig:sto} (page \pageref{fig:sto}).
\begin{figure}
  \begin{equation*}
    \xymatrix@!0@=2.7cm{
L^n\ar[d]_{\vec x\mapsto\I{\vec x}}
&K[\vec X]\ar@{<~>}[l]_{f(\vec x)=0}\\
\spec[{K[\vec X]}]
\ar@{<~>}[ur]_{\ni}&
}
  \end{equation*}
  \caption{The spectrum of a ring of polynomials}\label{fig:spec}
  
\end{figure}
The function $\vec x\mapsto\I{\vec x}$ is injective on $K^n$, 
since if $\vec a\in K^n$ then 
\begin{equation*}
\I{\vec a}=(X^0-a^0,\dots,X^{n-1}-a^{n-1}).
\end{equation*}
The map is not generally injective:\label{pi} if $n=2$, $K=\Q$, and $L$ is
$\R$ or $\C$, then 
\begin{equation*}
\I{(\uppi,\uppi)}=(X-Y)=\I{(\mathrm e,\mathrm e)}.
\end{equation*}
The map is not generally surjective either:
If $L=\alg{\Q}$, then $(X-Y)$ is not in its range,
although $\I{\{(x,x)\colon x\in\alg{\Q}\}}=(X-Y)$.

\begin{theorem}
  If $\alg{K[\vec X]}$ embeds over $K$ in $L$,
then the map $\vec x\mapsto\I{\vec x}$ on $L^n$
is surjective onto $\spec[{K[\vec X]}]$.
\end{theorem}

\begin{proof}
Suppose $\mathfrak p\in\spec[{K[\vec X]}]$.
If $K[\vec X]/\mathfrak p\included L$,
and $\vec x$ is $(X^k+\mathfrak p\colon k<n)$,
then
\begin{equation*}
  \I{\vec x}=\mathfrak p.
\end{equation*}
Then the same is true if $K[\vec X]/\mathfrak p$ embeds over $K$ in $L$,
and $\vec x$ is the image in $L$ of $(X^k+\mathfrak p\colon k<n)$.
Since $K[\vec X]/\mathfrak p$ is an integral domain
of transcendence degree no greater than $n$ over $K$,
it embeds over $K$ in $\alg{K[\vec X]}$.
\end{proof}

Thus we have an analogue 
of the Compactness Theorem (page \pageref{thm:comp-alg}),
the spectrum of a polynomial ring
being analogous to the Stone space of a Lindenbaum algebra.


\section{Hilbert Basis Theorem}

By Theorem~\ref{thm:Z}, every zero-locus is the zero-locus of a radical ideal:
\begin{equation*}
\V A=\V{(A)}=\V{\surd(A)}.
\end{equation*}

\begin{theorem}
If $\mathfrak a$ and $\mathfrak b$ are two ideals of $K[\vec X]$, then
\begin{equation}\label{eqn:Vab}
  \V{\mathfrak a}\cup\V{\mathfrak b}=\V{\mathfrak a\cap\mathfrak b}.
\end{equation}
\end{theorem}

\begin{proof}
Easily 
$\V{\mathfrak a}\cup\V{\mathfrak b}\included\V{\mathfrak a\cap\mathfrak b}$.
The reverse inclusion holds because
\begin{equation*}
  \surd(\mathfrak a\cap\mathfrak b)
=\surd(\{fg\colon f\in\mathfrak a\And g\in\mathfrak b\}).\qedhere
\end{equation*}
\end{proof}

The union of the zero-loci of an \emph{arbitrary} collection of ideals
need not be the zero-locus of the intersection of the ideals.  
For example, if $K=\Q$ (and $L$ is some larger field) and
\begin{equation*}
\mathfrak a_k=\left(\prod_{i=1}^k(X-i)\right)=\bigl((X-1)\dotsm(X-k)\bigr),
\end{equation*}
then $\V{\mathfrak a_k}=\{1,\dots,k\}$, 
but $\bigcap_{k\in\N}\mathfrak a_k=\{0\}$.  Thus
\begin{equation*}
\bigcup_{k\in\N}\V{\mathfrak a_k}=\N\pincluded L=\V{\{0\}}
=\V{\bigcap_{k\in\N}\mathfrak a_k}.
\end{equation*}

\begin{theorem}[Hilbert Basis Theorem]\label{thm:H}
For every $n$ in $\upomega$, 
by the Axiom of Choice\ac,
every ideal of the polynomial ring $K[X^0,\dots,X^{n-1}]$ is finitely generated.
\end{theorem}

\begin{proof}
The claim implies, and is therefore equivalent to, 
an apparently stronger claim, 
namely that every ideal $(A)$ of $K[X^0,\dots,X^{n-1}]$ 
is $(A_0)$ for some finite subset $A_0$ of $A$.  
For, if $(A)=(f_0,\dots,f_{m-1})$, 
then each $f_k$ is in $(A^{(k)})$ for some finite subset $A^{(k)}$ of $A$; 
and then we can let $A_0=\bigcup_{k<m}A^{(k)}$.

The claim is also equivalent to the claim that every sequence $(\mathfrak a_k\colon k\in\upomega)$ of ideals of $K[X^0,\dots,X^{n-1}]$ such that
\begin{equation*}
\mathfrak a_0\included\mathfrak a_1\included\mathfrak a_2\included\dotsb
\end{equation*}
---that is, every increasing chain of ideals (indexed by $\upomega$)---is eventually constant.  
For, the union of such a chain is an ideal $\mathfrak b$, and if this ideal is finitely generated, then it has a generating set whose elements all lie in some $\mathfrak a_{\ell}$, and then this ideal is $\mathfrak b$.  
Conversely (or inversely), if $\mathfrak a$ were not finitely generated, then for all subsets $\{f_k\colon k<\ell\}$ of $\mathfrak a$ we could find $f_{\ell}$ in $\mathfrak a\setminus(f_k\colon k<\ell)$; thus we could form a strictly increasing chain $((f_k\colon k<\ell)\colon\ell\in\upomega)$.

We now have also a fourth form of our claim: 
every \emph{countably} generated ideal of $K[X^0,\dots,X^{n-1}]$ 
is finitely generated.
We turn to proving the claim, in any convenient form.

The claim is trivially true when $n=0$, 
since a field has only two ideals: 
the trivial ideal and the improper ideal $(1)$.

The claim is still easy when $n=1$, 
because $K[X]$ is a \textbf{Euclidean domain.}  
That is, if $f$ and $g$ are in $K[X]$
and are not both $0$,
we can use the Euclidean algorithm (as on page \pageref{Euc-alg}) 
to find their greatest common divisor---say $h$; and then $(f,g)=(h)$.  
Hence if $\mathfrak a=(f_k\colon k\in\upomega)$, 
then for each $k$ in $\upomega$ we can find $g_k$ so that
\begin{equation*}
(f_0,\dots,f_k)=(g_k).
\end{equation*}
In particular, $g_{k+1}$ divides $g_k$.  
Then $\min\{\deg(g_k)\colon k\in\upomega\}=\deg(g_{\ell})$ for some $\ell$, and consequently $\mathfrak a=(g_{\ell})$.

When $n\geq2$, we have not got the Euclidean algorithm; but we can come close enough if we use induction.  
Suppose then that the claim is true when $n=m$.  
Let $\mathfrak a$ be an ideal of $K[X^0,\dots,X^m]$.  
We shall form a sequence $(f_0,f_1,\dots)$ 
of elements of $\mathfrak a$ by recursion.  
Given $(f_k\colon k<\ell)$, 
and using the Axiom of Choice\ac,
we let $f_{\ell}$, if it exists, 
be an element of $\mathfrak a\setminus(f_k\colon k<\ell)$ of minimal degree 
as a polynomial in $X^m$ over $K[X^0,\dots,X^{m-1}]$.  
Then these degrees form an increasing sequence:
\begin{equation*}
\deg_{X^m}(f_0)\leq\deg_{X^m}(f_1)\leq\deg_{X^m}(f_2)\leq\cdots
\end{equation*}
Let $g_k$ be the leading coefficient of $f_k$ 
(as a polynomial in $X^m$ over $K[X^0,\dots,X^{m-1}]$; 
so $g_k\in K[X^0,\dots,X^{m-1}]$).  
By inductive hypothesis, for some $\ell$,
\begin{equation*}
(g_k\colon k\in\upomega)=(g_k\colon k<\ell).
\end{equation*}
Then in particular
$g_{\ell}\in(g_k\colon k<\ell)$, 
so by Theorem \ref{thm:(A)} (page \pageref{thm:(A)}),
for some $b^k$ in $K[X^0,\dots,X^{m-1}]$,
\begin{equation*}
  g_{\ell}=\sum_{k<\ell}b^k\cdot g_k.
\end{equation*}
Now let
\begin{equation*}
  h=\sum_{k<\ell}b^k\cdot f_k\cdot(X^m)^{r(k)},
\end{equation*}
where $r(k)={\deg_{X^m}(f_{\ell})-\deg_{X^m}(f_k)}$.
Then $h\in(f_k\colon k<\ell)$ and,
as a polynomial in $X^m$ over $K[X^0,\dots,X^{m-1}]$,
has the leading coefficient and degree of $f_{\ell}$.
But then $f_{\ell}-h$ has lower degree 
and belongs to $\mathfrak a\setminus(f_k\colon k<\ell)$; 
that is, $f_{\ell}$ did not have minimal degree.  
Thus there \emph{is} no $f_{\ell}$; that is, $\mathfrak a=(f_k\colon k<\ell)$.
\end{proof}

A \emph{singly} generated ideal is called \textbf{principal.}
Then part of our proof of the theorem gives the following:

\begin{porism}
Every ideal of $K[X]$ is principal.
\end{porism}

Hence, although in the example above $\N$ is the union of zero-loci, it cannot itself be a zero-locus; for, every zero-locus of polynomials in one variable is the zero-locus of a single polynomial, so it is either the whole field $L$ or a finite subset of this.

The Hilbert Basis Theorem itself has the following:

\begin{corollary}
Every decreasing chain of closed subsets of $L^n$ is eventually constant.
In particular, the Zariski topology is compact.
\end{corollary}

The corollary would imply the theorem,
if we knew that $\mathfrak a\pincluded\mathfrak b$ implied
$\V{\mathfrak a}\pincludes\V{\mathfrak b}$,
at least when $\mathfrak a$ and $\mathfrak b$ were radical ideals.  
However, this implication can fail.  
For example, when $L=\R$,\label{LR} 
then $(X^2+1)$ is a radical ideal
whose zero-locus is the same as the zero-locus of $(1)$, namely
the empty set. 

\section{Specialization}

We denote the fraction-field of $K[\vec X]$ by
\begin{equation*}
K(\vec X);
\end{equation*}
it is the \textbf{field of rational functions} in $\vec X$ over $K$.  

Suppose now $\vec a\in L^n$.  
Then there is a homomorphism $f\mapsto f(\vec a)$ from $K[\vec X]$ to $L$.  
(We could write the homomorphism also as $X^i\mapsto a^i$.)
The range of this homomorphism is denoted by
\begin{equation*}
K[\vec a],
\end{equation*}
and the fraction-field of this ring is denoted by
\begin{equation*}
K(\vec a);
\end{equation*}
we may consider this field as a subfield of $L$.  
Then
$K[\vec a]$ is the smallest sub-\emph{ring} of $L$ 
that includes $K\cup\{a^0,\dots,a^{n-1}\}$,
and
$K(\vec a)$ is the smallest sub\emph{field} of $L$ 
that includes $K\cup\{a^0,\dots,a^{n-1}\}$.

Let $\mathfrak p$ be the kernel of the homomorphism $f\mapsto f(\vec a)$ 
from $K[\vec X]$ to $L$.
Then
\begin{equation*}
K[\vec a]\cong K[\vec X]/\mathfrak p.
\end{equation*}
Also, $f\mapsto f(\vec a)$ is well-defined 
on the sub-ring $K[\vec X]_{\mathfrak p}$ of $K(\vec X)$,
but not on the complement.
This complement is empty, if $\mathfrak p=(0)$.
If $\mathfrak p\neq(0)$,
then $\vec a$ is said to be \textbf{algebraically dependent} over $K$, 
or simply \textbf{algebraic} over $K$ in case $n=1$.

\begin{theorem}\label{thm:K[a]}
If $a$ is algebraic over $K$, then
\begin{equation*}
K[a]=K(a).
\end{equation*}
Thus nontrivial prime ideals of $K[X]$ are maximal.
\end{theorem}

\begin{proof}
If $a\in K$, then $K[a]=K=K(a)$.
If $a\notin K$, but is algebraic over $K$,
then $b_0+b_1\cdot a+\dots+b_m\cdot a^m=0$ for some $b_i$ in $K$,
where $b_0\neq0$ (and $m>0$).  Then
\begin{equation*}
\frac1a
=-\left(\frac{b_1}{b_0}+\frac{b_2}{b_0}\cdot a+\dots
+\frac{b_m}{b_0}\cdot a^{m-1}\right).\qedhere
\end{equation*}
\end{proof}

Easily, $K[X]$ is not a von Neumann regular ring.
However, being an integral domain, it is reduced.
It is not a counterexample to Theorem \ref{thm:reg-eq} 
(page \pageref{thm:reg-eq}), 
because the prime ideal $(0)$ is not maximal.

The nontrivial prime ideals of $K[\vec X]$
are not generally maximal.  
For example $K[X,Y]/(X-Y)\cong K[X]$, 
which is an integral domain that is not a field; 
so $(X-Y)$ is a non-maximal prime ideal of $K[X,Y]$.

The field $K$ is \textbf{algebraically closed}\label{alg}
if every element of a larger field
that is algebraic over $K$
is already in $K$.
(The notion was used in Theorem \ref{thm:ACF_0}, page \pageref{thm:ACF_0}.)
An \textbf{algebraic closure} of $K$
is an algebraically closed extension of $K$
that has no proper algebraically closed sub-extension.

\begin{theorem}%\label{thm:alg}
  By the Axiom of Choice\ac,
every field $K$ has an algebraic closure.
All algebraic closures of $K$ are isomorphic over $K$.
\end{theorem}

We may therefore refer to \emph{the} algebraic closure of $K$,
denoting it by
\begin{equation*}
  \alg K.
\end{equation*}


\section{Hilbert Nullstellensatz}\label{sect:Gal}

The closed subsets of $K[\vec X]$
with respect to the Galois correspondence between $\pow{L^n}$
and $\pow{K[\vec X]}$ defined on page \pageref{ag-gal}---%
let us refer to these closed subsets more precisely as \textbf{$L$-closed,}
because we are going to consider what happens when we change $L$.
Again, by Theorem \ref{thm:Z} (page \pageref{thm:Z}),
the $L$-closed subsets of $K[\vec X]$ are radical ideals,
and so they have the form of $\I{\V{\mathfrak a}}$ 
for some radical ideal $\mathfrak a$ of $K[\vec X]$; and then
\begin{equation}\label{eqn:inclusion}
\mathfrak a\included\I{\V{\mathfrak a}}.
\end{equation}
This is an equation if and only if $\mathfrak a$ is $L$-closed.
We noted in effect (on page \pageref{LR}) 
that if $L=\R$ (and $K$ is an arbitrary subfield of this), 
then the radical ideal $(X^2+1)$ is not $L$-closed:
\begin{equation*}
(X^2+1)\pincluded(1)=\I{\V[\R]{(X^2+1)}}.
\end{equation*}
However, as $L$ grows larger, so does $\V{\mathfrak a}$; but then
$\I{\V{\mathfrak a}}$ becomes smaller.  
In fact 
\begin{equation*}
(X^2+1)=\I{\V[\C]{(X^2+1)}}.
\end{equation*}
We now are faced with the following:
\begin{question}
For every radical ideal $\mathfrak a$ of $K[\vec X]$, is there
an extension $L$ of $K$ large enough that 
\begin{equation*}
\mathfrak a=\I{\V{\mathfrak a}}?
\end{equation*}
\end{question}

\begin{question}
Is there an extension $L$ of $K$ large enough that for all ideals
$\mathfrak a$ of $K[\vec X]$ and all extensions $M$ of $K$,
\begin{equation*}
\I{\V{\mathfrak a}}\included\I{\V[M]{\mathfrak a}}?
\end{equation*}
\end{question}

Note well that $\mathfrak a$ and $L$ 
are quantified in different orders in the two questions, 
as $\Forall{\mathfrak a}\Exists L$ 
and $\Exists L\Forall{\mathfrak a}$ respectively.
Moreover, the conclusions of the two questions are different.
So it is not immediate
that an answer to one question yields the answer to the other question.
However, 
if the answer to Question 1 is indeed yes,
then so is the answer to Question 2,
if the different fields $L$ 
corresponding to the different ideals $\mathfrak a$ 
are all included in one large field.
They \emph{are} so included,
since the class of fields has the \textbf{joint embedding property:}  
If $f_0$ embeds $K$ in $L_0$, 
and $f_1$ embeds $K$ in $L_1$, 
then there is a field $M$, 
and there are embeddings $g_i$ of the $L_i$ (respectively) in $M$, 
such that $g_0\circ f_0=g_1\circ f_1$.  
See Figure \ref{fig:jep}.
\begin{figure}[ht]
\begin{equation*}
\xymatrix@!{
              &M                      &\\
L_0\ar@{-}[ur]^{g_0}&                       &L_1\ar@{-}[ul]_{g_1}\\
              &K\ar@{-}[ul]^{f_0}\ar@{-}[ur]_{f_1}&
              }
\end{equation*}  
  \caption{Joint embedding property of fields}\label{fig:jep}
\end{figure}

By contrast, even if Question 2 has a positive answer, 
it is not at all clear that the answer to Question 1 must be positive.  

We settle Question 1 first in a special case. 

\begin{lemma}
For all \emph{maximal} ideals $\mathfrak m$ of $K[\vec X]$, for all
extensions $L$ of $K$ in which $K[\vec X]/\mathfrak m$ embeds over $K$,
\begin{equation*}
\mathfrak m=\I{\V{\mathfrak m}}.
\end{equation*}
\end{lemma}

\begin{proof}
As formulated here, the lemma almost proves itself.  
We just have to show $\I{\V{\mathfrak m}}$ is a proper ideal.  
But the image of $\vec X$ in $K[\vec X]/\mathfrak m$ is in
the zero-locus of $\mathfrak m$.  
In particular, if $L$ includes this field,
then $\V{\mathfrak m}$ is not empty, 
so $\I{\V{\mathfrak m}}$ cannot be all of $K[\vec X]$.
\end{proof}

Since $K[\vec X]/\mathfrak m$ is a field by Theorem \ref{thm:max-field} 
(page \pageref{thm:max-field}),
one can show that this field is algebraic over $K$
(as in \cite[Ch.~IX, Cor.~1.2, p.~379]{Lang-alg});
but we shall not need this.
The lemma yields another special case of the desired general result:

\begin{theorem}\label{thm:weak-weak-Null}
If $\alg{K(\vec X)}\included L$, 
then by the Axiom of Choice\ac,
for all ideals $\mathfrak a$ of $K[\vec X]$ 
such that $\I{\V{\mathfrak a}}$ is the improper ideal,
\begin{equation*}
\mathfrak a=\I{\V{\mathfrak a}}.
\end{equation*}
\end{theorem}

\begin{proof}
The claim is
\begin{equation*}
\I{\V{\mathfrak a}}=(1)\implies\mathfrak a=(1).
\end{equation*}
We prove the contrapositive.  
If $\mathfrak a$ is a proper ideal of $K[\vec X]$, 
then by the Maximal Ideal Theorem (\pageref{mith})\ac, 
it is included in some maximal ideal $\mathfrak m$.  
The field $K[\vec X]/\mathfrak m$ 
can be understood as an algebraic extension of $K(X^i\colon i\in I)$ 
for some subset $I$ of $n$, so it embeds in $\alg{K(\vec X)}$.
By the lemma then, 
since $\I{\V{\mathfrak m}}$ is a proper ideal, so is $\I{\V{\mathfrak a}}$.
\end{proof}

Note that if $\I{\V{\mathfrak a}}\neq(1)$, then $\V{\mathfrak a}\neq\emptyset$.  
Thus every proper ideal has non-empty zero-locus in a large-enough field.  \emph{Nullstellensatz} means zero-locus theorem:

\begin{theorem}[Nullstellensatz]\label{thm:N}
If $\alg{K(\vec X,Y)}\included L$, 
then for all radical ideals $\mathfrak a$ of $K[\vec X]$,
\begin{equation*}
\mathfrak a=\I{\V{\mathfrak a}}.
\end{equation*}
\end{theorem}

\begin{proof}
Say $f\in\I{\V{\mathfrak a}}$.  
If $\vec x\in\V{\mathfrak a}$,
then $f(\vec x)=0$.  
This shows $\V{\mathfrak
  a\cup\{f-1\}}=\emptyset$, so 
\begin{equation*}
\I{\V{\mathfrak a\cup\{f-1\}}}=(1).
\end{equation*}
By the last theorem, $\mathfrak a\cup\{f-1\}$ too must generate the improper ideal of
$K[\vec X]$.  
We want to be able to conclude $f\in\mathfrak a$.  
To do so, we modify the argument so far.  
We have $f\cdot
Y\in\I{\V{\mathfrak a}}$, if we consider $\mathfrak a$ now as a subset
of $K[\vec X,Y]$.  
As before, $\mathfrak a\cup\{f\cdot Y-1\}$
must generate
the improper ideal of $K[\vec X,Y]$.  
Now, by itself, $\mathfrak a$
generates the ideal of $K[\vec X,Y]$ whose elements are polynomials in
$Y$ with coefficients from $\mathfrak a$.  
Hence there is some such
polynomial $g$, and there is some $h$ in $K[\vec X,Y]$, such that 
\begin{equation*}
g+h\cdot(f\cdot Y-1)=1.
\end{equation*}
Substituting $1/f$ for $Y$, we get $g(1/f)=1$; that is,
\begin{gather*}
g_0+g_1\cdot\frac1f+\dots+g_m\cdot\frac1{f^m}=1
\end{gather*}
for some $g_i$ in $\mathfrak a$, and hence
\begin{gather*}
g_0\cdot f^m+g_1\cdot f^{m-1}+\dots+g_m=f^m.
\end{gather*}
This means $f^m\in\mathfrak a$.  
Assuming $\mathfrak a$ is radical, we
have $f\in\mathfrak a$.  
Thus $\I{\V{\mathfrak a}}\included\mathfrak
a$ and therefore $\I{\V{\mathfrak a}}=\mathfrak a$. 
\end{proof}

We have now settled both Questions 1 and 2.  
This suggests that
understanding algebraic sets can somehow be reduced to understanding
radical ideals of $K[\vec X]$.  
Indeed, there is \emph{some} extension $L$ of $K$ large enough that we have a Galois correspondence between the $K$-closed subsets of $L^n$ and the radical ideals of $K[\vec X]$.  
It is not particularly important for what follows that this field $L$ can be chosen as $\alg K$.  
Nonetheless, it is true:
Theorem \ref{thm:weak-weak-Null} holds,
merely under the hypothesis $\alg K\included L$.
 
\begin{theorem}[Hilbert's Nullstellensatz, weak form]\label{thm:HN}
\sloppy
All proper ideals of $K[\vec X]$ 
have non-empty zero-loci in all extensions of $\alg K$.
\end{theorem}

\begin{proof}
In the lemma, by the Hilbert Basis Theorem, $\mathfrak m$ has the form $(f_0,\dots,f_\ell)$ for some $f_i$ in $K[\vec X]$.  
Thus the formula
\begin{equation*}
f_0=0\land\dots\land f_{\ell}=0
\end{equation*}
has a solution in $K[\vec X]/\mathfrak m$ and \emph{a fortiori} in $\alg{(K[\vec X]/\mathfrak m)}$.  
The latter field is an \emph{elementary} extension of $\alg K$, by the model-completeness of the theory of algebraically closed fields (Theorem \ref{thm:ACF} on page \pageref{thm:ACF}).  
Therefore the formula has a solution here too.  
Thus as long as $\alg K\included L$, we have $\V{\mathfrak m}\neq\emptyset$.
\end{proof}

As an alternative to using the model-completeness 
of the theory of algebraically closed fields,
one can use the result mentioned above,
that $K[\vec X]/\mathfrak m$ is algebraic over $K$.
In any case,
the proof of Theorem \ref{thm:N} gives:

\begin{corollary}[Hilbert's Nullstellensatz, strong form]
For all radical ideals $\mathfrak a$ of $K[\vec X]$,
\begin{equation*}
\I{\V[\alg K]{\mathfrak a}}=\mathfrak a.
\end{equation*}
\end{corollary}

\chapter{Finite fields}

\section{Ultraproducts of finite structures}

Suppose a theory $T$ has arbitrarily large finite models.  
Then there
is a sequence $(\str A_m\colon m\in\upomega)$ of finite models of $T$ such
that $\card{A_m}>m$ in each case.
Consequently, the sentence
\begin{equation*}
\Exists{(x_0,\dots,x_m)}\bigwedge_{i<j\leq m}x_i\neq x_j
\end{equation*}
is true in each $\str A_n$ such that $m\leq n$.  
By \L o\'s's Theorem
then, the sentence is true in every non-principal ultraproduct of the
structures $\str A_i$.  
In particular, this ultraproduct is infinite.
Moreover, every sentence that is true in each $\str A_i$ is true in
the ultraproduct; that is, the ultraproduct is a model of the theory
of the structures $\str A_i$.  
Thus the ultraproduct is an infinite
model of the theory of finite models of $T$.  
Such a structure might
be called a \textbf{pseudo-finite} model of $T$.  
We shall consider
the case where $T$ is the theory of fields.  

\section{Finite fields}

Let us review the basic theorems about finite fields.  
Suppose $K$ is a field.  
There is a homomorphism $1\mapsto1$ (or $k\mapsto k\cdot1$) from $\Z$ to
$K$.  
The kernel of this homomorphism is $n\Z$ for some
non-negative $n$, called the \textbf{characteristic} of $K$, $\Char K$.
Since $\Z/n\Z$ must be an integral domain (by Corollary \ref{cor:ID},
page \pageref{cor:ID}), $n$ is either $0$ or prime. 
If $\Char K=0$, we may consider $\Q$ as a subfield of $K$; if $\Char
K$ is a prime $p$, we consider $\Z/p\Z$, denoted by $\F_p$, as a
subfield of $K$.  
Respectively, $\Q$ or $\F_p$ is the \textbf{prime
  field} of $K$. 

Let $K$ be a finite field of characteristic $p$.  
Then $K$ is a
vector-space over $\F_p$ of some finite dimension $m$, so $K$ has
order $p^m$.  
The group $\units K$ of units of $K$ has order $p^m-1$,
so its every element is a root of $x^{p^m-1}-1$.  
Then \emph{every}
element of $K$ is a root of the polynomial
\begin{equation*}
x^{p^m}-x.  
\end{equation*}
Since the formal derivative of this is $-1$, it has no repeated roots.
Thus its roots (in an algebraic closure $\alg{\F_p}$ of $\F_p$ that includes $K$) are
precisely the elements of $K$: we have 
\begin{equation*}
K=\{x\in\alg{\F_p}\colon x^{p^m}=x\}.
\end{equation*}
Conversely, for all $m$ in $\N$, since the map $x\mapsto x^{p^m}$ is an automorphism of $\alg{\F_p}$, the set $\{x\in\alg{\F_p}\colon x^{p^m}=x\}$ (namely the fixed field of the automorphism) is a subfield having order $p^m$.  
This then is the \emph{unique} subfield of $\alg{\F_p}$ of this order, and we can denote it by
\begin{equation*}
\F_{p^m}.
\end{equation*}
The group $\units{\F_{p^m}}$ of units of this field is cyclic.  
For again, it is a finite abelian group of order $p^m-1$ and is therefore a direct product
\begin{equation*}
\prod_{\ell\divides p^m-1}G_{\ell},
\end{equation*}
where each $G_{\ell}$ is an $\ell$-group (a group whose elements have orders that are powers of $\ell$; here and elsewhere in this chapter, $\ell$ is, like $p$, a prime number).  
Since $G_{\ell}$ is finite, for some positive integer $n$, every element of $G_{\ell}$ is a solution of
\begin{equation*}
x^{\ell^n}=1.
\end{equation*}
But in a field, this equation has no more than $\ell^n$ solutions.  
Therefore, if $n$ is minimal, $G_{\ell}$ must be cyclic of order $\ell^n$.  
Then the product $\units{\F_{p^m}}$ is itself cyclic, of order $p^m-1$.

The collection of finite subfields of $\alg{\F_p}$, ordered by inclusion, is isomorphic, under the map $\F_{p^m}\mapsto m$, to $\N$ as ordered by dividing.  
That is,
\begin{equation*}
\F_{p^m}\included\F_{p^n}\iff m\divides n.
\end{equation*}
See Figure~\ref{fig:ord}.
\begin{figure}[ht]
\begin{equation*}
\xymatrix@!0@=1.6cm{\F_{p^8}&&\F_{p^{12}}&&\F_{p^{18}}&&\F_{p^{27}}\\
&\F_{p^4}\ar@{-}[ul]\ar@{-}[ur]&&\F_{p^6}\ar@{-}[ul]\ar@{-}[ur]&&\F_{p^9}\ar@{-}[ul]\ar@{-}[ur]&\\
&&\F_{p^2}\ar@{-}[ul]\ar@{-}[ur]&&\F_{p^3}\ar@{-}[ul]\ar@{-}[ur]&&\\
&&&\F_p\ar@{-}[ul]\ar@{-}[ur]&&&}
\end{equation*}
\caption[The lattice of finite fields of characteristic $p$]%
{The lattice (in part) of finite fields of characteristic $p$}\label{fig:ord}
\end{figure}
Indeed, if $\F_{p^m}\included\F_{p^n}$, then $\F_{p^n}$ is a vector-space over $\F_{p^m}$, so its order is $(p^m)^k$ for some $k$, and then $n=mk$, so $m\divides n$.  
Conversely, if $m\divides n$, then
\begin{equation*}
p^m-1\divides p^n-1,
\end{equation*}
and therefore
\begin{equation*}
x^{p^m-1}-1\divides x^{p^n-1}-1,
\end{equation*}
so $\F_{p^m}\included\F_{p^n}$.  

Finally,
\begin{equation}\label{eqn:algFp}
\alg{\F_p}=\bigcup_{n\in\N}\F_{p^n}
\end{equation}
(since every extension $\F_{p^n}/\F_p$ is certainly algebraic, while every finite algebraic extension of $\F_p$ is a finite field).

\section{Galois groups}

We have shown that for each prime $p$, for each $m$ in $\N$, there is a subfield $\F_{p^m}$ of $\alg{\F_p}$, and this subfield is generated by (in fact it consists of) the roots of the polynomial $x^{p^m}-x$, which is separable.  
Therefore the finite field-extension $\F_{p^m}/\F_p$ is normal and separable, that is, Galois.  
The order of its group of automorphisms is $[\F_{p^m}\colon\F_p]$, that is, $m$.  
But the \textbf{Frobenius automorphism} of $\alg{\F_p}$, namely $x\mapsto x^p$ or
\begin{equation*}
\Frob,
\end{equation*}
restricts to an automorphism of $\F_{p^m}$ of order $m$, since we have shown in effect
\begin{equation*}
\Fix{\Frob^k}=\F_{p^k}.
\end{equation*}
Thus
\begin{equation*}
  \Aut{\F_{p^m}/\F_p}=\gpgen{\Frob\restriction\F_{p^m}}\cong\Z/m\Z.
\end{equation*}

\begin{sloppypar}
For any field $K$, let us write
\begin{equation*}
\Gal K=\Aut{K\sep/K},
\end{equation*}
the \emph{absolute Galois group} of $K$.  
We want to determine $\Gal{\F_p}$.  
Suppose $\sigma\in\Gal{\F_p}$.  
For every $n$ in $\N$, we have
\begin{equation*}
\sigma\restriction\F_{p^n}\in\Aut{\F_{p^n}/\F_p},
\end{equation*}
and hence for some $\sigma(n)$ in $\Z$
\begin{equation*}
  \sigma\restriction\F_{p^n}=(\Frob\restriction\F_{p^n})^{\sigma(n)}.
\end{equation*}
All that matters here is the congruence-class of $\sigma(n)$ \emph{modulo}
$n$.  
Thus we have an injective map
\begin{equation*}
\sigma\mapsto(\sigma(n)\colon n\in\N)
\end{equation*}
from
$\Gal{\F_p}$ to $\prod_{n\in\N}\Z/n\Z$.  
The map is not surjective:
if $m\divides n$, then since $\F_{p^m}\included\F_{p^n}$ we must have
\begin{equation*}
  \sigma(n)\equiv \sigma(m)\pmod m.
\end{equation*}
However, suppose an element $(\sigma(n)\colon n\in\N)$ of
$\prod_{n\in\N}\Z/n\Z$ meets this condition.  
We can define an element $\sigma$ of $\Gal{\F_p}$ by letting, for any $x$ in $\alg{\F_p}$,
\begin{equation*}
x^{\sigma}=x^{p^{\sigma(m)}},
\end{equation*}
where $x\in\F_{p^m}$.  (Here $x^{\sigma}$ is of course the image of $x$ under $\sigma$.) This definition of $x^{\sigma}$ is independent of the choice of $m$, since if also $x\in\F_{p^n}$, then
\begin{equation*}
x\in\F_{p^{\gcd(m,n)}},
\end{equation*}
so
\begin{equation*}
\sigma(m)\equiv\sigma(\gcd(m,n))\equiv\sigma(n)\pmod{\gcd(m,n)}
\end{equation*}
and therefore
\begin{equation*}
x^{p^{\sigma(m)}}=x^{p^{\sigma(\gcd(m,n))}}=x^{p^{\sigma(n)}}.
\end{equation*}
Thus
\begin{multline*}
\Gal{\F_p}\cong\{(\sigma(n)\colon n\in\N)\in\prod_{n\in\N}\Z/n\Z\colon\\
\bigwedge_{m\divides n}\pi^n_m(\sigma(n))=\sigma(m)\}
\end{multline*}
where $\pi^n_m$ is the quotient-map $x+n\Z\mapsto x+m\Z$ from $\Z/n\Z$ to $\Z/m\Z$.
\end{sloppypar}

In particular, $\Gal{\F_p}$ has a certain `universal property'\label{up} with respect to the system of groups $\Z/n\Z$ and homomorphisms $\pi^n_m$:
\begin{compactenum}
\item
$\Gal{\F_p}$ is a group $G$ from which there is a homomorphism $h^G_n$ to $\Z/n\Z$ for every $n$ in $\N$ such that, if $m\divides n$, then
\begin{equation*}
\pi^n_m\circ h^G_n=h^G_m.
\end{equation*}
\item
For every such group $G$, there is a unique homomorphism $h$ from $G$ to $\Gal{\F_p}$ such that, for each $n$ in $\N$,
\begin{equation*}
h^G_n=h^{\Gal{\F_p}}_n\circ h.
\end{equation*}
\end{compactenum}
See Figure~\ref{fig:Gal}.
\begin{figure}[ht]
\begin{equation*}
\xymatrix@!0@R=3.46cm@C=4cm{
&\Gal{\F_p}\ar[ddl]_{h^{\Gal{\F_p}}_n}\ar[ddr]^{h^{\Gal{\F_p}}_m}&\\
&\save[]+<0cm,-1.16cm>*{G}\ar[dl]^{h^G_n}\ar[dr]_{h^G_m}\ar[u]_h\restore&\\
\Z/n\Z\ar[rr]_{\pi^n_m}&&\Z/m\Z
}
\end{equation*}
\caption{The universal property of $\Gal{\F_p}$}\label{fig:Gal}
\end{figure}
Therefore $\Gal{\F_p}$ is called a \textbf{limit} of the given system
of groups and homomorphisms.  
This is the cat\-egory-theoretic sense of
\emph{limit} as given in, say, \cite[p.\ 705]{MR97a:13001} or
\cite{MR1094561}.  
Every set of groups, equipped with some
homomorphisms, has a limit in this sense, though the limit might be
empty.

The group $\Gal{\F_p}$ is called more precisely a \textbf{projective
  limit} or an \textbf{inverse limit} of the system of groups $\Z/n\Z$
with the quotient-maps, because any two of these groups are quotients of a
third.  
This condition is not required for the existence of the limit.

We give the finite groups $\Z/n\Z$ the discrete topology, and their
product the product topology.  
This product is compact by the
Tychonoff Theorem (page \pageref{thm:Tychonoff}).  
The
image of $\Gal{\F_p}$ in this group is closed, so it too is compact:
it is called a \textbf{pro-finite completion} of the system of finite
cyclic groups.\footnote{Perhaps one should talk about convergent sequences here\dots}

\section{Pseudo-finite fields}

Two examples of infinite models of the theory of finite fields are:
\begin{align}\label{eqn:psf}
&\prod_{p\text{ prime}}\F_p/M,&
&\prod_{n\in\N}\F_{p^n}/M,
\end{align}
where in each case $M$ is some non-principal maximal ideal.  
The first
example has characteristic $0$; the second, characteristic $p$. 

By the `Riemann Hypothesis for curves' as proved by Weil,\footnote{See for
example \cite[Ex.~V.1.10, p.~368]{MR0463157} or \cite[Thm 3.14,
  p.~35]{MR89b:12010}.} for every prime power $q$, for every curve $C$
of genus $g$ over $\F_q$, the number of $\F_q$-rational points of $C$
is at least  
\begin{equation*}
1+q-2g\surd q.
\end{equation*}
In particular, if $q$ is large enough, then $C$ does have an
$\F_q$-rational point. 

A field $K$ is called \textbf{pseudo-algebraically-closed} or
\textbf{PAC} if every plane curve defined over $K$ has a $K$-rational
point.  
This condition entails that every absolutely
  irreducible variety over $K$ has a $K$-rational point.\footnote{See
  \cite[ch.~10, pp.~129--131]{MR89b:12010}.}  
  
  The following are now true of every infinite model of the theory of finite fields: 
\begin{compactenum}
\item
It is perfect.
\item
It has exactly one extension of each degree (in some algebraic closure).
\item
It is pseudo-algebraically-closed.
\end{compactenum}
This is not obvious, even given the results stated above; one must show that these conditions are
\emph{first-order,} that is, the structures that satisfy them make up
an elementary class.
By the definition of Ax \cite{MR0229613}, a field with the first two
of these properties is \textbf{quasi-finite;} with all three of these
properties, \textbf{pseudo-finite.}  
So every infinite model of the
theory of finite fields is (quasi-finite and) pseudo-finite.  
Ax
proves the converse.  
In particular, Ax proves that every
pseudo-finite field is elementarily equivalent to a non-principal ultraproduct of finite fields, and indeed to one of the
ultraproducts given above in \eqref{eqn:psf}.  
The method is as
follows; here I use Ax \cite{MR0229613} and also Chatzidakis \cite{Chatzidakis-psf}. 

For every field $K$, the field $\Abs K$ of \textbf{absolute numbers}
of $K$ consists of the algebraic elements of $K$ (here algebraic means
algebraic over the prime field). 
The following is \cite[Prop.~7$'$, \S10, p.~261]{MR0229613}.

\begin{lemma}
For every field $K$ of prime characteristic $p$, there is a maximal
ideal $M$ of $\prod_{n\in\N}\F_{p^n}$ such that 
\begin{equation*}
\Abs K\cong\Abs{\prod_{n\in\N}\F_{p^n}/M}.
\end{equation*}
\end{lemma}

\begin{proof}
Because $\alg{\F_p}=\bigcup_{n\in\N}\F_{p^n}$ as in \eqref{eqn:algFp} on page \pageref{eqn:algFp}, we need only choose $M$ so that, for all $m$ in $\N$,
\begin{equation*}
\F_{p^m}\included K\iff\F_{p^m}\included \prod_{n\in\N}\F_{p^n}/M.
\end{equation*}
For each $m$ in $\N$, let $f_m$ be an irreducible element of $\F_p[X]$
of degree $m$.  
Then each zero of $f_m$ generates $\F_{p^m}$ over $\F_p$.  
So we want $M$ to be such that
\begin{equation*}
\F_{p^m}\included K\iff f_m\text{ has a zero in }\prod_{n\in\N}\F_{p^n}/M.
\end{equation*}
Let $F$ be the ultrafilter on $\N$ corresponding to $M$, that is,
\begin{equation*}
F=\{\N\setminus\supp f\colon f\in M\}=\bigl\{\{n\colon f_n=0\}\colon f\in M\bigr\}.
\end{equation*}
Then
\begin{multline*}
  f_m\text{ has a zero in }\prod_{n\in\N}\F_{p^n}/M\\
\iff\bigl\{n\colon f_m\text{ has a zero in }\F_{p^n}\bigr\}\in F.
\end{multline*}
Moreover,
\begin{equation*}
f_m\text{ has a zero in }\F_{p^n}\iff m\divides n.
\end{equation*}
So, combining all of our equivalences, we want to choose $F$ on $\N$ such that 
\begin{equation*}
\F_{p^m}\included K\iff\{n\colon m\divides n\}\in F.
\end{equation*}
For each $m$ in $\N$, the subset
\begin{equation*}
\{k\colon k\divides m\And\F_{p^k}\included K\}
\end{equation*}
of $\N$ is a sublattice of the lattice of factors of $m$ with respect to
divisibility: in particular, it contains the least common multiple of
any two members.  
It also contains $1$.\footnote{Thus it contains the least common multiple of every (finite) set of members, including the empty set.}  
Therefore it has a maximum element, say $g(m)$.  
The
arithmetic function $g$ is multiplicative: 
\begin{equation*}
\gcd(m,n)=1\implies g(mn)=g(m)\cdot g(n).
\end{equation*}
Now let
\begin{equation*}
b_m=\{x\colon \gcd(m,x)=g(m)\}.
\end{equation*}
Then the function $m\mapsto b_m$ is also multiplicative, in the sense that
\begin{equation}\label{eqn:bmn}
\gcd(m,n)=1\implies b_{mn}=b_m\cap b_n.
\end{equation}
Indeed, suppose $\gcd(m,n)=1$.  
Then for all $x$ in $\N$,
\begin{equation*}
\gcd(mn,x)=\gcd(m,x)\cdot\gcd(n,x),
\end{equation*}
and these factors are co-prime, being respectively factors of $m$ and $n$.  
But also $g(mn)=g(m)\cdot g(n)$, and these factors are co-prime, being respectively factors of $m$ and $n$.  
Therefore
\begin{multline*}
\gcd(mn,x)=g(mn)\\
\iff\gcd(m,x)=g(m)\And\gcd(n,x)=g(n).
\end{multline*}
So we have \eqref{eqn:bmn}.  
Moreover, we have also
\begin{equation}\label{eqn:bn-bm}
m\leq n\implies b_{\ell^n}\included b_{\ell^m}.
\end{equation}
For, we have
\begin{equation*}
b_{\ell^n}=
\begin{cases}
\{g(\ell^n)\cdot y\colon\ell\ndivides y\},&\text{ if }g(\ell^n)<\ell^n,\\
\{\ell^ny\colon y\in\N\},&\text{ if }g(\ell^n)=\ell^n,
\end{cases}
\end{equation*}
and also
\begin{equation*}
m\leq n\implies g(\ell^m)=\min\bigl(\ell^m,g(\ell^n)\bigr).
\end{equation*}
Now we can just check that \eqref{eqn:bn-bm} holds in each of the three cases
\begin{align*}
g(\ell^n)&=\ell^n,& \ell^m&\leq g(\ell^n)<\ell^n,& g(\ell^n)&<\ell^m.
\end{align*}
So we have finally
\begin{equation*}
b_m\cap b_n=b_{\lcm(m,n)}.
\end{equation*}
Thus, since each $b_m$ is nonempty, the set of these generates a proper filter on $\N$.  
Let $F$ be an ultrafilter on $\N$ that contains all of the sets $b_m$.  
We claim
that this $F$ is as desired.  
Indeed, 
\begin{compactitem}
\item
if $\F_{p^m}\included K$, so $g(m)=m$, then $b_m=\{mx\colon x\in\N\}$;
\item
if $\F_{p^m}\nincluded K$, so $g(m)<m$, then $b_m\cap\{mx\colon x\in\N\}=\emptyset$. 
\end{compactitem}
Consequently the following are equivalent:
\begin{gather*}
	\F_{p^m}\included K,\\
	\{mx\colon x\in\N\}\in F,\\
	f_m\text{ has a root in }\prod_{n\in\N}\F_{p^n}/M,\\
	\F_{p^m}\included\prod_{n\in\N}\F_{p^n}/M.\qedhere
\end{gather*}
\end{proof}

The lemma has a companion \cite[Prop.~7]{MR0229613}, namely that for
every quasi-finite field $K$ of characteristic $0$, there is a maximal
ideal $M$ of $\prod_{p}\F_p$ such that 
\begin{equation*}
\Abs K\cong\Abs{\prod_p\F_p/M},
\end{equation*}
but the proof is more difficult.  
Since all fields of characteristic
$0$ are perfect, quasi-finiteness in this case just means having
exactly one extension of each degree.  
In this case the field of
absolute numbers has \emph{at most} one extension of each degree.
This is because, if $\alpha$ is algebraic over $\Abs K$, then $\alpha$ has the same degree over $K$ that it has over $\Abs K$.  
For, the minimal polynomial of $\alpha$ over $\Abs K$ is a product
\begin{equation*}
\prod_{i<n}(X-\alpha_i),
\end{equation*}
the $\alpha_i$ being the conjugates of $\alpha$ over $\Abs K$.  
The minimal polynomial over $K$ is a factor of this; so its coefficients are polynomial functions of (some of) the conjugates of $\alpha$ over $\Abs K$.  
So the coefficients are algebraic (over $\Abs K$); therefore they already belong to $\Abs K$, by its definition.

We now want to prove \cite[Thm 4, \S8, p.~255]{MR0229613}, that if $F$
and $F'$ are pseudo-finite fields, then 
\begin{equation}\label{eqn:FF'}
\Abs F\cong\Abs{F'}\implies F\equiv F'.
\end{equation}
With this and the foregoing lemma, we shall have that every
pseudo-finite field (at least in positive characteristic) is
elementarily equivalent to an ultraproduct of finite fields. 

To establish \eqref{eqn:FF'}, since $\Abs F$ is determined by $\Th F$,
we can replace $F$ and $F'$ (respectively) by elementarily equivalent
fields.  
In particular, we can replace them with ultrapowers with exponent
$\upomega$; these ultrapowers are $\upomega_1$-saturated by
Theorem~\ref{thm:sat} on page \pageref{thm:sat}. 
Now take a countable elementary substructure $F_0$ of $F$; this exists by the
downward L\"owenheim--Skolem--Tarski Theorem, Theorem~\ref{thm:dLST}.
One shows \cite[5.10, Lemme de plongement]{Chatzidakis-psf} that this
embeds in $F'$ under a monomorphism $\phi_0$.  
Then $F'$ has an
elementary substructure $F'_0$ that includes the image of $F_0$; and
$F'_0$ embeds in $F$ under a monomorphism $\phi'_0$ that extends
$\phi_0{}\inv$.  
Continuing, we obtain isomorphic elementary
substructures $F_{\upomega}$ and $F'_{\upomega}$ of $F$ and $F'$
respectively.  
See Figure~\ref{fig:tower}.
\begin{figure}
  \begin{equation*}
    \xymatrix@!0@=3cm{
F                                              &F'\\
F_{\upomega}\ar[r]^{\bigcup_{n\in\upomega}\phi_n}\ar@{-}[u]&F_{\upomega}'\ar@{-}[u]\\
F_1\ar[r]^{\phi_1}\ar@{.}[u]                      &\phi_1[F_1]\ar@{.}[u]\\
\phi_0'[F_0']\ar@{-}[u]                          &F_0'\ar[l]_{\phi_0'}\ar@{-}[u]\\
F_0\ar[r]^{\phi_0}\ar@{-}[u]                      &\phi_0[F_0]\ar@{-}[u]
}
  \end{equation*}
  \caption{Isomorphisms of pseudo-finite fields}\label{fig:tower}  
\end{figure}
This establishes \eqref{eqn:FF'}.
\begin{comment}



An uncountable, quasi-finite field $F$ is called \textbf{hyper-finite}
if for every subfield $E$ of $F$, for every \emph{absolutely entire}
$E$-algebra $R$ of cardinality less than $F$, there is a homomorphism
from $R$ to $F$ over $E$.  
Here, $R$ is \textbf{absolutely entire} if
$\alg E\otimes_ER$ is \emph{entire,} that is, is an integral
domain.\footnote{Lang \cite[pp.\ 91--92]{Lang-alg} recommends
  \emph{entire} as the adjective form of \emph{integral domain,}
  observing that \emph{integral} would have been better, had it not
  already been taken for other purposes.} 

Hyper-finiteness is a kind of \emph{saturation.}  
Ax's route is to
show \cite[Prop.~4, \S7, p.~254]{MR0229613} that saturation implies
hyper-finiteness, and to show \cite[Thm~2, \S5, p.~248]{MR0229613}
that if $F$ and $F'$ in \eqref{eqn:FF'} are hyper-finite of the same
cardinality, then 
\begin{equation*}
\Abs F\cong\Abs{F'}\implies F\cong F'
\end{equation*}



\end{comment}


Throughout the chapter, $K$ will be a field, 
and $L$ will be a field of which $K$ is a subfield, 
that is,
\begin{equation*}
  K\included L.
\end{equation*}


\chapter{Schemes}


Throughout this chapter,
as in Chapter \ref{ch:ag} (page \pageref{ch:ag}),
$K$ will be a field, 
and $L$ will be a field of which $K$ is a subfield, 
that is,
\begin{equation*}
  K\included L.
\end{equation*}
Sources for the algebraic geometry of this chapter 
include Coombes \cite{Coombes} and Hartshorne \cite{MR0463157}.  
The main point is to look at the \emph{ultraproduct scheme} at the end; 
this work is based on the first of the three MSRI/Evans Hall Lectures, 
given at the University of California at Berkeley 
in the spring of 1998 by Angus Macintyre.%%%%%
\footnote{These lectures used to be preserved on the MSRI website; 
but I could not find them there, the last time I looked.}
%%%%%%

\section{Zero-loci}

Throughout this section,
let $R=K[\vec X]$.
In \S\ref{sect:Gal} (page \pageref{sect:Gal}), 
letting $f$ range over $R$,
and letting $\vec x$ range over some $L^n$, where $K\included L$, 
we used the equation $f(\vec x)=0$ 
to establish a one-to-one correspondence 
between the $K$-closed subsets of $L^n$ 
and certain radical ideals of $R$.
By Hilbert's Nullstellensatz (page \pageref{thm:HN}),
if $L$ includes $\alg K$,
then the correspondence is between 
the $K$-closed subsets of $L^n$ and (all of) the radical ideals of $R$.
The correspondence is inclusion-reversing.  
Thus the set of radical ideals of $R$
encodes the topological structure of $L^n$ 
for $L$ that include $\alg K$. 

Suppose indeed $\alg K\included L$, 
and we are given a particular $f$ in $R$.
We are interested in its zero-locus, the $K$-closed set $\V f$; 
and this now corresponds to the radical ideal $\I{\V f}$, which is $\surd(f)$.  
We should like to have a way of picking out this ideal 
among all of the radical ideals of $K[\vec X]$, 
without having to refer to $L^n$.  
One way of doing this is simply to observe 
that $\surd(f)$ is the intersection 
of all radical ideals of $K[\vec X]$ that contain $f$.  
More is true, by Theorem \ref{thm:rad} (page \pageref{thm:rad}):
\begin{equation}\label{eqn:rad-f}
  \begin{aligned}
  \surd(f)
&=\bigcap\{\mathfrak p\in\spec\colon f\in\mathfrak p\}\\
&=\bigcap\var f.
\end{aligned}
\end{equation}
We can also give a new proof of this,
using the Nullstellensatz.
Given an ideal $\mathfrak a$ of $R$, we have
\begin{equation*}
  \vec x\in\V{\mathfrak a}\iff\mathfrak a\included\I{\vec x},
\end{equation*}
and so
\begin{align*}
\mathfrak a
&\included\bigcap\{\mathfrak p\in\spec\colon\mathfrak a\included\mathfrak p\}\\
&\included\bigcap\{\I{\vec x}\colon
\vec x\in L^n\And\mathfrak a\included\I{\vec x}\}\\
&=\bigcap\{\I{\vec x}\colon
\vec x\in\V{\mathfrak a}\}\\
&=\I{\V{\mathfrak a}}.
\end{align*}
The Nullstellensatz then makes the inclusions equalities,
if $\mathfrak a$ is radical;
in general,
\begin{equation*}
\surd\mathfrak a
=\bigcap\{\mathfrak p\in\spec\colon\mathfrak a\included\mathfrak p\}.
\end{equation*}
We may use the obvious notation
\begin{equation*}
    \var{\mathfrak a}
=\{\mathfrak p\in\spec\colon\mathfrak a\included\mathfrak p\}
=\bigcap_{f\in\mathfrak a}\var f,
\end{equation*}
so that
\begin{equation*}
  \surd\mathfrak a=\bigcap\var{\mathfrak a}.
\end{equation*}
So if $L$ is large enough in the sense of including $\alg K$, 
then we have a one-to-one correspondence between:
\begin{compactitem}
\item
closed subsets $\V{\mathfrak a}$ of $L^n$;
\item
radical ideals $\surd\mathfrak a$ of $R$;
\item
closed subsets $\var{\mathfrak a}$ 
of $\spec$.
\end{compactitem}
We want to understand the sets $\var{\mathfrak a}$ 
as being zero-loci like $\V{\mathfrak a}$.
In \eqref{eqn:rad-f},
the condition that $f\in\mathfrak p$ 
is equivalent to the condition that $f+\mathfrak p=0$ in $R/\mathfrak p$.  
Suppose we write $f+\mathfrak p$ as $f_{\mathfrak p}$.
As in \eqref{eqn:x|->x+p2} on page \pageref{eqn:x|->x+p2},
we have an embedding
\begin{equation*}
f\mapsto(f_{\mathfrak p}\colon\mathfrak p\in\spec)
\end{equation*}
of $R$ in the product
\begin{equation*}
\prod_{\mathfrak p\in\spec}R/\mathfrak p.
\end{equation*}
Also
\begin{equation*}
\var f=\{\mathfrak p\in\spec\colon f_{\mathfrak p}=0\},
\end{equation*}
a zero-locus.
To establish
\begin{equation*}
\var{\mathfrak a}\cup\var{\mathfrak b}=\var{\mathfrak a\cap\mathfrak b}
\end{equation*}
corresponding to \eqref{eqn:Vab} on page \pageref{eqn:Vab},
we need that the functions $\mathfrak p\mapsto f_{\mathfrak p}$ on $\spec$ take values in integral domains; and this is the case, since $f_{\mathfrak p}\in R/\mathfrak p$.

It will be useful to have a notation for the \emph{open} subsets of $\spec$.  
If $f\in R$, let us write
\begin{equation*}
\U f=\var f\comp=\{\mathfrak p\in\spec\colon f\notin\mathfrak p\}. 
\end{equation*}
If $A\included R$, we let
\begin{equation*}
\U A=\var A\comp=\bigcup_{f\in A}\U f=\{\mathfrak p\in\spec\colon A\not\included\mathfrak p\}. 
\end{equation*}
These are the open subsets of $\spec$, and each of them is $\U{\mathfrak a}$ for some radical ideal $\mathfrak a$ of $R$.  

\section{Regular functions}

At the beginning of the last section, we considered the equation $f(\vec x)=0$, where $f\in K[\vec X]$ and $\vec x\in L^n$.  
We have generally $f(\vec x)\in L$, that is, $f$ is a function from $L^n$ to $L$.  
There can be other such functions.  
An arbitrary function $h$ from a subset $S$ of $L^n$ to $L$ is \textbf{regular} (or more precisely \emph{$K$-regular}) \emph{at} a point $\vec a$ of $S$ if there is a neighborhood $U$ of $\vec a$ (in the Zariski topology over $K$, restricted to $S$) and there are elements $f$ and $g$ of $K[\vec X]$ such that, for all $\vec x$ in $U$,
\begin{equation*}
h(\vec x)=\frac{f(\vec x)}{g(\vec x)}.
\end{equation*}
The function is \textbf{regular,} simply, if it is regular at all points of its domain.
The only regular functions on $L^n$ itself are the elements of $K[\vec X]$.  
However,\label{Y^2-X^3} let
\begin{align*}
S_0&=\V{Y^2-X^3}\setminus\V X,&
S_1&=\V{Y^2-X^3}\setminus\V Y.
\end{align*}
These are open subsets of their union.
On $S_0$ and $S_1$ respectively there are regular functions $h_0$ and $h_1$ given by
\begin{align*}
h_0(x,y)&=\frac y{x^2},&
h_1(x,y)&=\frac xy.
\end{align*}
These two functions agree on $S_0\cap S_1$, since $y^2=x^3$ for all $(x,y)$ in that set (and even in $S_0\cup S_1$).  
Thus $h_0\cup h_1$ is a regular function $h$ on $S_0\cup S_1$.  
However, there are no $f$ and $g$ in $K[X,Y]$ such that, for all $(x,y)$ in $S_0\cup S_1$, $h(x,y)= f(x,y)/g(x,y)$.

In the example, $S_0\cup S_1$ is an open subset of the closed subset $\V{Y^2-X^3}$ of $L^2$.  
For now, we shall look just at open subsets of the powers $L^n$ themselves.  

If $\mathfrak p$ is a prime ideal of $K[\vec X]$, and $f$ and $g$ in $K[\vec X]$ are such that $\vec x\mapsto f(\vec x)/g(\vec x)$ is well-defined (and therefore regular) on $L^n\setminus\V{\mathfrak p}$, this means $f/g$ is a well-defined element of the local ring $K[\vec X]_{\mathfrak p}$.

Now write $R=K[\vec X]$ as before, and let $\mathfrak a$ be an arbitrary radical ideal of $R$, so that $\U{\mathfrak a}$ is an open subset of $\spec$.  
We shall define a sub-ring, to be denoted by
\begin{equation*}
\Oh{\U{\mathfrak a}},
\end{equation*}
of the product\footnote{Note well that the factors of the product are
  the localizations $R_{\mathfrak p}$, rather than, say, the
  quotient-fields of the quotients $R/\mathfrak p$.  
However, in the
  other case that we shall be interested in, where $R$ is itself a
  product of fields, then the integral domains $R/\mathfrak p$ will
  already be fields, which are isomorphic to the localizations
  $R_{\mathfrak p}$.  
See \S\ref{sect:vN}.} 
\begin{equation*}
\prod_{\mathfrak p\in\U{\mathfrak a}}R_{\mathfrak p}.
\end{equation*}
See Figure \ref{fig:stalk}.
\begin{figure}[ht]
\centering
\psset{unit=8mm}
    \begin{pspicture}(0,1)(9.5,5.5)
%\psgrid
    \psellipse(4,3)(4,2)
\psline{->}(6,3)(6,5.4)
\psdot(6,3)
\uput[r](6,5.4){$R_{\mathfrak p}$}
\uput[r](6,3){$\mathfrak p$}
\uput[r](8.1,3){$\spec$}
  \end{pspicture}
  \caption[A stalk of a sheaf]{A stalk of a sheaf (see p.\ \pageref{stalk})}\label{fig:stalk}
\end{figure}
Elements of this product are functions on $\U{\mathfrak a}$; so as
before we have a notion of being \emph{regular}:
An element $h$ of
the product is \textbf{regular} at a point $\mathfrak p$ of
$\U{\mathfrak a}$ if, for some open subset $V$ of $\U{\mathfrak a}$
that contains $\mathfrak p$, there are $f$ and $g$ in $R$ such that,
for all $\mathfrak q$ in $V$, 
\begin{equation*}
h_{\mathfrak q}=\frac fg.
\end{equation*}
Note that this requires $g\notin\mathfrak q$.  
The ring $\Oh{\U{\mathfrak a}}$ consists of the elements of $\prod_{\mathfrak p\in\U{\mathfrak a}}R_{\mathfrak p}$ that are regular at all points of $\U{\mathfrak a}$.

There is a simpler definition when $\mathfrak a$ is a principal ideal $(g)$.  
In this case, one shows
\begin{equation*}
\Oh{\U{(g)}}\cong\{g^k\colon k\in\upomega\}\inv R,
\end{equation*}
because the map $x/g^n\mapsto(x/g^n\colon\mathfrak p\in\U{(g)})$ from this ring to $\Oh{\U{(g)}}$ is injective and surjective.  
See Hartshorne \cite[Prop.~II.2.2, p.~71]{MR0463157}.

If $U$ and $V$ are open subsets of $\spec$ such that $U\includes V$, then the restriction-map from $\prod_{\mathfrak p\in U}R_{\mathfrak p}$ to
$\prod_{\mathfrak p\in V}R_{\mathfrak p}$ itself restricts to a map $\rho^U_V$ from $\Oh U$ to $\Oh V$.  
If $h\in\Oh U$, we then write
\begin{equation*}
\rho^U_V(h)=h\restriction V.
\end{equation*}
 The function $U\mapsto\Oh U$ on the collection of open subsets of $\spec$, together with these homomorphisms $\rho^U_V$, is called a \textbf{pre-sheaf} of rings on $\spec$ because:
\begin{align*}
\Oh{\emptyset}&=\{0\},&
\rho^U_U&=\id U,&
\rho^U_W&=\rho^V_W\circ\rho^U_V.
\end{align*}
(The notation $\rho^U_V$ implies $U\includes V$; so for the last equation we have $U\includes V\includes W$.)
We now have a situation that is `dual' (because the arrows are reversed) to that of the Galois group $\Gal{\F_p}$: see page \pageref{up}.  
For all $\mathfrak p$ in $\spec$, $R_{\mathfrak p}$ has a certain `universal property' with respect to the system of rings $\Oh U$ such that $\mathfrak p\in U$:
\begin{compactenum}
\item
$R_{\mathfrak p}$ is a ring $A$ to which there is a homomorphism $h^U_A$ from $\Oh U$ such that, if $U\includes V$, then
\begin{equation*}
h^V_A\circ\rho^U_V=h^U_A.
\end{equation*}
\item
For every such ring $A$, there is a unique homomorphism $h$ to $A$ from $R_{\mathfrak p}$ such that
\begin{equation*}
h^U_A=h\circ h^U_{R_{\mathfrak p}}.
\end{equation*}
\end{compactenum}
See Figure~\ref{fig:Rp}.
\begin{figure}[ht]
\begin{equation*}
\xymatrix@!0@R=3.46cm@C=4cm{
&R_{\mathfrak p}&\\
&\save[]+<0cm,-1.16cm>*{A}
\ar@{<-}[u]_h
\ar@{<-}[dl]_{h^V_A}
\ar@{<-}[dr]^{h^U_A}
\restore&\\
\Oh V
\ar[uur]^{h^V_{R_{\mathfrak p}}}&&
\Oh U
\ar[uul]_{h^U_{R_{\mathfrak p}}}
\ar[ll]^{\rho^U_V}
}
\end{equation*}
\caption{The universal property of $R_{\mathfrak p}$}\label{fig:Rp}
\end{figure}
Therefore $R_{\mathfrak p}$ is called a \textbf{co-limit} or \textbf{direct limit} of the given system of rings.  
This limit can be obtained as a quotient of the sum $\sum_{\mathfrak p\in U}\Oh U$ by the smallest ideal that contains, for each pair $U$ and $V$ such that $U\pincludes V$, every element $x$ such that $x_V=\rho^U_V(x_U)$, and $x_W=0$ if $W$ is not $U$ or $V$.

The pre-sheaf $U\mapsto\Oh U$ is further a \textbf{sheaf} of rings because it has two additional properties:
\begin{compactenum}
\item
If $h\in\Oh U$, and $h\restriction V=0$ for all $V$ in an open covering of $U$, then $h=0$.
\item
If there is $h_V$ in $\Oh V$ for every $V$ in an open covering of $U$, and
\begin{equation*}
h_V\restriction(V\cap W)=h_W\restriction(V\cap W)
\end{equation*}
for all $V$ and $W$ in this open covering, then for some $h$ in $\Oh U$, for each $V$ in the open covering,
\begin{equation*}
h_V=h\restriction V.
\end{equation*}
\end{compactenum}
The local ring $R_{\mathfrak p}$ is the \textbf{stalk}\label{stalk} of
the sheaf at $\mathfrak p$.  
In the fullest sense, the
\textbf{spectrum} of $R$ is $\spec$ as a topological space equipped
with this sheaf.  
The sheaf is then the \textbf{structure sheaf} of
the spectrum of $R$. 

\section{Generic points and irreducibility}

This section is here for completeness, but will not be used later.
Every point $\vec a$ of $L^n$ is called a \textbf{generic point} of
$\V{\I{\vec a}}$; more precisely, $\vec a$ is a generic point
\emph{over} $K$ of $\V{\I{\vec a}}$.  
In the example on page \pageref{pi},
$(\uppi,\uppi)$ and $(\mathrm e,\mathrm e)$ 
are generic points of $\V{X-Y}$
%$\{(x,x)\colon x\in\R\}$ 
over $\Q$.  

In any case, if for some radical
ideal $\mathfrak a$, the algebraic set $\V{\mathfrak a}$ has a generic
point, then $\mathfrak a$ must be prime. 
The converse may fail.  
For example, $\V{(X-Y)}$ has no generic point
if $L\included\alg{\Q}$.  
However, by Theorem \ref{thm:spec}, we have the following

\begin{corollary}
If $\alg{K(\vec X)}\included L$, then the zero-locus in $L^n$ of every prime ideal has a generic point. 
\end{corollary}


A closed subset of $L^n$ is called \textbf{irreducible} if it cannot
be written as the union of two closed subsets, neither of which
includes the other.

\begin{theorem}
For all radical ideals $\mathfrak a$ of $K[\vec X]$, if $\alg K\included L$,
\begin{center}
$\mathfrak a$ is prime $\iff$ $\V{\mathfrak a}$ is irreducible.
\end{center}
\end{theorem}

\begin{proof}
If $\mathfrak p$ is prime, and $\V{\mathfrak p}=\V{\mathfrak a}\cup\V{\mathfrak b}$ for some radical ideals $\mathfrak a$ and $\mathfrak b$, then (by Hilbert's Nullstellensatz)
\begin{equation*}
\mathfrak p=\mathfrak a\cap\mathfrak b,
\end{equation*}
so we may assume $\mathfrak p=\mathfrak a$ and therefore $\V{\mathfrak a}\includes\V{\mathfrak b}$.

Suppose conversely $\V{\mathfrak a}$ is irreducible, and $fg\in\mathfrak a$.  
Then
\begin{equation*}
\V{\mathfrak a}=\V{\mathfrak a\cup\{f\}}\cup\V{\mathfrak a\cup\{g\}},
\end{equation*}
so we may assume $\V{\mathfrak a}=\V{\mathfrak a\cup\{f\}}$ and therefore (again by Hilbert's Nullstellensatz) $f\in\mathfrak a$.
\end{proof}

For example, $L^n$ itself is irreducible, since the zero-ideal of $K[\vec X]$
is prime.  
Therefore the closure of every open subset is the whole
space $L^n$.  
In any case, every closed set is the union of only
finitely many irreducible closed sets: this is by the corollary to the
Hilbert Basis Theorem (Theorem~\ref{thm:H} on page \pageref{thm:H}).  
Hence every radical
ideal of $K[\vec X]$ is the intersection of just finitely many elements of
$\spec[{K[\vec X]}]$.  

\section{Affine schemes}

For an arbitrary commutative ring $R$, 
every element $f$ of $R$ 
determines a function $\mathfrak p\mapsto f_{\mathfrak p}$ on $\spec$, 
where $f_{\mathfrak p}$ is the element $f+\mathfrak p$ of $R/\mathfrak p$.  
However, the corresponding map
\begin{equation}\label{eqn:Rto}
f\mapsto(f_{\mathfrak p}\colon\mathfrak p\in\spec)
\end{equation}
from $R$ to $\prod_{\mathfrak p\in\spec}R/\mathfrak p$ is injective
if and only if $R$ is reduced
(Theorem~\ref{thm:rad}, page~\pageref{thm:rad}).
For example, the kernel of this map contains $X+(X^2)$ when $R=K[X]/(X^2)$.
In general, the kernel is $\surd\{0\}$.

We may refer to the topology on $\spec$ as the \textbf{Zariski topology.}  
Just as before, 
we obtain the sheaf $U\mapsto\Oh U$ of rings on $\spec$, 
with stalks $R_{\mathfrak p}$.
Continuing the example on page \pageref{Y^2-X^3}, we may let
\begin{equation*}
R=K[X,Y]/(Y^2-X^3).
\end{equation*}
Let $x$ and $y$ be the images of $X$ and $Y$ respectively in $R$.  
Then
\begin{equation*}
\U{(x,y)}=\U x\cup\U y,
\end{equation*}
and
\begin{align*}
\mathfrak p\in\U x&\implies\frac y{x^2}\in R_{\mathfrak p},&
\mathfrak p\in\U y&\implies\frac xy\in R_{\mathfrak p},
\end{align*}
 and if $\mathfrak p\in\U x\cap\U y$, then $y/x^2$ and $x/y$ are the same element of $R_{\mathfrak p}$.  
Thus we obtain an element of $\Oh{\U{(x,y)}}$.

The spectrum of a ring is called an \textbf{affine scheme.}  
One point of introducing this terminology is that a \emph{scheme,} simply, is a topological space with a sheaf of rings such that every point of the space has a neighborhood that, with the restriction of the sheaf to it, is an affine scheme.  
However, we shall not look at schemes in general.  
In fact we shall look at just one affine scheme whose underlying ring is not a polynomial ring.

\section{The ultraproduct scheme}

Now let $R$ be the product $\prod_{i\in\Omega}K_i$ of fields as above.  
As $\mathfrak p$ ranges over $\spec$, the quotients $R/\mathfrak p$ are just the possible ultraproducts of the fields $K_i$.  
We want to investigate how these arise from the structure sheaf of the spectrum of $R$.  
So, letting $\mathfrak a$ be an ideal of $R$, we want to understand $\Oh{\U{\mathfrak a}}$. 

We can identify $\spec$ with $\spec[\pow{\Omega}]$, and more generally, we can identify ideals of $R$ with ideals of $\pow{\Omega}$.  
Because $R_{\mathfrak p}\cong R/\mathfrak p$, we may assume
\begin{equation*}
\Oh{\U{\mathfrak a}}\included\prod_{\mathfrak p\in\U{\mathfrak a}}R/\mathfrak p.
\end{equation*}
Here we may treat $\mathfrak a$ as an ideal of $\pow{\Omega}$, so $\U{\mathfrak a}$ can be thought of as an open subset of $\spec[\pow{\Omega}]$.  
Then, in the product $\prod_{\mathfrak p\in\U{\mathfrak a}}R/\mathfrak p$, the index $\mathfrak p$ ranges over this open subset, but in the quotient $R/\mathfrak p$, the index returns to being the corresponding ideal of $R$.

Let $s\in\prod_{\mathfrak p\in\U{\mathfrak a}}R/\mathfrak p$.  
Every \emph{principal} ideal in $\U{\mathfrak a}$ is $(\Omega\setminus\{i\})$ for some $i$ in $\Omega$.  
In this case we have $\mathfrak a\nincluded(\Omega\setminus\{i\})$, that is,
\begin{equation*}
i\in\bigcup\mathfrak a.
\end{equation*}
Let us denote $(\Omega\setminus\{i\})$ by $\mathfrak p(i)$.
There is only one prime ideal of $\pow{\Omega}$ that does not contain $\{i\}$, namely $\mathfrak p(i)$.  
Thus
\begin{equation*}
\U{\{i\}}=\{\mathfrak p(i)\}.
\end{equation*}
In particular, $s$ is automatically regular at $\mathfrak p(i)$.  
We want to understand when $s$ is regular at non-principal ideals.  

Still considering also the principal ideals, we have
\begin{equation*}
R/\mathfrak p(i)\cong K_i.
\end{equation*}
Let $s_{\mathfrak p(i)}$ be sent to $s_i$ under this isomorphism, so whenever $x$ in $R$ is such that $x_i=s_i$, we have
\begin{equation*}
s_{\mathfrak p(i)}=x+\mathfrak p(i).
\end{equation*}
By definition, we have $s\in\Oh{\U{\mathfrak a}}$ if and only if, for all $\mathfrak p$ in $\U{\mathfrak a}$, for some subset $\U{\mathfrak b}$ of $\U{\mathfrak a}$ such that $\mathfrak b\nincluded\mathfrak p$, for some $x$ in $R$, for all $\mathfrak q$ in $\U{\mathfrak b}$,
\begin{equation*}
s_{\mathfrak q}=x+\mathfrak q.
\end{equation*}
We may assume $\mathfrak b$ is a principal ideal $(A)$, where $A\in\mathfrak a\setminus\mathfrak p$.  
If $\mathfrak q$ in $\U A$ here is the principal ideal $\mathfrak p(j)$, so that $j\in A$, we must have $x_j=s_j$.  
More generally, $\mathfrak q\in\U A$ means $A\notin\mathfrak q$, so $A$ is $\mathfrak q$-large, and hence for all $x$ in $R$, $x+\mathfrak q$ is determined by $(x_i\colon i\in A)$.  
Thus we may assume
\begin{equation*}
x=(s_i\colon i\in\Omega).
\end{equation*}
This establishes that $\Oh{\U{\mathfrak a}}$ is the image of $R$ in $\prod_{\mathfrak p\in\U{\mathfrak a}}R/\mathfrak p$:
\begin{equation*}
\Oh{\U{\mathfrak a}}=\{(x+\mathfrak p\colon\mathfrak p\in\U{\mathfrak a})\colon x\in R\}.
\end{equation*}
In particular, $\Oh{\U{\mathfrak a}}$ is a quotient of $R$, that is, a reduced product of the $K_i$.  
More precisely,
\begin{equation*}
\Oh{\U{\mathfrak a}}\cong R/\mathfrak b,
\end{equation*}
where
\begin{equation*}
\mathfrak b
=\bigcap_{\mathfrak p\in\U{\mathfrak a}}\mathfrak p
=\bigcap_{\mathfrak a\nincluded\mathfrak p}\mathfrak p.
\end{equation*}
It follows that
\begin{equation}\label{eqn:OU}
\Oh{\U{\mathfrak a}}\cong\prod_{i\in\bigcup\mathfrak a}K_i.
\end{equation}
We can see this in two ways.  
For example, if $\mathfrak
p\in\U{\mathfrak a}$, so that $\mathfrak a\nincluded\mathfrak p$, then
$\bigcup\mathfrak a\notin\mathfrak p$, that is, $\bigcup\mathfrak a$
is $\mathfrak p$-large.  
Therefore the image of $x$ in
$\Oh{\U{\mathfrak a}}$ depends only on $(x_i\colon
i\in\bigcup\mathfrak a)$.  
This shows that $\Oh{\U{\mathfrak a}}$ is a
quotient of $\prod_{i\in\bigcup\mathfrak a}K_i$. 

It is moreover the quotient by the trivial ideal.  
For, if
$i\in\bigcup\mathfrak a$, then $\mathfrak p(i)\in\U{\mathfrak a}$, so
that $x+\mathfrak p(i)$ depends only on $x_i$, that is,
\begin{equation*}
  x+\mathfrak p(i)=0\iff x_i=0.
\end{equation*}
This gives us \eqref{eqn:OU}.

Note that possibly $\bigcup\mathfrak a\notin\mathfrak p$, although
$\mathfrak a\included\mathfrak p$.  
Such is the case when $\mathfrak
p$ is non-principal, but $\mathfrak a$ is the ideal of finite sets.
However, we always have
\begin{equation*}
  \bigcup\mathfrak a\notin\mathfrak p\implies(\bigcup\mathfrak
  a)\nincluded\mathfrak p.
\end{equation*}

Another way to establish \eqref{eqn:OU} is to show
\begin{equation*}
  \bigcap_{\mathfrak a\nincluded\mathfrak p}\mathfrak
  p=(\Omega\setminus\bigcup\mathfrak a).
\end{equation*}
If $X\included\Omega\setminus\bigcup\mathfrak a$, and $\mathfrak
a\nincluded\mathfrak p$, then $\bigcup\mathfrak a\notin\mathfrak p$,
so $\Omega\setminus\bigcup\mathfrak a\in\mathfrak p$, and therefore
$X\in\mathfrak p$.  
Inversely, if
$X\nincluded\Omega\setminus\bigcup\mathfrak a$, then
$X\cap\bigcup\mathfrak a$ has an element $i$, so that
$X\notin\mathfrak p(i)$ and $\mathfrak a\nincluded\mathfrak p(i)$.

Because the stalk $R_{\mathfrak p}$ is always a direct limit of those
$\Oh U$ such that $\mathfrak p\in U$, we have in the present situation
that the ultraproduct $\prod_{i\in\Omega}K_i/\mathfrak p$
is a direct limit of those products 
$\prod_{i\in A}K_i$ such that $A\notin\mathfrak p$.
Symbolically,
\begin{equation*}
  \prod_{i\in\Omega}K_i/\mathfrak p=\lim_{\longrightarrow}\Bigl\{\prod_{i\in A}K_i\colon A\notin\mathfrak p\Bigr\}.
\end{equation*}
Equivalently, the ultraproduct is the direct limit of those
$R/\mathfrak a$ such that $\mathfrak a$ is a principal ideal included
in $\mathfrak p$:
\begin{equation*}
  \prod_{i\in\Omega}K_i/\mathfrak
  p=\lim_{\longrightarrow}\Bigl\{\prod_{i\in\Omega}K_i/(B)\colon
  B\in\mathfrak p\Bigr\}. 
\end{equation*}


\appendix

\chapter{The German script}\label{app:German}

In his \emph{Model Theory} of 1993, 
Wilfrid Hodges observes \cite[Ch.~1, p.~21]{MR94e:03002}:
\begin{quote}
Until about a dozen years ago, 
most model theorists named structures in horrible Fraktur lettering.  
Recent writers sometimes adopt a notation according to which 
all structures are named $M$, $M'$, $M^*$, $\bar M$, $M_0$, $M_i$ 
or occasionally $N$.  
I hope I cause no offence by using a more freewheeling notation.
\end{quote}
For Hodges, \emph{structures} 
(such as we define in \S\ref{sect:structures} 
on page~\pageref{sect:structures} above) 
are denoted by the letters $A$, $B$, $C$, and so forth; 
Hodges refers to their universes as
\textbf{domains}\index{domains}
and denotes these by $\operatorname{dom}(A)$ and so forth.  
In his \emph{Model Theory:  An Introduction} of 2002, 
David Marker \cite{MR1924282} uses \enquote{calligraphic} letters 
to denote structures, as distinct from their universes:
so $M$ is the universe of~$\mathcal M$, and $N$ of $\mathcal N$.
I still prefer the older practice 
of using capital Fraktur letters for structures:
\begin{equation*}
\begin{array}{*{13}{c}}
\mathfrak A&\mathfrak B&\mathfrak C&\mathfrak D&\mathfrak E&\mathfrak F&\mathfrak G&\mathfrak H&\mathfrak I&\mathfrak J&\mathfrak K&\mathfrak L&\mathfrak M\\\mathfrak N&\mathfrak O&\mathfrak P&\mathfrak Q&\mathfrak R&\mathfrak S&\mathfrak T&\mathfrak U&\mathfrak V&\mathfrak W&\mathfrak X&\mathfrak Y&\mathfrak Z
  \end{array}
\end{equation*}
For the record, here are the minuscule Fraktur letters, 
which are sometimes used in this text for denoting ideals:
\begin{equation*}
\begin{array}{*{13}{c}}
\mathfrak a&\mathfrak b&\mathfrak c&\mathfrak d&\mathfrak e&\mathfrak f&\mathfrak g&\mathfrak h&\mathfrak i&\mathfrak j&\mathfrak k&\mathfrak l&\mathfrak m\\\mathfrak n&\mathfrak o&\mathfrak p&\mathfrak q&\mathfrak r&\mathfrak s&\mathfrak t&\mathfrak u&\mathfrak v&\mathfrak w&\mathfrak x&\mathfrak y&\mathfrak z
  \end{array}
\end{equation*}
A way to write these letters by hand is seen
on the page reproduced below
from a 1931 textbook \cite{German}
on the German language:
%\vfill

\begin{figure}[p]
  \begin{sideways}
\centering
%\includegraphics[width=417pt,height=292pt]{german-script-cropped.eps}
\includegraphics%[width=1\textwidth]%[width=350pt]%
{../german-script-cropped.eps}
  \end{sideways}
\caption{The German alphabet}%\label{fig:German}
\end{figure}

\AfterBibliographyPreamble{\relscale{0.9}}
%\bibliographystyle{plain}
%\bibliography{../../../references}
%\bibliography{../../references}
%\bibliography{../references}

\def\rasp{\leavevmode\raise.45ex\hbox{$\rhook$}} \def\cprime{$'$}
  \def\cprime{$'$} \def\cprime{$'$} \def\cprime{$'$}
\begin{thebibliography}{10}

\bibitem{MR1616156}
Emil Artin.
\newblock {\em Galois theory}.
\newblock Dover Publications, Inc., Mineola, NY, second edition, 1998.
\newblock Edited and with a supplemental chapter by Arthur N. Milgram,
  ``Unabridged and unaltered republication of the last corrected printing of
  the 1944 second, revised edition of the work first published by The
  University of Notre Dame Press in 1942 as Number 2 in the series, \emph{Notre
  Dame Mathematical Lectures.}''.

\bibitem{MR0229613}
James Ax.
\newblock The elementary theory of finite fields.
\newblock {\em Ann. of Math. (2)}, 88:239--271, 1968.

\bibitem{MR1094561}
Michael Barr and Charles Wells.
\newblock {\em Category theory for computing science}.
\newblock Prentice Hall International Series in Computer Science. Prentice Hall
  International, New York, 1990.

\bibitem{MR0269486}
J.~L. Bell and A.~B. Slomson.
\newblock {\em Models and ultraproducts: {A}n introduction}.
\newblock North-Holland Publishing Co., Amsterdam, 1969.
\newblock reissued by Dover, 2006.

\bibitem{MR0227053}
Garrett Birkhoff.
\newblock {\em Lattice theory}.
\newblock Third edition. American Mathematical Society Colloquium Publications,
  Vol. XXV. American Mathematical Society, Providence, R.I., 1967.

\bibitem{Borovik-infinitesimals}
Alexandre Borovik and Mikhael Katz.
\newblock Inevitability of infinitesimals.
\newblock
  \url{http://manchester.academia.edu/AlexandreBorovik/Papers/305871/Inevitability_of_infinitesimals}.
\newblock accessed July 18, 2012.

\bibitem{Burali-Forti}
Cesare Burali-Forti.
\newblock A question on transfinite numbers.
\newblock In van Heijenoort \cite{MR1890980}, pages 104--12.
\newblock First published 1897.

\bibitem{MR91c:03026}
C.~C. Chang and H.~J. Keisler.
\newblock {\em Model theory}, volume~73 of {\em Studies in Logic and the
  Foundations of Mathematics}.
\newblock North-Holland Publishing Co., Amsterdam, third edition, 1990.

\bibitem{Chatzidakis-psf}
Zo{\'e} Chatzidakis.
\newblock {\em Th{\'e}orie de mod{\`e}les des corps finis et pseudo-finis}.
\newblock Pr{\'e}publications de l'Equipe de Logique. Universit\'e Paris VII,
  Octobre 1996.
\newblock \url{http://www.logique.jussieu.fr/~zoe/}.

\bibitem{MR18:631a}
Alonzo Church.
\newblock {\em Introduction to mathematical logic. {V}ol. {I}}.
\newblock Princeton University Press, Princeton, N.~J., 1956.

\bibitem{Cohn-ANT}
Harvey Cohn.
\newblock {\em Advanced Number Theory}.
\newblock Dover, New York, 1980.
\newblock Corrected republication of 1962 edition.

\bibitem{Coombes}
Kevin~R. Coombes.
\newblock Agathos: Algebraic geometry: A total hypertext online system.
\newblock \url{http://www.silicovore.com/agathos/contents.html}.
\newblock accessed July 9, 2014.

\bibitem{MR0159773}
Richard Dedekind.
\newblock {\em Essays on the theory of numbers. {I}: {C}ontinuity and
  irrational numbers. {II}: {T}he nature and meaning of numbers}.
\newblock authorized translation by Wooster Woodruff Beman. Dover Publications
  Inc., New York, 1963.

\bibitem{Descartes-Geometry}
Ren{\'e} Descartes.
\newblock {\em The Geometry of {R}en{\'e} {D}escartes}.
\newblock Dover Publications, Inc., New York, 1954.
\newblock Translated from the French and Latin by David Eugene Smith and Marcia
  L. Latham, with a facsimile of the first edition of 1637.

\bibitem{Logicomix}
Apostolos Doxiadis and Christos~H. Papadimitriou.
\newblock {\em Logicomix}.
\newblock Bloomsbury, London, 2009.

\bibitem{MR97a:13001}
David Eisenbud.
\newblock {\em Commutative algebra}, volume 150 of {\em Graduate Texts in
  Mathematics}.
\newblock Springer-Verlag, New York, 1995.
\newblock With a view toward algebraic geometry.

\bibitem{MR17:814b}
Euclid.
\newblock {\em The thirteen books of {E}uclid's {E}lements translated from the
  text of {H}eiberg. {V}ol. {I}: {I}ntroduction and {B}ooks {I}, {I}{I}. {V}ol.
  {I}{I}: {B}ooks {I}{I}{I}--{I}{X}. {V}ol. {I}{I}{I}: {B}ooks
  {X}--{X}{I}{I}{I} and {A}ppendix}.
\newblock Dover Publications Inc., New York, 1956.
\newblock Translated with introduction and commentary by Thomas L. Heath, 2nd
  ed.

\bibitem{MR1932864}
Euclid.
\newblock {\em Euclid's {E}lements}.
\newblock Green Lion Press, Santa Fe, NM, 2002.
\newblock All thirteen books complete in one volume. The Thomas L. Heath
  translation, edited by Dana Densmore.

\bibitem{MR0142459}
T.~Frayne, A.~C. Morel, and D.~S. Scott.
\newblock Reduced direct products.
\newblock {\em Fund. Math.}, 51:195--228, 1962/1963.

\bibitem{MR0154807}
T.~Frayne, A.~C. Morel, and D.~S. Scott.
\newblock Correction to the paper ``{R}educed direct products''.
\newblock {\em Fund. Math.}, 53:117, 1963.

\bibitem{MR89b:12010}
Michael~D. Fried and Moshe Jarden.
\newblock {\em Field arithmetic}, volume~11 of {\em Ergebnisse der Mathematik
  und ihrer Grenzgebiete (3) [Results in Mathematics and Related Areas (3)]}.
\newblock Springer-Verlag, Berlin, 1986.

\bibitem{Gauss}
Carl~Friedrich Gauss.
\newblock {\em Disquisitiones Arithmeticae}.
\newblock Springer-Verlag, New York, 1986.
\newblock Translated into English by Arthur A. Clarke, revised by William C.
  Waterhouse.

\bibitem{Gauss-Latin}
Carolo~Friderico Gauss.
\newblock {\em Disquisitiones Arithmeticae}.
\newblock Gerh.\ Fleischer Jun., Lipsiae, 1801.
\newblock Electronic version of the original Latin text from Goettingen State
  and University Library.

\bibitem{Goedel-compl}
Kurt G{\"o}del.
\newblock The completeness of the axioms of the functional calculus of logic.
\newblock In van Heijenoort \cite{MR1890980}, pages 582--91.
\newblock First published 1930.

\bibitem{MR533669}
K.~R. Goodearl.
\newblock {\em von {N}eumann regular rings}, volume~4 of {\em Monographs and
  Studies in Mathematics}.
\newblock Pitman (Advanced Publishing Program), Boston, Mass., 1979.

\bibitem{MR0463157}
Robin Hartshorne.
\newblock {\em Algebraic geometry}.
\newblock Springer-Verlag, New York, 1977.
\newblock Graduate Texts in Mathematics, No. 52.

\bibitem{German}
Roe-Merrill~S. Heffner.
\newblock {\em Brief {G}erman Grammar}.
\newblock D. C. Heath and Company, Boston, 1931.

\bibitem{MR0033781}
Leon Henkin.
\newblock The completeness of the first-order functional calculus.
\newblock {\em J. Symbolic Logic}, 14:159--166, 1949.

\bibitem{MR0120156}
Leon Henkin.
\newblock On mathematical induction.
\newblock {\em Amer. Math. Monthly}, 67:323--338, 1960.

\bibitem{MR1396852}
Leon Henkin.
\newblock The discovery of my completeness proofs.
\newblock {\em Bull. Symbolic Logic}, 2(2):127--158, 1996.

\bibitem{MR94e:03002}
Wilfrid Hodges.
\newblock {\em Model theory}, volume~42 of {\em Encyclopedia of Mathematics and
  its Applications}.
\newblock Cambridge University Press, Cambridge, 1993.

\bibitem{Hodges-Building}
Wilfrid Hodges.
\newblock {\em Building models by games}.
\newblock Dover Publications, Mineola, New York, 2006.
\newblock Original publication, 1985.

\bibitem{MR0384548}
Paul~E. Howard.
\newblock {\L}o{\'s}' theorem and the {B}oolean prime ideal theorem imply the
  axiom of choice.
\newblock {\em Proc. Amer. Math. Soc.}, 49:426--428, 1975.

\bibitem{MR600654}
Thomas~W. Hungerford.
\newblock {\em Algebra}, volume~73 of {\em Graduate Texts in Mathematics}.
\newblock Springer-Verlag, New York, 1980.
\newblock Reprint of the 1974 original.

\bibitem{MR1500471}
Edward~V. Huntington.
\newblock Errata: ``{S}ets of independent postulates for the algebra of logic''
  [{T}rans.\ {A}mer.\ {M}ath.\ {S}oc. {\bf 5} (1904), no. 3, 288--309;
  1500675].
\newblock {\em Trans. Amer. Math. Soc.}, 5(4):552, 1904.

\bibitem{MR1500675}
Edward~V. Huntington.
\newblock Sets of independent postulates for the algebra of logic.
\newblock {\em Trans. Amer. Math. Soc.}, 5(3):288--309, 1904.

\bibitem{MR0039982}
J.~L. Kelley.
\newblock The {T}ychonoff product theorem implies the axiom of choice.
\newblock {\em Fund. Math.}, 37:75--76, 1950.

\bibitem{MR85e:03003}
Kenneth Kunen.
\newblock {\em Set theory}, volume 102 of {\em Studies in Logic and the
  Foundations of Mathematics}.
\newblock North-Holland Publishing Co., Amsterdam, 1983.
\newblock An introduction to independence proofs, Reprint of the 1980 original.

\bibitem{Kuratowski-Zorn}
Casimir Kuratowski.
\newblock Une m{\'e}thode d'{\'e}limination des nombres transfinis des
  raisonnements math{\'e}matiques.
\newblock {\em Fundamenta Mathematicae}, 3(1):76--108, 1922.

\bibitem{MR12:397m}
Edmund Landau.
\newblock {\em Foundations of Analysis. {T}he Arithmetic of Whole, Rational,
  Irrational and Complex Numbers}.
\newblock Chelsea Publishing Company, New York, N.Y., third edition, 1966.
\newblock Translated by F. Steinhardt; first edition 1951; first German
  publication, 1929.

\bibitem{Lang-alg}
Serge Lang.
\newblock {\em Algebra}.
\newblock Addison-Wesley, Reading, Massachusetts, third edition, 1993.
\newblock Reprinted with corrections, 1997.

\bibitem{MR0048795}
J.~{\L}o{\'s} and C.~Ryll-Nardzewski.
\newblock On the application of {T}ychonoff's theorem in mathematical proofs.
\newblock {\em Fund. Math.}, 38:233--237, 1951.

\bibitem{MR0065527}
J.~{\L}o{\'s} and C.~Ryll-Nardzewski.
\newblock Effectiveness of the representation theory for {B}oolean algebras.
\newblock {\em Fund. Math.}, 41:49--56, 1954.

\bibitem{MR0075156}
Jerzy {\L}o{\'s}.
\newblock Quelques remarques, th\'eor\`emes et probl\`emes sur les classes
  d\'efinissables d'alg\`ebres.
\newblock In {\em Mathematical interpretation of formal systems}, pages
  98--113. North-Holland Publishing Co., Amsterdam, 1955.

\bibitem{Lowenheim}
Leopold L\"owenheim.
\newblock On possibilities in the calculus of relatives.
\newblock In van Heijenoort \cite{MR1890980}, pages 228--251.
\newblock First published 1915.

\bibitem{MR1924282}
David Marker.
\newblock {\em Model theory: an introduction}, volume 217 of {\em Graduate
  Texts in Mathematics}.
\newblock Springer-Verlag, New York, 2002.

\bibitem{MR0010555}
{\O}ystein Ore.
\newblock Galois connexions.
\newblock {\em Trans. Amer. Math. Soc.}, 55:493--513, 1944.

\bibitem{Peano}
Giuseppe Peano.
\newblock The principles of arithmetic, presented by a new method.
\newblock In van Heijenoort \cite{MR1890980}, pages 83--97.
\newblock First published 1889.

\bibitem{Post}
Emil~L. Post.
\newblock Introduction to a general theory of elementary propositions.
\newblock {\em Amer. J. Math.}, 43(3):163--185, July 1921.

\bibitem{MR798475}
Herman Rubin and Jean~E. Rubin.
\newblock {\em Equivalents of the axiom of choice. {II}}, volume 116 of {\em
  Studies in Logic and the Foundations of Mathematics}.
\newblock North-Holland Publishing Co., Amsterdam, 1985.

\bibitem{Russell-letter}
Bertrand Russell.
\newblock Letter to {F}rege.
\newblock In van Heijenoort \cite{MR1890980}, pages 124--125.
\newblock First published 1902.

\bibitem{MR2213624}
Eric Schechter.
\newblock Kelley's specialization of {T}ychonoff's theorem is equivalent to the
  {B}oolean prime ideal theorem.
\newblock {\em Fund. Math.}, 189(3):285--288, 2006.

\bibitem{Scott-1954}
Dana Scott.
\newblock Prime ideal theorems for rings, lattices, and {B}oolean algebras.
\newblock {\em Bull. Amer. Math. Soc.}, 60(4):390, July 1954.
\newblock Preliminary report.

\bibitem{MR1809685}
Joseph~R. Shoenfield.
\newblock {\em Mathematical logic}.
\newblock Association for Symbolic Logic, Urbana, IL, 2001.
\newblock Reprint of the 1973 second printing.

\bibitem{Skolem-some-remarks}
Thoralf Skolem.
\newblock Some remarks on axiomatized set theory.
\newblock In van Heijenoort \cite{MR1890980}, pages 290--301.
\newblock First published 1922.

\bibitem{Skolem-LS}
Thoralf Skolem.
\newblock Logico-combinatorial investigations in the satisfiability or
  provability of mathematical propositions: {A} simplified proof of a theorem
  by {L}. {L}{\"o}wenheim and generalizations of the theorem.
\newblock In van Heijenoort \cite{MR1890980}, pages 252--263.
\newblock First published 1920.

\bibitem{MR1501865}
M.~H. Stone.
\newblock The theory of representations for {B}oolean algebras.
\newblock {\em Trans. Amer. Math. Soc.}, 40(1):37--111, 1936.

\bibitem{MR1890980}
Jean van Heijenoort, editor.
\newblock {\em From {F}rege to {G}\"odel: {A} source book in mathematical
  logic, 1879--1931}.
\newblock Harvard University Press, Cambridge, MA, 2002.

\bibitem{von-Neumann-ax}
John von Neumann.
\newblock An axiomatization of set theory.
\newblock In van Heijenoort \cite{MR1890980}, pages 393--413.
\newblock First published 1925.

\bibitem{von-Neumann}
John von Neumann.
\newblock On the introduction of transfinite numbers.
\newblock In van Heijenoort \cite{MR1890980}, pages 346--354.
\newblock First published 1923.

\bibitem{PM}
Alfred~North Whitehead and Bertrand Russell.
\newblock {\em Principia Mathematica}, volume~I.
\newblock University Press, Cambridge, 1910.

\bibitem{MR0264581}
Stephen Willard.
\newblock {\em General topology}.
\newblock Addison-Wesley Publishing Co., Reading, Mass.--London--Don Mills,
  Ont., 1970.

\bibitem{Zermelo-invest}
Ernst Zermelo.
\newblock Investigations in the foundations of set theory {I}.
\newblock In van Heijenoort \cite{MR1890980}, pages 199--215.
\newblock First published 1908.

\bibitem{MR1563165}
Max Zorn.
\newblock A remark on method in transfinite algebra.
\newblock {\em Bull. Amer. Math. Soc.}, 41(10):667--670, 1935.

\end{thebibliography}

\end{document}
