% This started out as the notes used in 2005
%\documentclass[a4paper,openany]{book} % eps file does not appear
				% in draft!!! 
\documentclass[%
version=last,%
a5paper,%
open=any,%
cleardoublepage=empty,%
10pt,%
%headings=small,%
index=totoc,%
bibliography=totoc,%
reqno,%
draft=false,%
%draft,%
twoside,%
BCOR=2mm,%
DIV=18,%
headinclude=true,%
pagesize]%{scrreprt}
{scrbook}

\usepackage{scrpage2}
\pagestyle{scrheadings}
\ohead{\pagemark}
\ihead{\headmark}
\ofoot{}

\renewcommand{\captionformat}{\ \ }

\usepackage{pstricks,pst-plot}
\usepackage{hfoldsty}
\usepackage{url}
\usepackage{cclicenses}
\usepackage[polutonikogreek,english]{babel}
%\usepackage[oxonia]{psgreek}
\usepackage{amsmath,amssymb}
\usepackage{graphicx}  % For the photos
\usepackage[all]{xy}
%\usepackage[notref,notcite]{showkeys} % For use when writing

\usepackage{longtable}
\usepackage{multicol}
\usepackage[mathscr]{euscript}
\usepackage{amscd}     % commutative diagram

% For index

\usepackage{makeidx}
%\usepackage{showidx}  % For use when editing
\makeindex 
%\newcommand{\itech}[1]{\textsl{#1}\index{#1|textsl}}
%\newcommand{\idefn}[1]{\textbf{#1}\index{#1|textbf}}
\newcommand{\tindex}[1]{\index{#1|textsl}}
\newcommand{\dindex}[1]{\index{#1|textbf}}
\newcommand{\tindexsub}[2]{\index{#1!#2|textsl}}
\newcommand{\dindexsub}[2]{\index{#1!#2|textbf}}

% For symbol table

%\makeglossary % This command must be removed when the glossary is to
	      % be printed (during the editing stage)
%\newcommand{\glossaryentry}[2]{#1\quad #2\\ } % This is used in notes.glo
\newcommand{\glossaryentry}[2]{#1& #2\\ } % This is used in notes.glo

% In the tex file, the \glossary command, with one argument, puts that
% argument as the first argument of \glossaryentry (the second being a
% page number) in the notes.glo file. 
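% For example, if \glossary{$\emptyset$} appeared in the text on page
% 12, it would put the line \glossaryentry{$\emptyset$}{12} into the
% notes.glo file.  (The page number 12 is only illustrative.)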


% At the end I took this file and
% made from it, by hand, the symbols.tex file.

\usepackage{upgreek}
\newcommand{\vnn}{\upomega}

\usepackage{bm}
\newcommand{\sv}[1]{\bm{#1}} % syntactical variable

\newcommand{\liff}{\Leftrightarrow}
\newcommand{\lto}{\Rightarrow}
\newcommand{\Enot}{\text{\sf not-}}
\usepackage{stmaryrd}
\renewcommand{\land}{\mathbin{\binampersand}}
\newcommand{\amp}{\;{}\mathbin{\&}{}\;}
\newcommand{\eor}{\nLeftrightarrow}

\renewcommand{\phi}{\varphi}

\newcommand{\Ey}{\mathcal A}
\newcommand{\Np}{\N^+}
\newcommand{\Qp}{\Q^+}
\newcommand{\Qpp}{\Q^{++}}
\newcommand{\Rp}{\R^+}
\newcommand{\olt}{\olessthan}
\newcommand{\pred}[1]{\operatorname{pred}(#1)}
\newcommand{\class}[1]{\mathbf{#1}}    % class
\newcommand{\Russell}{\class R}        % {x:x\notin x}

\newcommand{\on}{\class{ON}}           % the class of ordinals
\newcommand{\cn}{\class{CN}}           % the class of cardinals

\newcommand{\shstroke}{\mathrel{|}}    % Sheffer stroke
\newcommand{\psys}[1]{\mathcal{#1}}    % proof-system
\newcommand{\proves}[1][]{\vdash_{\psys{#1}}}
%\newcommand{\nproves}[1][]{\nvdash_{\psys{#1}}}
\newcommand{\fcom}{,\;}                % comma in list of formulas

\let\oldepsilon\epsilon
\renewcommand{\epsilon}{\varepsilon}
\newcommand{\abs}[1]{\lvert#1\rvert}


\newcommand{\inv}{^{-1}}                % mult. inverse

\newcommand{\divides}{\mathrel{|}}
\newcommand{\ndivides}{\mathrel{\nmid}}
\newcommand{\radix}{\sqrt{\rule{0ex}{1.5ex}}}
\renewcommand{\leq}{\leqslant}
\renewcommand{\geq}{\geqslant}


% Standard structures
\newcommand{\stnd}[1]{\mathbb{#1}}
\newcommand{\N}{\stnd{N}}         % natural numbers
\newcommand{\Z}{\stnd{Z}}         % integers
\newcommand{\Q}{\stnd{Q}}         % rationals
\newcommand{\R}{\stnd{R}}         % reals
\newcommand{\C}{\stnd{C}}         % complex numbers
\newcommand{\B}{\stnd{B}}         % the 2-element Boolean algebra


% Symbolic logic

\DeclareMathOperator{\arity}{arity}    %
\newcommand{\qsep}{\;}                 % follows a quantified variable
\newcommand{\Forall}[1]{\forall{#1}\qsep }
\newcommand{\Exists}[1]{\exists{#1}\qsep }
\newcommand{\existsunique}{\exists!}
\newcommand{\Existsunique}[1]{\exists!\,{#1}\qsep }
\newcommand{\Frall}[2]{(\forall#1\in#2)\qsep }
\newcommand{\Exsts}[2]{(\exists#1\in#2)\qsep }

\newcommand{\interpretation}{\mathscr I}% interpretation (part of a
    %structure)

\newcommand{\fv}[1]{\operatorname{fv}(#1)}  % set of free variables in #1
\newcommand{\Cn}[2][\lang]{\operatorname{Con}_{#1}(#2)} % logical consequences
\newcommand{\conseq}[1]{\operatorname{Con}(#1)} % logical consequences
\renewcommand{\models}{\vDash}   % for parallelism with the following:
\newcommand{\nmodels}{\nvDash}


\newcommand{\Fm}[2]{\operatorname{Fm}^{#1}(#2)} % #1-ary formulas

\newcommand{\str}[1]{\mathfrak{#1}}     % structure

%%%% SET THEORY

\newcommand{\universe}{\mathcal U}     

% set-theoretic operators:

% nullary:

\renewcommand{\emptyset}{\varnothing}  % [I like the other symbol better]

% unary:

\newcommand{\comp}{^{\mathrm{c}}}      % set-theoretic complement
\newcommand{\pow}[1]{\mathscr{P}(#1)}  % power set

% binary:
\renewcommand{\setminus}{\smallsetminus} % I don't like the standard symbol
\newcommand{\symdiff}{\vartriangle}    % symmetric difference

% [infinitary]:

\newcommand{\family}[1]{\mathcal{#1}}  % family (of sets)

% set-theoretic relations:

\newcommand{\included}{\subseteq}      % [the name suggests the meaning here]
\newcommand{\nincluded}{\not\subseteq} % not included
\newcommand{\pincluded}{\subset}       % proper inclusion    

% cardinality

\newcommand{\injects}{\preccurlyeq}    % injects in
\newcommand{\pinjects}{\prec}
\newcommand{\equip}{\approx}           % equipollent (is in bijection with)
\newcommand{\nequip}{\not\approx}      % not equipollent
\newcommand{\vscr}[1]{#1'}            % successor ordinal (v for von Neumann)
\newcommand{\size}[1]{\lvert#1\rvert}  % cardinality


% Model theory

\newcommand{\lang}{\mathcal{L}}        % a language or signature

% relations and functions

\newcommand{\conv}[1]{\breve{#1}}       % converse of a relation
\newcommand{\To}{\longrightarrow}
\DeclareMathOperator{\dom}{dom}        % domain of a function
\newcommand{\setim}[1][]{''{#1}} % image of set under preceding function 
\newcommand{\setimb}[1]{[#1]}    % image of set under preceding function 
\newcommand{\rest}[1]{\restriction{#1}}% restriction of function to #1
\newcommand{\id}{\mathsf{id}}          % identity-map
\newcommand{\mapset}[2]{{}^{#1}#2}     % set of functions from #1 to #2
\newcommand{\chf}[1]{\chi_{#1}}        % characteristic function
\newcommand{\coordproj}[2]{\pi^{#1}_{#2}} % coordinate projection,
				% omitting entry {#2} from an {#1}-tuple
\newcommand{\modsim}{/\mathord{\sim}}  % modulo the eq-ren \sim


% Number theory
\newcommand{\scr}[1]{#1^{+}}           % successor
\DeclareMathOperator{\lcm}{lcm}
\newcommand{\parity}[1]{\operatorname{p}(#1)}
\newcommand{\rem}[2]{\operatorname{rem}(#1,#2)} % remainder on
				% dividing #1 by #2



\usepackage{verbatim}  % for the comment environment

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\input{../../../../../TeX/format}

\usepackage[neverdecrease]{paralist}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\usepackage{amsthm}

\newtheorem{theorem}{Theorem}[section]
\newtheorem{axiom}[theorem]{Axiom}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{porism}[theorem]{Porism}

\theoremstyle{definition}

\newcommand{\raisebullet}{\mbox{}\\[-2.3\baselineskip]\mbox{}} % for
				% use with the following:

\newtheorem{example}[theorem]{Example}
\newtheorem*{example*}{Example}

\newtheorem{examples}[theorem]{Examples}

\theoremstyle{remark}

\newenvironment{exercise}%
  {\subsection*{Exercise}}%
  {}

\newenvironment{exercises}[1][]%
  {\subsection*{Exercises}{#1}\begin{enumerate}}%
  {\end{enumerate}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newcommand{\full}{full}  % as in full truth-table; it used to be
			  % `proper'

\newcommand{\tuple}[1]{\vec{#1}\,}

\newcommand{\named}[1]{\widehat{#1}}  % truth-value of #1

% For the numbers chapter:

\newcommand{\axz}{Axiom~Z}
\newcommand{\axu}{Axiom~U}
\newcommand{\axi}{Axiom~I}

\newcounter{fnexample}
\newcommand{\nex}{\stepcounter{fnexample}\thefnexample}

\newcommand{\Tur}[1]{\textsf{#1}} % for words in Turkish
%\newcommand{\Eng}[1]{\uline{\textsf{#1}}} %for words in English
\newcommand{\Eng}[1]{\textsf{#1}} %for words in English
%\newcommand{\Gk}[1]{\begin{greektext}#1\end{greektext}}
\newcommand{\Gk}[1]{\selectlanguage{polutonikogreek}#1\selectlanguage{english}}
\newcommand{\Lat}[1]{\textsc{#1}} 
\newcommand{\letter}[1]{\textsf{#1}}  %for letters as such

\newcommand{\annot}[1]{\footnote{#1}}

\begin{document}

\title{%Introductory Notes\\ on the\\ 
Foundations\\
of\\
Mathematical Practice}
\author{David Pierce}
\date{\today}

\uppertitleback{\centering
This work is licensed under the\\
 Creative Commons Attribution--Noncommercial--Share-Alike
License.\\
 To view a copy of this license, visit\\
  \url{http://creativecommons.org/licenses/by-nc-sa/3.0/}\\
\mbox{}\\
\cc \ccby David Pierce \ccnc \ccsa\\
\mbox{}\\
Mathematics Department\\
Middle East Technical University\\
Ankara 06531 Turkey\\
\url{http://metu.edu.tr/~dpierce/}\\
\url{dpierce@metu.edu.tr}
}

  \maketitle


 \tableofcontents
\listoffigures


%\input{preface}

%\setcounter{chapter}{-1}
\addchap{Preface}
%[2006.01.16:  These notes are being edited, following their use in
%the first semester of 2005/6.]  

This book concerns the foundations\index{foundation} of mathematics in two
ways:
\begin{compactenum}
  \item
this book is about concepts and techniques that all mathematicians use,
implicitly or explicitly;
\item
this book (or parts of it) is intended for use in a first
university-level mathematics
course.
\end{compactenum}
More precisely, these notes were originally written for a course called
Fundamentals of Mathematics, given at Middle East Technical University
in Ankara
under the designation Math 111.\footnote{The catalogue description of
  Math 111 is: 
\begin{quote}
  Symbolic logic.  Set theory.  Cartesian product.  Relations.
  Functions.  Injective, surjective and bijective functions.
  Composition of functions.  Equipotent sets.  Countability of sets.
  More about relations:  equivalence relations, equivalence classes
  and partitions.  Quotient sets.  Order relations:  Partial order,
  Total order, Well ordering.   Mathematical induction and recursive
  definitions of functions.
\end{quote}}
  The notes also offer additional
reading for those interested in the topics they discuss.  In
particular, the notes may be useful for Math 320 (Set Theory) and Math 406
(Introduction to Mathematical Logic and Model Theory) at METU.

What are foundations?
A wooden house may be built on a stone foundation.  A mason lays down
the stones; then a carpenter erects the house on top.
The carpenter cannot construct the walls and floors of the house
before the stone-mason creates a place to set those floors and walls;
but the stone-mason cannot create this foundation without knowing what
the carpenter intends to place there.

So it is with the foundations of mathematics.  You cannot do
mathematics without a place to start; but you cannot create the starting-point
without knowing the mathematics that will proceed from it.
This is a paradox---a seeming contradiction.  It is not a
\emph{real}\index{real}
contradiction; but it does suggest that the nascent mathematician (the
first-year
student) cannot read this book page after page as if it
constituted an easy novel.  The book might be considered as a
difficult novel with lots of interrelated events.  (However, not
every novel has an index or a list of symbols, as this one does.)  Not every
section of the book should be studied in sequence during the
reader's first encounter.  Even if an earlier section \emph{is}
required for a
later section, still, that earlier section may not be fully
comprehensible without some knowledge of the later section.

What can the reader do?  Read slowly, but jump ahead; reread what
you have already read; 
\emph{think} the whole time, but do not think too much without really
knowing what you are thinking about. Talk to classmates; talk to
teachers.  Read with a pencil.  Summarize passages in your own words.
Invent your own symbolism (while remembering that communicating with
others requires a common symbolism).
Read other books on the same subjects. 

Also: do exercises.  Create your own exercises.  Most sections of
the book end with exercises.  The student who is in a hurry will
find out from a teacher which exercises
to work on and
will then try to do them immediately, looking back into the sections
as necessary for examples.  A difficulty in this approach is that most
exercises here do not have unique correct \emph{answers;} they have
\emph{solutions,} some of which are better than others.  Finding the
best solutions---even acceptable solutions---will require
reading, thinking, and experience.  Still, many of the exercises can
be approached as puzzles\index{puzzle}: they do not need deep insight
into the nature of things, but aim only to develop facility with some
basic ideas.   

Most exercises here could not very well be cast as multiple-choice
questions.  In a multiple-choice question, if you can somehow figure
out the correct answer, even without being able to say how you did it,
your answer is still $100\%$ correct.  Here, correct solutions to
problems will carry \emph{within themselves} the reasons why they are
correct. 

There are no answers at the back of the book.  Problems here can have
more than one correct solution; \emph{you} should be able to tell
whether a particular solution is correct.  It is true that you may
fail to notice some mistakes; the only way to avoid this is
\emph{experience,} not desire or will.

Somebody who does not know a language very well will not
avoid mistakes just by trying hard: 
s/he\footnote{The
construction \Eng{s/he} can be pronounced as \Eng{she or he} (or as
\Eng{he or she}).  English has not evolved a generally accepted
singular pronoun that refers to humans of either sex: it lacks the
\Tur{o(n)} of Turkish.  In the
fourteenth century, according to the Oxford English Dictionary (OED) \cite{OED}, the
second-person plural pronoun \Eng{you} began to be used respectfully
in place of the singular pronoun \Eng{thou}, just as the Turkish
\Tur{siz} replaces \Tur{sen}.  In the same way, currently, some
people use \Eng{they} with a
singular sense.  Other people are bothered by this usage, and they may
insist that \Eng{he} can refer to humans of unknown
sex.  The original OED does not recognize this usage.  However, the OED does
claim that \Eng{she} comes from a different base from \Eng{he},
because the feminine form derived from the base of \Eng{he} was too
much like the masculine form.}
must \emph{practice.}
Likewise with games: even if you memorize all of the moves of chess
and think really hard, you will not play a good chess-game at first.
Depending on how seriously you take mathematics, you can see this
book as lessons in a language or a game.

It would be worthwhile for the reader to have a look at
Euclid's \emph{Elements}. 
(Heath's English translation from the Greek is \cite{MR17:814b}---see
the bibliography
at the end of the book.  This translation is available in print
and in various places around the Web.)  The present book does not share
much \emph{content} with the \emph{Elements;} but Euclid's work does
establish a sort of foundation or prototype for the mathematics of his
and all succeeding generations, including our own.

Euclid wrote the \emph{Elements}, the original
textbook of mathematics, some 2300 years ago.\footnote{Euclid practiced
  mathematics in 
  Alexandria around 300 \textsc{bce}, probably having learned
  mathematics in Athens from the students of Plato \cite[vol.~I,
    pp.~1~f.]{MR17:814b}.}  This textbook is still in use in some
classrooms today.  It consists of 13 books.  You are
not likely to read all of them; as with the present work, you will
jump around, reading what you are interested in, perhaps with the
guidance of a teacher.  Indeed,
perhaps Euclid expected few people to read his work unaided.
His work does bring the reader instantly into real mathematics;
but it also sets a standard for \emph{spareness} (terseness, economy)
of mathematical composition.  

The \emph{Elements} contains no commentary, no guidance
for the reader.  After a few
definitions and \textsl{axioms,}\tindex{axiom} the work
consists solely of \textsl{propositions}\tindex{proposition} and their
\textsl{proofs.}\tindex{proof}
Euclid
does not \emph{tell} you, but he \emph{shows} you what
proofs of propositions are.  


The present book contains
more than just propositions and their proofs; but it \emph{does}
contain these.  
Each proof here is labelled as such, and it ends with a
little \label{page:box} box.   (The first
example is on p.~\pageref{first-proof}.)
The propositions and proofs in the book consist of sentences of
ordinary language, with some abbreviative
symbolism (as well as the symbolism required by what the proofs are
\emph{about}). 
Such proofs might be called \emph{informal,} because ordinary language
is itself informal.  Grammatical rules for English or Turkish or any
other human language can indeed be formulated, and the conscientious
speaker or writer will try to follow them; but it seems
impossible to formulate grammatical rules that are obeyed by, and only
by, everything that one wants to say.  

Informal proofs are to be distinguished from
\textsl{formal proofs.}%
\tindexsub{formal}{--- proof}%
\tindexsub{proof}{formal ---}
Again, the notion of proof 
itself---\emph{informal} proof---is over two thousand years old; but the
notion of a \emph{formal} proof dates only from the
1920s.\footnote{Perhaps the invention can be attributed to 
  Hilbert \cite[\S07, n.~110]{MR18:631a}.}
This book \emph{tells} you, as well as shows you, what formal
proofs are.  Briefly described, a
formal proof is a list of sentences of an \emph{artificial}
language;\index{language!artificial ---}\index{artificial language}
but such a list must satisfy certain requirements.  The last sentence
on the list is
what the formal proof is a proof \emph{of}: it is what the proof
\emph{proves}.  A machine could check whether a
given list of sentences is a formal proof.  To establish the truth
of an interesting proposition, a formal proof is practically never
called for.  However, if it is held to the highest standard, an
informal proof of some proposition $P$ can be seen 
as an argument that a 
formal proof of $P$ could in principle be written.
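
To give a first, schematic example (the precise requirements must wait
for Chapter~\ref{ch:logic}): if a proof-system allows the sentences
$P$ and $P\lto Q$ to be set down as assumptions, and allows $Q$ to be
set down once both of them have been, then the three-sentence list
\begin{equation*}
  P\fcom P\lto Q\fcom Q
\end{equation*}
meets the description just given: it is a formal proof of its last
sentence, $Q$.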

It will be an exercise in this book to write some formal proofs; but
the ultimate goal is the ability to check the validity of
\emph{informal} proofs (like Euclid's, or any later mathematician's),
\emph{and} the ability to write one's own (informal) proofs.

I assume that you, the reader, have some experience with high-school
algebra, and specifically with the algebra of the
\textsl{integer}s\tindexsub{number}{integer}.
Then you can prove an identity\index{identity} like
\begin{equation*}\tag{$0.1$}
  x^3+y^3=(x+y)(x^2-xy+y^2)
\end{equation*}
(by multiplying out the right member and combining like terms).
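Indeed, carrying out the multiplication gives
\begin{equation*}
  (x+y)(x^2-xy+y^2)=x^3-x^2y+xy^2+x^2y-xy^2+y^3=x^3+y^3.
\end{equation*}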
The algebra of the integers serves as a pattern for \textsl{Boolean
  algebra,} which I shall 
introduce as the algebra of the numbers~$0$ and~$1$ alone.  If one
considers these numbers to represent \emph{falsity} and \emph{truth,}
then Boolean algebra determines an algebra of
\textsl{propositions,}
\tindex{proposition} or a
propositional \textsl{logic}%
\tindexsub{logic}{propositional ---}%
\tindexsub{proposition}{---al logic}. 
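To give one example now: if $1$ represents truth, then multiplication
already behaves like the word \Eng{and}, since a product $xy$ is $1$
just in case $x$ and $y$ are both $1$.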

After we have propositional logic, we can say something about
\textsl{predicate} logic.%
\tindexsub{logic}{predicate ---}%
\tindexsub{predicate}{--- logic}
This logic provides for the analysis of propositions into parts, some
of which are \emph{not} propositions.  (Some parts of propositions
will be \textsl{predicates:}\tindex{predicate} hence the name of the logic.)
We cannot define everything precisely until 
we have the notion of a \textsl{relation.}%
\tindex{relation}  
Relations are certain
\textsl{sets;}\tindex{set} they are
\textsl{subsets}\tindexsub{sub}{---set}\tindexsub{set}{sub---} of
\textsl{Cartesian products}\tindex{Cartesian
  product}\tindexsub{product}{Cartesian ---} of  
sets.  So all of these things will need to be discussed.
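(For example, the usual ordering of the natural numbers can be
construed as a relation: namely, the set of those pairs $(a,b)$ of
natural numbers for which $a$ is less than $b$.)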

A \textsl{function}%
\tindex{function} can be defined as a kind of relation.  Functions
give us a way to say when two sets `have the same size', or are
\textsl{equipollent}%
\tindex{equipollent, equipotent}
(or \textsl{equipotent}).  
The set of integers has the same size as the set of \emph{even}
integers; both sets are 
\textsl{countably infinite;}%
\tindexsub{infinite}{countably ---}%
\tindexsub{count}{---ably infinite}
but there are strictly \emph{larger},
\textsl{uncountable}%
\tindex{uncountable}%
\tindexsub{infinite}{uncountable} 
sets, such as the
set of 
\textsl{real numbers.}%
\tindexsub{real}{--- number}%
\tindexsub{number}{real ---}
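(To see that the integers and the even integers have the same size,
pair each integer $x$ with its double, $2x$: every even integer then
gets used exactly once.)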

The predicate logic given here is more precisely called \textsl{first-order}
predicate logic.\tindexsub{logic}{first-order
  ---}\tindexsub{first}{---{}-order logic}
Functions also allow us to give an account of \textsl{first-order
  logic} in general.

The integers have an \textsl{ordering.}\tindexsub{order}{---ing}  This
 is a kind of relation. 
 There is a generalization called a \textsl{partial
 ordering}\tindexsub{partial}{--- ordering}\tindexsub{order}{partial
 ---ing}.  We shall 
 prove a \textsl{representation theorem}\tindexsub{represent}{---ation
 theorem}\tindexsub{theorem}{representation ---}, namely the proposition
 that every partial ordering behaves like the subset-relation (in a
 clearly defined way).
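 (For a small instance of this behavior: among the numbers $1$, $2$,
 $3$, and $6$, ordered by divisibility, replace each number with the
 set of its divisors in the list, so that $2$ becomes $\{1,2\}$, and
 $6$ becomes $\{1,2,3,6\}$; then one number divides another just in
 case the set corresponding to the first is included in the set
 corresponding to the second.)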

Equality is also a relation and is the motivating example of an
\textsl{equivalence-relation}.
\tindexsub{equivalence}{---{}-relation}
\tindexsub{relation}{equivalence-{}---}
The standard sorts of
numbers---integers, rational numbers, real numbers, complex
numbers---can be formally defined in terms of equivalence-rela\-tions,
once one has the 
\textsl{natural numbers}%
\tindexsub{number}{natural ---}%
\tindex{natural number}
$0$, $1$, $2$, $3$, \dots.  The 
idea of this book is that we do not \emph{really} have these numbers,
mathematically, until we can give a logical account of them.  This
book ends with such an account.
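(For example, one standard construction makes an integer out of a
class of pairs of natural numbers: the pairs $(0,1)$, $(1,2)$,
$(2,3)$,~\dots\ all stand for the integer $-1$, two pairs $(a,b)$ and
$(c,d)$ being counted as equivalent when $a+d=b+c$.)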

The topics of this book are so interrelated that, in any discussion
of them, it is hard to avoid the appearance of circularity.   This
circularity is a part of the foundational aspect of the book.  As I say,
I assume that the reader is familiar with the integers; but I also say
that we do not
officially \emph{have} the integers until the end of the book.  Yet
my supposedly rigorous account of the integers depends on all of the
machinery that the book develops first, with the aid of a familiarity
with\dots the integers.

Our path will have been, not circular, but spiral or rather
\emph{helical}, as if along a winding staircase.  We start
from the integers, and then we return to them, but at a higher (or deeper)
level than where we started.

\section*{Typography}\label{sect:typography}

These printed words are assembled by means of the collection of
typesetting programs and packages known as \AmS-\LaTeX.  The name
\TeX\ is made up of Greek letters\footnote{See \cite[p.~1]{Knuth}.}; the same
three letters will appear below, in \S~\ref{sect:logic}, in
the full Greek name of logic.  In the Latin alphabet, 
the letters are written \Lat{tech}, as in \Eng{technical}.  The \AmS\
is the American Mathematical Society.  The original \TeX\ program was
expanded 
into \LaTeX\ and independently into \AmS-\TeX; then the benefits of
both expansions were combined into \AmS-\LaTeX.

The original \TeX\ program
distinguishes between ordinary text and mathematical text.  In
ordinary text, in this book, words are \emph{italicized} for the
usual sorts of
reasons: they (or their meanings) are being emphasized, they
are titles, they are not in the language of the
surrounding text, and so forth.  I am also making some further
distinctions. Technical terms are in \textbf{bold-face} when they are
being defined, explicitly or implicitly.  Technical terms might simply be
\textsl{slanted} if their precise definitions will come later or are
simply not needed.
Words that are being \emph{talked about} or \emph{mentioned} (and not
simply being
\emph{used})\footnote{The distinction between the 
\textsl{use}%
\tindex{use}
 and the
\textsl{mention}%
\tindex{mention}
of a word (or symbol) is attributed to Quine in \cite[\S~08,
    p.~61]{MR18:631a}.  The sentence `A woman or a man is a
  human' uses the word \Eng{woman}.  The sentence `The English word
  for \Tur{kad\i n} also has five letters' mentions the word
  \Eng{woman} without using it.  The sentence `\Eng{Woman} has five
  letters' uses the word \Eng{woman} to mention the same word.  Such a
  use can be called \textbf{autonymous,} following Carnap: again,
  the attribution is in \cite[\S~08, p.~61]{MR18:631a}, where it is
  said that Frege introduced the practice of indicating autonymous use
  of words by quotation-marks (inverted commas).  By this practice,
  the last quoted sentence would be ``\,`Woman' has five letters.''}
are in \Eng{sanserif}.
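In the source of this book, these distinctions correspond to
commands: where the term \Eng{logic} is defined at the head of
\S~\ref{sect:logic}, for example, the source reads
\verb|\textbf{logic}\dindex{logic}|, the second command recording the
term in the index, with its page-number in bold-face.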
However, I may not have always been consistent in making these
distinctions. 

Footnotes here are intended to contain only material that is not
essential to the main point.  They may contain historical information
that I have happened to discover, although much of the history of what
I am discussing is still unknown to me.  
%Footnotes may also contain forward references.

Labelled proofs here end with boxes, as noted above; labelled
examples end with bullets (the first is on p.~\pageref{first-example}).

\section*{Acknowledgments}

The contents of this book have appeared in several editions:

\begin{asparaenum}[1.]
  \item
Some of the material on logic and numbers
was first prepared by me in 2001; at the same time, Andreas Tiefenbach
prepared notes on sets and relations.  Andreas, Belgin Korkmaz, and I
taught Math 111 from those notes.  
\item
Andreas and I revised our respective
notes, with Belgin's advice,
the following year.  
\item
In 2004, in preparing to teach Math 111 that fall
along with Ay\c se 
Berkman and Mahmut Kuzucuo\u glu, I composed my own complete set of notes,
drawing on Andreas's notes in giving my own account of sets and
relations.  
\item
After that semester,
I revised the notes, keeping in mind the experience of Ay\c se, Mahmut
and myself, along with impressions from students.  
Advice from my friend Steve Thomas was also useful for this
revision.  The notes were used the next year, in the fall semester of
2005/6, when I taught Math 111 with Belgin Korkmaz and Turgut
\"Onder.  
\item
This new revision is based on that experience.  However, there have
been many changes, and this book must still be considered a rough
draft, a work in progress.
\end{asparaenum}

Many of the topics dealt with in this book are also covered by basic texts
like \cite{Epp} or \cite{01461249}.  I am not trying to write such a
book as these are, but I find it useful to look at them.  The preface of
\cite{01461249} is particularly reassuring, as it describes the many
changes that the authors have made in each new edition of their book.

My own notes on logic draw
from various sources, especially \cite{MR18:631a} and \cite{Burris};
Ali Nesin's book~\cite{Nesin} is an account in Turkish of some of the
same material.

Set-theory on the level of my coverage seems generally to be found
only in more advanced texts like
\cite{MR0349389} or \cite{MR1924429}.  I use these books, but try to
give more elementary exercises than they do.

For my notes on natural numbers, \cite{MR12:397m} is inspirational. 

As a student, I appreciated the style of Spivak \cite{0458.26001}: not
condescending, but treating the reader as a fellow mathematician.

\chapter*{}


\begin{quote}
  \begin{center}
      \textsc{Open Your Own Treasure House}
  \end{center}
Daiju visited the master Baso in China.  Baso asked: ``What do you
  seek?''

``Enlightenment,'' replied Daiju.

``You have your own treasure house.  Why do you search outside?''
  Baso asked.

Daiju inquired: ``Where is my treasure house?''

Baso answered: ``What you are asking is your treasure house.''

Daiju was enlightened!  Ever after he urged his friends: ``Open your
  own treasure house and use those treasures.''

  \begin{flushright}
Paul Reps and Nyogen Senzaki\\
`101 Zen Stories'\\
\emph{Zen Flesh, Zen Bones}\\
\cite[p.~55]{ZFZB}
  \end{flushright}
\end{quote}



%\input{chapter-preliminaries.tex}

\chapter{Introduction}\label{ch:prelim}


%\renewcommand{\dictumwidth}{0.667\textwidth}
%\dictum{
%}


\setcounter{section}{-1}
\section{Logic}\label{sect:logic}

The name of \textbf{logic}\dindex{logic} comes ultimately from the (ancient) Greek adjective
\Gk{logik'h}, which is short for \Gk{<h logik`h t'eqnh}.  This phrase
can be rendered in English as
\Eng{the rational art}, or \Eng{the art of reason}.  I shall not
attempt to define \Eng{reason}.
In Latin letters, the Greek phrase is \Eng{h\=e logik\=e techn\=e}.  But
knowing the Greek alphabet is worthwhile, if only because  
mathematicians use it as a source of symbols.  See
Figure~\ref{fig:Greek} below.

Logic as a field of study can be counted as a part of
\textsl{philosophy.}%
\tindex{philosophy}
One can do logic with ordinary language
alone. Aristotle (384--322 \textsc{bce}
\cite[pp.~vii--ix]{Aristotle-XVII})
is classically considered the originator of logic, and his texts are
in ordinary Greek, albeit with some use of (Greek) letters to stand
for parts of sentences.  I shall take him as a source for some
fundamental ideas: see \S\S~\ref{sect:language}
and~\ref{sect:p-formulas}, as well as
Appendix \ref{Aristotle}.

%\subsection{Symbolic logic}
%\label{subs:Boole}

\textbf{Symbolic logic}\dindex{symbolic logic}\dindexsub{logic}{symbolic
  ---} consciously
develops a special notation for the notions that 
logic examines.  Some two thousand years after Aristotle, George Boole
describes the process at the beginning
of \emph{The Laws of Thought} \cite[\textbf{[1]}, p.~1]{Boole}, first
published in 1854:
\begin{quote}
  The design of the following treatise is to investigate the
  fundamental laws of those operations of the mind by which reasoning
  is performed; to give expression to them in the symbolic language of
  a Calculus,\footnote{This is calculus in the sense of a method of
  calculating; it has little to do with the
\textsl{infinitesimal}%
\tindexsub{calculus}{infinitesimal ---}%
\tindexsub{infinite}{---simal calculus}
  calculus, which is
  the subject now called just calculus.  What this book refers to as
  propositional logic can also be called
  \textsl{propositional
  calculus}\tindexsub{calculus}{propositional
  ---}\tindexsub{proposition}{---al calculus}.}
and upon this foundation to establish the science of
  Logic and construct its method; to make the method itself the basis
  of a general method for the application of the mathematical doctrine
  of Probabilities; and, finally, to collect from the various elements
  of truth brought to view in the course of these inquiries some
  probable intimations concerning the nature and constitution of the
  human mind.
\end{quote}
Boole's project is grander than mine.  My interest here is almost
entirely \emph{mathematical}.  The introduction of symbolism
to logic allows logical notions to be examined as if they were numbers
or geometric figures.  In short, symbolism makes \textbf{mathematical
  logic}\dindex{mathematical logic}\dindexsub{logic}{mathematical ---}
possible.  This, then---\emph{mathematical} logic---is one subject
of this book. 

\begin{figure}[t!]
  \begin{center}
      \begin{tabular}{| c  l | c  l | c l | c l |} \hline
\Gk{A a} & \textbf alpha  & \Gk{H h} & \textbf{\=e}ta   & \Gk{N n} & \textbf nu
& \Gk{T t} & \textbf tau \\ 
\Gk{B b} & \textbf beta   & \Gk{J j} & \textbf{th}eta & \Gk{X x} & \textbf xi
& \Gk{U u} & \textbf upsilon \\ 
\Gk{G g} & \textbf gamma  & \Gk{I i} & \textbf iota  & \Gk{O o} & \textbf
omicron& \Gk{F f} & \textbf{ph}i\\ 
\Gk{D d} & \textbf delta  & \Gk{K k} & \textbf kappa & \Gk{P p} & \textbf pi
& \Gk{Q q} & \textbf{ch}i\\ 
\Gk{E e} & \textbf epsilon& \Gk{L l} & \textbf lambda& \Gk{R r} & \textbf{r}ho
& \Gk{Y y} & \textbf {ps}i\\ 
\Gk{Z z} & \textbf zeta   & \Gk{M m} & \textbf mu    & \Gk{S s,c} & \textbf
sigma  & \Gk{W w} & \textbf{\=o}mega\\ \hline 
  \end{tabular}
  \end{center}
\caption[The Greek alphabet]{The Greek alphabet.
Mathematicians use (some of) these letters all the time.
In this table, the first letter or two of the (Latin) name for a Greek
letter provides a transliteration
for that letter.  In texts, the rough-breathing mark (\Gk{<}) over
an initial
vowel (or \Gk r) is transcribed as a preceding (or following) \letter
h; the smooth-breathing mark~(\Gk{>}) and the three tonal accents
%(\Gk{'{\ }}, \Gk{~}, \Gk{`{\ }}) 
(\Gk{'a}, \Gk{~a}, \Gk{`a}) 
can be
ignored.}
\label{fig:Greek}
\end{figure}

Section~\ref{sect:language} of the book makes a preliminary
approach to the notion 
of a proposition, introducing the terminology of
\textsl{axioms}%
\tindex{axiom} and 
\textsl{theorems.}%
\tindex{theorem} 
Section~\ref{sect:sets} introduces the basic terminology of
\textsl{sets}%
\tindex{set} and 
\textsl{natural numbers;}%
\tindex{natural number}%
\tindexsub{number}{natural ---} 
some of this terminology is used in the review of
arithmetic in \S~\ref{algebra}.  Arithmetic will be
familiar to everybody from school; the main purpose here is to develop a
point of view, a way of looking at mathematics, which we shall then
apply to logic.  Also, the notion of 
\textsl{arithmetic term}%
\tindexsub{arithmetic}{--- term}%
\tindexsub{term}{arithmetic ---}
introduced 
in \S~\ref{algebra} will provide an example of a 
\textsl{proof by induction.}%
\tindexsub{proof}{--- by induction}%
\tindexsub{induction}{proof by ---}
Finally, arithmetic is the setting for some 
ancient mathematical proofs; these are given as examples in
\S~\ref{proofs}.  Further investigation into these examples is in
\S~\ref{sect:anthyphaeresis}.
In \S\S~\ref{sect:parity} and~\ref{sect:connectives}, some operations
on the set $\{0,1\}$ are introduced 
by means of, and by analogy with, the usual arithmetic operations.
What these correspond to in ordinary language is discussed in
\S~\ref{sect:p-formulas}; further logical analysis of language is in
\S~\ref{sect:quantifiers}.

The operations on $\{0,1\}$ are essential to the study of mathematical
logic as such, which begins in Chapter~\ref{ch:logic}.

\subsection*{Exercise}

  Memorize the Greek alphabet.


\section{Language and propositions}\label{sect:language}

We are using language right now.  We divide up language into
\textbf{sentences.}\dindex{sentence}
Some sentences, but not all, can be described as
\textbf{true}%
\dindex{true}
or
\textbf{false.}%
\dindex{false}
At least, some
sentences are true or false when placed
in a \textsl{situation}%
\tindex{situation}
 or 
\textsl{context.}%
\tindex{context}  Let us refer 
to such sentences as 
\textbf{statements}%
\dindex{statement} or
\textbf{propositions.}%
\dindex{proposition}\footnote{We could make a
  distinction here:
we could let a \textsl{statement} be a bit of language of a certain
grammatical form, letting a \textsl{proposition} be the
\textsl{meaning}%
\tindex{meaning}
 of a statement.  See \cite[p.~26]{MR18:631a}.  I am
\emph{not} going to try to make such a distinction.}  For
example, the sentence 
\begin{center}
  \Eng{I went to Van last year}
\end{center}
is a statement (or a proposition).  Whether it is true or false
depends on who says it
and when: the speaker and the time would be the \emph{context} in
which the sentence is true or false.\footnote{The context can also
  include the listener, as when the sentence is \Eng{You went to Van
    last year.}}

We shall mainly be interested in \emph{mathematical} propositions.
Such propositions are timeless and personless:
their truth or falsity does not change with time
 or with the person who utters them.  Still, in \S~\ref{sect:1st}, we
 shall see a way in which, strictly, a mathematical proposition must
 still be placed in a context in order to become true or false.

The \emph{belief} that a mathematical proposition is true or false may
  change with time.  Certain mathematical propositions have been
  accepted as true for many years, only to be found false.  For
  example, Proposition I.16 of Euclid's \emph{Elements} can be called
  false, even in its
  context, since its proof relies on unstated assumptions that do
  \emph{not} follow from the stated assumptions; but this
  falsehood was not 
  recognized\footnote{See Heath \cite[vol.~1, 
  p.~280]{MR17:814b} for some commentary.} until
  the nineteenth century.  However, the philosopher
 R. G. Collingwood writes in his autobiography
  \cite[pp.~31--33]{Collingwood-Auto}: 
  \begin{quote}
    [Y]ou cannot find out what a man means by simply studying his
    spoken or written statements, even though he has spoken or written
    with perfect command of language and perfect truthful intention.
    In order to find out his meaning you must also know what the
    question was (a question in his own mind, and presumed by him to
    be in yours) to which the thing he has said or written was meant
    as an answer\dots If the meaning of a proposition is relative to
    the question it answers, its truth must be relative to the same
    thing.
  \end{quote}
If we \emph{translate} Euclid's work into the kind of formal proofs that
will be developed in this book, then indeed we shall find errors or
gaps in the
proofs.  Euclid himself was not writing formal proofs; no notion of
such a thing existed until more than two thousand years after him.
However, Euclid \emph{was} doing
mathematics, and correctly, I would say; but this is for you to
judge, \emph{after} reading Euclid himself and understanding his
purpose---after understanding the questions he was answering.

Euclid's work begins with five propositions that we call \textsl{axioms}
or \textsl{postulates}.  (He, apparently \cite[p.~442]{MR13:419a},
called them \Gk{a>it'hmata}, that is, requests, demands, or assumptions.)  An
\textbf{axiom}\dindex{axiom}\dindexsub{proposition}{axiom} is usually a proposition
that satisfies two criteria:
\begin{compactenum}
  \item
it is \textsl{self-evident;}
\item
it is useful for proving other propositions.  
\end{compactenum}
In common usage, the first criterion is probably more important; in
mathematical usage, the second.

A \textbf{self-evident}%
\dindexsub{self}{---{}-evident}%
\dindexsub{proposition}{self-evident ---}
 proposition is self-evidently
\emph{true:} that is, obviously true without any need of appeal to
some other authority.  A classical use of the compound word
\Eng{self-evident} is found in a certain revolutionary
manifesto\footnote{Namely, the Declaration of Independence of the
  United States of America, written in 1776 by Thomas Jefferson, who,
  with other signers of the document, possessed other human beings as
  slaves.  In 1945, Vietnamese revolutionaries led by Ho Chi Minh
  issued a Declaration of Independence that enunciated some of the
  truths of the American declaration \cite[ch.~18]{Zinn}; this did not
  prevent a later American invasion.}
of the eighteenth century.  I transcribe from \cite[p.~15]{Heffner},
with my own formatting:
\begin{quote}
  We hold these truths to be self-evident,
  \begin{compactenum}[1)]
    \item
that all men are created equal,
\item
that they are endowed by their Creator with certain unalienable
rights,
\item
that among these are life, liberty and the pursuit of happiness.
\item
That to secure these rights, governments are instituted among men,
deriving their just powers from the consent of the governed.
\item
That whenever any form of government becomes destructive of these
ends, it is the right of the people to alter or abolish it, and to
institute new government, laying its foundation\index{foundation} on
such principles and
organizing its powers in such form, as to them shall seem most likely
to effect their safety and happiness.
  \end{compactenum}
\end{quote}
Two thousand years earlier, before Euclid even, in the collection of
books now
known as the \emph{Metaphysics} \cite{Aristotle-XVII}, Aristotle writes of
axioms, using the Greek
source of our word \Eng{axiom}, namely \Gk{>axi'wma}.  This word has the
root meaning of \emph{something worthy,} or an \emph{honor.}
Aristotle seems to use
\Eng{axiom} almost as a synonym of \Eng{principle} (\Gk{>arq'h}) or
\Eng{common notion} (\Gk{koin`h d'oxa}).
His writing is elliptical, in the style of
lecture-notes---which is probably just what his works are
\cite[pp.~xxv \& xxxi]{Aristotle-XVII}; I
translate accordingly below, with seemingly missing words supplied in
brackets.  (Some of the original Greek words in parentheses are the
sources of modern technical terms.)

In Book \Gk{B} (that is, Book Beta, also called Book III) of the
\emph{Metaphysics}, 
Aristotle introduces some questions:
\begin{quote}
  [996 b 26]  
Yet indeed, concerning the demonstrative (\Gk{>apodeiktik'oc}) principles,
  whether they belong to one science (\Gk{>epist'hmh}) or more
  (\Gk{plei'wn}) is debatable.  I call \Eng{demonstrative} the common
  notions from which everybody proves (\Gk{de'iknumi}) [propositions],
  for example, \Eng{it is necessary to affirm or deny
  everything,\footnote{\Gk{p~an >anagka~ion <`h f'anai <`h
  >apof'anai}.}} or \Eng{it is
  impossible to be and not be at the same time,\footnote{\Gk{>ad'unaton
  <'ama e>~inai ka`i m`h e>~inai}.}} and however many other
  such premisses (\Gk{prot'asic}).
\end{quote}
Aristotle's examples of common notions are called the Law of
 the 
\textsl{Excluded Middle}%
\tindexsub{law}{L--- of the Excluded Middle}%
\tindexsub{excluded}{Law of the E--- Middle}%
\tindexsub{middle}{Law of the Excluded M---}
and the Law of 
 \textsl{Contradiction;}%
\tindexsub{law}{L--- of Contradiction}%
\tindexsub{contradiction}{Law of C---}
 they  
 are discussed further in Book \Gk{G} (IV).  That book
opens with a statement of the general subject, which we call
\textbf{metaphysics,}%
\dindex{metaphysics} 
but Aristotle called
\textbf{first philosophy:}%
\dindexsub{first}{--- philosophy}%
\dindexsub{philosophy}{first ---, metaphysics}%
\label{look-at}  
\begin{quote}
  [1003 a 20] There is a science (\Gk{>epist'hmh})
  that looks at (\Gk{jewr'ew}) being as such
  (\Gk{t`o >`on <~h| >'on}) 
and what applies to it (\Gk{t`a
  to'utw| <up'arqonta}) according to itself (\Gk{kaj''
  a<ut'o}).\footnote{The whole Greek sentence, as given in
  \cite{Aristotle-XVII}, is \Gk{>'Estin
  >epist'hmh tis <'h jewre~i t`o >`on
  <~h| >'on ka`i t`a to'utw| <up'arqonta kaj'' a<ut'o.}  The
  Greek \Gk{>'on} (stem \Gk{>'ont-})
  is the neuter participle corresponding to the English \Eng{being} and
  the Turkish \Tur{olan}; it appears in modern technical terms like
  \Eng{ontology}.  The feminine stem of the participle is \Gk{o>'us-};
  from this is derived the abstract noun \Gk{o>us'ia}, which I
  translate below as \Eng{beingness}, although a traditional (and
  misleading) translation is \Eng{substance}.}
\end{quote}
A Turkish version of this passage, from \cite{Metafizik}, is
\begin{quote}
  Varl\i k olmak bak\i m\i ndan varli\u g\i\ ve
  ona \"oz\"u gere\u gi ait olan ana nitelikleri inceleyen bir bilim
  vard\i r.
\end{quote}
Later in Book \Gk{G}, in ch.~3, Aristotle takes up axioms; but he
understands them as something more general than the subject of
a particular field like mathematics or physics.  First he seems to
repeat the question raised in Book~\Gk{B}:
\begin{quotation}
[1005 a 19] It must be said whether [the inquiry] concerning the
so-called axioms (\Gk{>axi'wmata}) of mathematics, and concerning
beingness (\Gk{<h o>us'ia}), belongs to one science (\Gk{>epist'hmh})
or another (\Gk{<et'era}).

It is evident (\Gk{faner'on}) that the inquiry (\Gk{skey'ic})
concerning these belongs to one [science], namely that of the
philosopher (\Gk{filos'ofoc}).

For, [the axioms] apply to all beings, not just to some particular
class (\Gk{g'enoc}) apart from the others.

Also, all [scientists] use [the axioms]---because they are of being as
such---while each class [has] being.

To whatever extent is appropriate for them, to that extent they use
[the axioms]---that is, to the extent of the class concerning which
they carry out their proofs (\Gk{>apode'ixeic}).

So, because it is clear (\Gk{dhl'on}) that [the axioms] apply to all
things as beings---for this [namely, being] is common to them---the
theory (\Gk{jewr'ia}) concerning them belongs to those who are gaining
knowledge (\Gk{gnwr'izontoi}) concerning being as such.

Therefore, none of those making particular investigations (\Gk{o<i
  kat`a m'eros >episkopo'untoi}) tries to say something concerning
  them, wheth\-er [they] are true or not---not the geometer
  (\Gk{gewm'etrhs}), not the arithmetician (\Gk{>arijmhtik'oc}).

But some of the physicists (\Gk{fusiko~i})\footnote{Aristotle's
  `physicists' are pre-Socratic philosophers such as Thales of
  Miletus; they are discussed in Book \Gk{A} of the \emph{Metaphysics}.}
 [were] doing this
appropriately (\Gk{e>ik'otwc}).

For, they thought they alone were doing research (\Gk{skop'ew})
concerning all nature (\Gk{<h f'usic}) and concerning being.

But since there is somebody even higher (\Gk{>anwt'erw}) than the
physi\-cist---for
nature is [just] some one class of being---the inquiry concerning
these would
belong to the theoretician (\Gk{jewrhtik'oc}) of generality
(\Gk{kaj'olou}) and first [or primary] beingness (\Gk{<h pr'wth o>us'ia}).

Physics (\Gk{<h fusik'h}) is a kind of wisdom (\Gk{sof'ia}), but not
the first [or foremost] (\Gk{pr'wth}).
\end{quotation}
Presently we come to what were called common notions in Book \Gk B,
then axioms (in Book \Gk G), and now principles:
\begin{quote}
[1005 b 8] It is proper for the one who knows best each class [of
  things] to be
able to state the most certain principles (\Gk{>arqa'i}) of the thing
(\Gk{pr'agma}):

So that the one [who knows best] being as such [can
  state] the most certain [principles] of all [things].  This is the
  philosopher. 

The most certain principle of all is that about which being mistaken is
impossible.
\end{quote}
This principle is the 
\textbf{Law of Contradiction,}%
\dindexsub{contradiction}{Law of C---}%
\dindexsub{law}{L--- of Contradiction} 
which Aristotle now states more precisely than in Book \Gk B:
    \begin{quote}
[1005 b 19]   For the same [\textsl{predicate}%
\tindex{predicate}] to apply and not apply at
the same time to
the same [\textsl{subject}%
\tindex{subject}] in the same [respect] is
impossible.\footnote{\Gk{t`o g`ar a>ut`o <'ama <up'arqein te ka`i m`h
    <up'arqein >ad'unaton t>~w|  a>ut>~w| ka`i kat`a t`o
    a>ut'o.}} 
\end{quote}
The grammatical\index{grammar} notions of subject and predicate are
discussed briefly in \S~\ref{sect:sets} below; there also the Law of Contradiction will be put to use.  Meanwhile,
a Turkish rendition of Aristotle's formulation of the Law of
Contradiction, again from \cite{Metafizik}, is
\begin{quote}
  Ayn\i\ niteli\u gin, ayn\i\ zamanda, ayn\i\ \"ozneye, ayn\i\ bak\i
  m\i ndan hem ait olmas\i, hem de olmamas\i\ imk\^ans\i zd\i r.
\end{quote}
After a long discussion of the Law of Contradiction and those who
question it, Aristotle gives the 
\textbf{Law of the Excluded Middle,}%
\dindexsub{law}{L--- of the Excluded Middle}%
\dindexsub{excluded}{Law of the E--- Middle}%
\dindexsub{middle}{Law of the Excluded M---} 
again with slightly different wording from Book~\Gk B:
\begin{quote}
  [1011 b 23] Neither does [any]thing admit to being between a contradiction, but
  it is necessary either to affirm or deny one of one,
  whatsoever.\footnote{\Gk{>All`a m`hn o>ud`e metax`u >antif'asewc
  >end'eqetai e>~inai o>uj'en, >all'' >an'agkh >`h f'anai
  >`h >apof'anai <`en kaj'' <en`oc <otio\^un.}}
\end{quote}
A Turkish rendition of this passage is
\begin{quote}
\"Ote yandan \c celi\c sik \"onermeler aras\i nda bir arac\i n\i n
olmas\i\ da imk\^an\-s\i zd\i r.  Bir \"ozne hakk\i nda tek bir
y\"uklemi---hangi y\"uklem olursa ol\-sun---, zorunlu olarak ya tasdik
etmek veya ink\^ar etmek gerekir.
\end{quote}
In other words, a proposition is true or false; there is no third alternative.
A \textbf{contradiction}\dindex{contradiction} [\Gk{>antif'asis}] for
Aristotle is evidently a pair of propositions, one affirming
something, the other denying the same thing. 
The continuation of this passage is in \S~\ref{sect:p-formulas} below.
If we follow Aristotle, it seems that, as mathematicians, we need not
concern ourselves with the Laws of Contradiction and the Excluded
Middle; we can just 
accept these principles and use them; it is the philosopher's job to
identify and enunciate them.  But the logician is also a philosopher.
In any case, we shall use these principles
explicitly in the 
next section; but we shall also see an apparent violation of one of
them.  There
we shall also begin to state axioms in our mathematical sense.

A \textbf{theorem}\dindex{theorem} today is usually considered just as a 
\emph{noteworthy} proposition with a proof from axioms.  The first
example is Theorem~\ref{thm:Russell} in the next section.
The word \Eng{theorem} itself comes from the Greek \Gk{je'wrhma}, and
it is related to the verb with the meaning of \Eng{look at}. (This
verb is found at the beginning of Book~\Gk{G} of the
\emph{Metaphysics} as quoted above on p.~\pageref{look-at}.) 
In former 
  times, finer distinctions were considered.  A few centuries after
  Aristotle, Pappus of
  Alexandria\footnote{Pappus may 
    have been born during the reign of
  Theodosius I, 379--395 \textsc{ce}, or he may have flourished
  earlier, during the reign of Diocletian, 284--305 \textsc{ce}.  The
  possibilities are discussed in
  \cite[pp.~564--567]{MR13:419b}, where also are found the text and
  translation from which the quotation is adapted.} writes:
  \begin{quote}
    Those who favor a more technical terminology in geometrical
    research use \textbf{problem}\dindex{problem} (\Gk{pr'oblhma}) to
    mean a [proposition\footnote{Ivor Thomas \cite[p.~567]{MR13:419b}
    uses \Eng{inquiry} here;
    but there is \emph{no} word in the Greek original corresponding to
    this or \Eng{proposition}.}] in which it is proposed to do or construct
    [something]; and 
\textbf{theorem,}%
\dindex{theorem}
 a [proposition] in which the
    consequences and necessary implications of certain hypotheses are
    investigated; but among the ancients some described them all as
    problems, some as theorems.
  \end{quote}
A \textbf{lemma}\dindex{lemma} is a
proposition proved mainly for the sake of proving other
propositions; the first example will be Lemma~\ref{lem:even-odd}.  (The
  Greek \Gk{l'emma} means that which is peeled off, and is from the
  verb \Gk{l'epw}, with the meaning of \Eng{peel}.)   A
  \textbf{corollary}\dindex{corollary} to a theorem is a proposition that follows almost
  immediately from the theorem; the first example will be
  Corollary~\ref{cor:root-2}.  (The word derives from the Latin
  \Lat{corollarivm,} which is the neuter form of the adjective derived from
  \Lat{corolla;} this means, among other things, a wreath of flowers
  \cite{OED}.)  


\section{Classes, sets, and numbers}\label{sect:sets}

In Chapter~\ref{ch:sets}, we shall have a lot to say
about \textsl{sets;} but it will be useful to have the basic notion
available from the beginning.

A \textsl{set}%
\tindex{set} is many things, made into one.  There are many special
cases of sets:  Two matching earrings make a \emph{pair;} several
football-players make a 
\emph{team;} the pigeons descending on bread-crumbs in the park make a
\emph{flock.}  Words like 
\Eng{pair}, \Eng{team} and \Eng{flock} are 
\textbf{collective nouns.}%
\dindex{collective noun}%
\dindexsub{noun}{collective ---}%
\dindexsub{word}{noun}
In mathematics, the word \Eng{set} is the most general
collective noun---except for the word
\Eng{class}.\footnote{Levy~\cite{MR1924429} seems to use
  \Eng{collection} more generally even 
  than \Eng{class}.}

In the previous section, I translated Aristotle's word \Gk{g'enoc} as
\Eng{class}, but that does not mean that our understanding
will be the same as Aristotle's.
For us, every set is a {class}, but not every class is a set.
Classes and sets are made up of \textbf{elements}\dindex{element} or
\textbf{members.}%
\dindexsub{member}{--- of a class} 
In
the context 
of classes, there is no mathematical difference 
between the words \Eng{element} and \Eng{member}.
(However, 
 in an equation,
  such as~\eqref{eqn:A=B} below,
the 
  expressions on either side of the 
\textsl{sign of equality,}%
\tindex{sign of equality}%
\tindexsub{equality}{sign of ---} 
or $=$\glossary{$=$},
  can be called the 
\textbf{members}%
\dindexsub{member}{--- of an equation}%
\dindexsub{equation}{member of an ---}
of the equation.) 

A 
\textbf{class}%
\dindex{class} 
is determined by a
\textbf{property.}%
\dindexsub{proper}{---ty}  
The property 
\textbf{defines}%
\dindex{defines} 
the class.  I do not attempt to define
\Eng{property;} I shall just say that, for every
property, there is a class whose members are precisely the things that
have that property.  This does not mean that a class is necessarily a
thing that can itself be a member of classes.  Indeed, if we assume that every class \emph{can} be a member of classes, then we can derive a
contradiction.  This is what we do in the proof of Theorem~\ref{thm:Russell} below.  The contradiction is the reason why we have to distinguish classes and sets.

A \textbf{set}\dindex{set} is a class that \emph{is} a member of some classes.
If $A$ is a set, and $\class C$ is a class, then the sentence
\begin{center}
  \Eng{$A$ is a member of $\class C$} 
\end{center}
is true or false---it is a
proposition. 
Again, all sets are classes; but Theorem~\ref{thm:Russell} shows that
not all classes are sets.
  
  A class can be indicated in writing or print by the presence of
  \textbf{braces}\dindex{brace} around its members. 
  So, if we 
have, say, three objects,
\begin{equation*}
  a,b,c,
\end{equation*}
then we can form the \emph{single} object
\begin{equation*}
  \{a,b,c\}.\glossary{$\{a,b,c\}$}
\end{equation*}
This single object is a \emph{class,} namely the class of all things with the property of being one of $a$, $b$, and $c$.
In fact, this class will be a \emph{set}.  In
particular, this set
\textbf{contains}\dindex{contains} $a$, $b$, and $c$
(and nothing else) as elements.

Elements of a class are
\textbf{in}\dindex{in} the class.  If $\class C$
is a class,
then we have several ways of saying the same thing:
\begin{gather*}
\Eng{$\class C$ contains $d$;}\\
\Eng{$d$ is an element of $\class C$;}\\
\Eng{$d$ is a member of $\class C$;}\\
\Eng{$d$ is in $\class C$.}
\end{gather*}
Any of these can be expressed by the symbolism\footnote{The sign $\in$
  is apparently derived from the Greek \Gk e.  Peano \cite{Peano} used
  this letter in 1889 as a symbol with the meaning of \Eng{is,}
  perhaps because the Greek word for \Eng{is} is \Gk{>est'i}.  For
  Peano, $d\in\class C$ means $d$ is a $\class C$, that is, $d$ is one
of the things denoted by the term $\class C$.}
\begin{equation*}
  d\in \class C.
\end{equation*}\glossary{$d\in\class C$}
To 
\emph{deny} that $d\in \class C$, we can write
\begin{equation*}
  d\notin\class C,
\end{equation*}\glossary{$d\notin\class C$}
which can be read as \Eng{$d$ is not in $\class C$.}

One can say that a class \textbf{comprises}\dindex{comprise}\dindexsub{verb}{comprise} its elements, and the
elements \textbf{compose}\dindex{compose}\dindexsub{verb}{compose} the class.  Unfortunately, the verbs
\Eng{comprise} and \Eng{compose} are often confused by native
English-speakers.  Alternatively, a set \textbf{consists
  of}\dindex{consists of} its elements.

Words like \Eng{collection}, \Eng{aggregate} and \Eng{family} are
sometimes
used as synonyms for \Eng{set} (or perhaps for \Eng{class}).  

I say that a
set is \emph{many} things, 
made into one; but I am using the word \Eng{many} more
generally than is usual in ordinary language.  A set might have two
elements or
one element.  A set might have \emph{no} element at all: such a set is 
\begin{equation*}
  \emptyset,
\end{equation*}\glossary{$\emptyset$}
the \textbf{empty set}\dindex{empty set}\dindexsub{set}{empty
  ---}.  I shall also assume that sets can have
\textsl{infinitely}\tindex{infinite} many elements, and that, in particular,
the \textsl{natural numbers}%
\tindex{natural number}%
\tindexsub{number}{natural ---} compose such a set, namely
 \begin{equation*}
\{0,1,2,3,4,\dots\}.
\end{equation*}
In Chapter~\ref{ch:numbers}, this assumption will turn out to be a
\emph{consequence} of the Axiom of Infinity, \ref{ax:infinity}.

A class $\class C$ is \textbf{included in}\dindex{included in} a class
$\class D$ if every 
element of $\class C$ is an element of $\class D$.  In this case, we
can write 
\begin{equation*}
   \class C\included \class D,
\end{equation*}\glossary{$\class C\included\class D$}
and we can say also\footnote{Some people would say here that $\class D$
  \emph{contains} $\class C$; but it is desirable to read
  $\class C\included
  \class D$ differently from $c\in \class D$.} that $\class D$
  \textbf{includes}\dindex{includes} $\class C$ or that $\class C$ is a
\textbf{subclass}\dindexsub{sub}{---class}\dindexsub{class}{sub---} of $\class
  D$.  
A subclass of $\class D$ that is also a set is a \textbf{subset}\dindexsub{sub}{---set}\dindexsub{set}{sub---} of $\class D$.
  If $\class C$ is \emph{not} a
  subclass of $\class D$, we can
write
\begin{equation*}
  \class C\nincluded\class D.
\end{equation*}\glossary{$\class C\nincluded\class D$}

The first \textsl{axiom} of set-theory is that
sets are determined by their elements, so that if two sets have the
same elements, then the sets themselves are the same, that is,
\textbf{equal.}\dindex{equal}  We can express
this more symbolically:

\begin{axiom}[Extension]\dindexsub{extension}{Axiom of
    E---}\dindexsub{axiom}{A--- of Extension} \label{axiom:extension}
  If $A$ and $B$ are sets such that $A\included B$ and $B\included A$,
  then 
  \begin{equation}\label{eqn:A=B}
      A=B.
  \end{equation}\glossary{$A=B$}
\end{axiom}

Instead of $A\included B$, some people write
\begin{equation*}
  A\pincluded B;
\end{equation*}\glossary{$A\pincluded B$}
but I prefer to use this to mean that $A$ is a
\textbf{proper}\dindexsub{proper}{--- subset}\dindexsub{sub}{proper
  sub---}\dindexsub{set}{proper sub---} subset of 
$B$, that is, $A\included B$, but $A\neq B$.\glossary{$A\neq B$}

Again, a class is defined by a property.  A property
can be symbolized by a \textbf{predicate.}\dindex{predicate}  A predicate \emph{says something}
about a \textsl{subject.}\tindex{subject}  (See the Law of Contradiction, in the
previous section, as translated from Aristotle.\footnote{The English
  \Eng{predicate} is from the Latin \Lat{praedicatvm,} a participle of
  the verb \Lat{praedicare.}  This verb consists of the prefix
  \Lat{prae-} (or \Lat{pre-}) and the verb \Lat{dicare.}  This verb,
  with root \Lat{dic-}, means \emph{say,} and it can be found in
  various English words, such as \Eng{indicate} and \Eng{dictionary.}
  The Latin \Lat{praedicatvm} is a translation of the Greek
  \Gk{kathgore'umenon} \cite{COD6}, a participle of \Gk{kathgor'ew};
  this verb consists of the prefix \Gk{kata-} and the verb
  \Gk{agore'uw}, which means \emph{speak before an assembly of the
    people;} such an assembly is an \Gk{agor'a}.  See
  Appendix~\ref{Aristotle}.  The verb \Gk{kathgor'ew} (or some related
  word) is the source of the English \emph{category.}})  If $P$ is a
predicate, 
then the corresponding class can be denoted by
\begin{equation}\label{eqn:Px}
  \{x:Px\};
\end{equation}\glossary{$\{x:Px\}$}
this is read as \Eng{the class of $x$ such that $P$ [applies to] $x$};
here, the 
\textsl{variable}%
\tindex{variable} $x$ takes the place of a grammatical
subject of $P$.

If $A$ is a set, then \emph{being an element of $A$} is a property; the class defined by this property is
\begin{equation*}
\{x\colon x\in A\}.
\end{equation*}
This class is just the set $A$.  Again, by the Extension Axiom, two
sets are equal if they have the same members; more generally, two
\emph{classes} are equal if they have the same members.  In
particular, two predicates that are different as symbols may
nonetheless define the same class; we may have $\{x\colon
Px\}=\{x\colon Qx\}$, even though $P$ and $Q$ are different
predicates. 

Often,
in place of $Px$ in~\eqref{eqn:Px}, we have to write something that features $x$ more
than once.  For example, there is a property of \emph{not being a
  member of oneself}.  In words, the corresponding predicate is
something like
\begin{equation}\label{eqn:word-predicate}
\text{\Eng{\underline{\qquad} is not a member of \underline{\qquad}-self},}
\end{equation}
with two spaces left for a subject.  The phrase \Eng{is not a member
  of} is also symbolized by $\notin$; so
the given property determines the class
\begin{equation}\label{eqn:Russell}
  \{x:x\notin x\}.
\end{equation}
This is the historically first\footnote{Russell gives the example in a
  letter \cite{Russell-letter} to Frege in 1902; but Levy \cite[p.~6
    correction]{MR1924429} cites an article attributing an 
  independent discovery of the example to Zermelo.} example of a class
that is not a set:   

\begin{theorem}[Russell Paradox]\label{thm:Russell}
  The class $\{x:x\notin x\}$ is not a set.
\end{theorem}

\begin{proof}\label{first-proof}
  Call this class
  $\Russell$, and suppose it
  \emph{is} a set.  Then by the Law of the Excluded Middle, either
  $\Russell\in\Russell$ or $\Russell\notin\Russell$.

Suppose $\Russell\in\Russell$.  Then, by the Law of Contradiction, it is not
the case that $\Russell\notin\Russell$.  This means $\Russell$
  fails to have the defining property of members of $\Russell$, and so
  $\Russell\notin \Russell$.  In short, if $\Russell\in\Russell$, then
  $\Russell\notin\Russell$.  On the other hand, trivially, if
  $\Russell\notin\Russell$, 
  then $\Russell\notin\Russell$.  

Having considered both
  possibilities, we conclude 
 $\Russell\notin\Russell$.  This means $\Russell$
  \emph{does} have the defining property of members of $\Russell$, so
$\Russell\in\Russell$.
Thus $\Russell$ is and is not a member of itself.  This violates the
  Law of Contradiction.
  Therefore the assumption that $\Russell$ is a set must be mistaken, so
  $\Russell$ is not a set (by the Law of the Excluded Middle).
\end{proof}

The proof ends with a box,\footnote{Other writers use a different
  symbol, or none at all.  An old-fashioned termination of a proof is
  \Lat{qed}, for the Latin \Lat{qvod erat demonstrandvm}, with the
  meaning of \Eng{which was to be demonstrated.}} as noted on
  p.~\pageref{page:box}.  
  This particular proof is a \textbf{proof by
    contradiction,}\dindexsub{proof}{--- by
    contradiction}\dindexsub{contradiction}{proof by ---} because it
  assumes the falsity of what is to be proved, and it derives from
  this a violation of the Law of Contradiction. 

This particular proof also shows that there is a class to
  which the predicate in~\eqref{eqn:word-predicate} neither
  applies nor fails to apply.  So we have an apparent violation of the Law of
  the Excluded Middle.  I would say rather that we have an example of
  a class that is not a \emph{real thing,}\index{real} so that it is just meaningless to try to apply predicates to it.
  \emph{Elements} of classes are the real things.

We do assume that subclasses of sets are sets:\footnote{The following Axiom of Separation is also
  called the \textbf{Axiom of Comprehension;}\tindexsub{axiom}{A--- of
    Comprehension}\tindexsub{comprehension}{Axiom of C---}  but I think this term is better reserved for the original, but false, assumption that every property defines a set.} 

  \begin{axiom}[Separation]\dindexsub{separation}{Axiom of
      S---}\dindexsub{axiom}{A--- of
      Separation}\label{ax:separation} 
Suppose $\universe$\glossary{$\mathcal U$} is some set, and $P$ is a predicate.
The class of elements \emph{of
  $\universe$} to which $P$ applies is a set.
  \end{axiom}

The set created by the axiom is denoted by
\begin{equation*}
\{x\in\universe\colon Px\}.
\end{equation*}
I use the letter $\universe$\index{universe}\index{set!universal ---}
here because it 
stands for \Eng{universe;} but the set
could be anything.
For a mundane example, we could let $\universe$ be the set of human
beings living now, and let $P$ be the predicate \Eng{is over two
  meters tall.} 
Then $\{x\in\universe\colon Px\}$ is the set of people now taller than
two meters.   
However, in using sets for mathematics, we have no need to
consider classes that contain anything other than sets.  This is
because, by starting with the empty set, and by putting sets into
other sets, we can create the \textsl{natural numbers;} from these, we
can create all of the other objects of mathematics.  The procedure is
as follows. 

Any two classes $\class C$ and $\class D$ have a
\textbf{union,}\dindex{union} which is the 
class comprising every element of $\class C$ or $\class D$ (or both); this union is
denoted by
\begin{equation*}
  \class C\cup\class D.\glossary{$\class C\cup\class D$}
\end{equation*}
(See \S~\ref{sect:Boole}.)  

\begin{axiom}[Adjunction\footnote{The terminology is from George
      Boolos, according to Wikipedia
      \url{http://en.wikipedia.org/wiki/General_set_theory} (accessed
      September 15, 2010).}]\label{ax:adjunction}% 
\dindex{Adjunction Axiom}
If $A$ is a set, then for all $b$, there is a set whose elements are
just $b$ and the elements of $A$. 
\end{axiom}

The new set guaranteed by the Axiom is denoted by
\begin{equation*}
A\cup\{b\}.
\end{equation*}

\begin{theorem}[Pairing]\label{thm:pairing}
For every $a$ and $b$, the classes $\{a\}$ and $\{a,b\}$ are sets.
\end{theorem}

\begin{proof}
By the Adjunction Axiom, the class $\emptyset\cup\{a\}$ is a set; but this set is just $\{a\}$.  Then, by the Axiom again, $\{a\}\cup\{b\}$ is a set; but this set is just $\{a,b\}$.
\end{proof}

The set $\{a\}$ is called
a \textbf{singleton.}\dindex{singleton}\dindexsub{set}{singleton}  Evidently the proof can be continued to show that $\{a,b,c\}$ is a set, $\{a,b,c,d\}$ is a set, and so on; in short, \emph{finite}\index{finite} classes are sets.

From any
set $A$, we can now form the union
\begin{equation*}
  A\cup\{A\}.
\end{equation*}
This idea can be used to give the following 
\textbf{recursive definition}%
\dindexsub{definition}{recursive ---}%
\dindexsub{recursive}{--- definition} 
of the 
\textbf{natural numbers.}%
\dindex{natural number}%
\dindexsub{number}{natural ---}
First, we declare that the number \textbf{zero} is just the empty set:
\begin{equation*}
  0=\emptyset.\glossary{$0$}
\end{equation*}
Then we define the natural numbers by two rules:
\begin{compactenum}
  \item
$0$ is a natural number.
\item
If $n$ is a natural number, then $n\cup\{n\}$ is a natural
number. 
\end{compactenum}
The second rule assumes that $n$ is a set; but then
$n\cup\{n\}$ is also a set, and this set can be called the \textbf{successor}\dindex{successor} of $n$.  Hence all natural numbers are sets, and
every natural number has a successor, which is a natural number.  

To the recursive definition of the natural numbers, some writers might add a
third condition:
\begin{compactenum}
\setcounter{enumi}{2}
\item
Nothing else is a natural number.  
\end{compactenum}
However, I understand such a condition to be implicit in every
recursive definition as such.

If $n$ is a natural number, let us denote its successor $n\cup\{n\}$ by
\begin{equation*}
\vscr n.
\end{equation*}
Then we have
\begin{align}\label{eqn:A-in-A'}
n&\in\vscr n,&n&\included\vscr n.
\end{align}
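For example, taking $n=2$, that is, $\{0,1\}$, we compute
\begin{equation*}
  \vscr 2=2\cup\{2\}=\{0,1\}\cup\{2\}=\{0,1,2\},
\end{equation*}
and indeed $2\in\vscr 2$ and $2\included\vscr 2$.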
The recursive definition of the natural numbers makes \textbf{proof by
  induction}\dindexsub{proof}{--- by induction}\dindex{induction} in
the following sense possible.  Suppose $P$ names a property that some natural
numbers may have, and suppose moreover that we can establish the
following two conditions.
\begin{compactenum}
  \item
$P0$.
\item
For every natural number $n$, if $Pn$, then $P(\vscr n)$.
\end{compactenum}
Then we have proved by induction that every natural number must have
the property (named by) $P$. 
In the second condition, $Pn$ is called the \textbf{inductive
  hypothesis.}\dindexsub{inductive}{--- 
  hypothesis}\dindexsub{hypothesis}{inductive ---}  The method of
proof by 
induction is first used in
Lemma~\ref{lem:includes-elements} below.  In general, an inductive
proof consists of two steps:
\begin{compactenum}[1)]
  \item
the \textbf{base step,}\dindex{base step}\dindexsub{step}{base
  ---} in which $P0$ is proved; 
\item
the \textbf{inductive step,}\dindexsub{inductive}{---
  step}\dindexsub{step}{inductive ---} in which $P(\vscr n)$ is
  proved from the inductive hypothesis $Pn$.
\end{compactenum}

It is not obvious that there is even a \textbf{class} consisting
of the natural numbers: what \emph{property} do these numbers share?
Well, they share the property that they can be obtained by starting
with $\emptyset$ and taking successors; but it is not obvious how to
make this property precise.  One way that works is the following, as
we shall show in \S~\ref{sect:ordinals}. 
We can first define an 
\textbf{ordinal}%
\dindex{ordinal} to be a set $\alpha$ such that
\begin{compactenum}[1)]
\item
$\alpha$ \emph{includes} each of its elements (that is, if $x\in \alpha$, then
  $x\subseteq \alpha$); 
\item
if $\alpha$ has two distinct elements, then one of them contains the other;
\item
if $A\included \alpha$, and $A$ is not empty, then $A$ has an element $b$ that is contained by all of
the other elements of $A$, though not by $b$ itself. 
\end{compactenum}
Then every element of an ordinal is an ordinal.  An ordinal is a
\textbf{limit}\dindex{limit} if it is not empty and is not of the form
$\beta\cup\{\beta\}$ for any set $\beta$.  Then a natural number is an
ordinal that neither \emph{is} a limit nor \emph{contains} limits.
Again, this will be worked out in \S~\ref{sect:ordinals}; meanwhile,
let us accept the informal definition of the natural numbers. 
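As a small check of this definition, consider $2$, which is the set $\bigl\{\emptyset,\{\emptyset\}\bigr\}$.  Each of its elements is included in it; of its two distinct elements, $\{\emptyset\}$ contains $\emptyset$; and every non-empty subset of $2$ has an element contained by all of the other elements (namely $\emptyset$, unless the subset is $\bigl\{\{\emptyset\}\bigr\}$).  So $2$ is an ordinal; it is not a limit, being $1\cup\{1\}$; and neither of its elements is a limit; so $2$ is a natural number in the sense just described.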
  
  The class of natural numbers
is denoted by
\begin{equation*}
  \vnn.
\end{equation*}
This symbol is not the Latin minuscule letter
\letter w (the so-called double \letter u); it is the Greek minuscule 
\emph{omega.}  Observe that \Eng{mega} means big, so
an omega is a big \letter o---rather, a double \letter o, or \Eng{oo},
which, if written quickly, may come out looking like $\vnn$.

As we have just defined them, the natural numbers can be called more
precisely the \textbf{von-Neumann natural numbers.}%
\dindexsub{number}{von-Neumann natural ---}%
\dindexsub{natural number}{von-Neumann ---}%
\dindexsub{von Neumann}{--- natural number}%
\footnote{These natural numbers are instances of the von-Neumann
  \textsl{ordinal numbers,}%
%\tindex{ordinal}%
\tindexsub{number}{ordinal ---} defined by von Neumann in 1923
\cite{von-Neumann}.  However, 
  in his introduction to von Neumann's paper, van Heijenoort cites
  Bernays as saying that Zermelo had a similar idea for ordinals in
  1915; also, in this context, Levy \cite[II.3.8, p.~52]{MR1924429}
  cites Zermelo from 1916.}  
The first four von-Neumann natural numbers are
\begin{align*}
&\emptyset,& 
&\{\emptyset\},&
&\bigl\{\emptyset,\{\emptyset\}\bigr\},&
&\Bigl\{\emptyset,\{\emptyset\},\bigl\{\emptyset,\{\emptyset\}\bigr\}\Bigr\},
%\{\emptyset, \{\emptyset\}, \{\emptyset,\{\emptyset\}\},
%    \{\emptyset,\{\emptyset\},\{\emptyset,\{\emptyset\}\}\}\},
\end{align*} 
where again $\emptyset=0$.
We have the following
standard symbols for some successors:
\begin{equation*}
  \begin{array}{c||c|c|c|c|c|c|c|c|c}
    n & 0&1&2&3&4&5&6&7&8\\ \hline
\vscr n &1&2&3&4&5&6&7&8&9
  \end{array}
\end{equation*}
Also, we may write
\begin{equation*}
  n+1
\end{equation*}\glossary{$n+1$}
for $\vscr n$.
If $m$ and $n$ are in $\vnn$, and $m\included n$, then we usually
write
\begin{equation*}
  m\leq n.
\end{equation*}\glossary{$m\leq n$}
The class $\vnn$ has two more properties, besides admitting proofs by
induction; these are given by the next two theorems.

\begin{theorem}\label{thm:z}
  $0$ is not the successor of any natural number.
\end{theorem}

\begin{proof}
We argue by contradiction.
Suppose $0$ is a successor; say $0=\vscr n$.  But $n\in\vscr n$, as
noted in~\eqref{eqn:A-in-A'}; so $n\in 0$.  This contradicts the fact that $0$
is empty.  Therefore $0$ is not a successor.
\end{proof}

\begin{lemma}\label{lem:includes-elements}
  Every von-Neumann natural number includes all of its elements.
\end{lemma}

\begin{proof}
We use induction.
  Let $P$ be the predicate
  \begin{center}
    \Eng{\underline{\qquad} includes all of its elements.}
  \end{center}
Since $0$ has no elements, trivially $0$ includes all of its elements.
Therefore $P0$. 
This completes the base step of the proof.

For the inductive step, suppose $Pn$ (as an inductive hypothesis).
Say $k\in \vscr n$.  Since 
$\vscr n=n\cup\{n\}$, either
$k\in n$, or $k\in\{n\}$.  If $k\in n$, then $k\included n$ by inductive
hypothesis.  If $k\in\{n\}$, then $k=n$, so $k\included n$.  In either case,
$k\included n$.  But $n\included \vscr n$.  Hence $k\included\vscr
n$.  (We use the obvious proposition that if $A\included B$ and
$B\included C$, then $A\included C$; this proposition will be part of
Lemma~\ref{lem:implications}.) 
In short, if $k\in\vscr n$, then $k\included\vscr n$.
Therefore $P(\vscr n)$.
This completes the induction.
\end{proof}

\begin{theorem}\label{thm:u}
  Natural numbers with the same successor are the same.
\end{theorem}

\begin{proof}
  Suppose $k$ and $n$ are natural numbers, and $\vscr k=\vscr n$.  We must show $k=n$.
We have
  \begin{equation*}
    k\cup\{k\}=n\cup\{n\}.
  \end{equation*}
In particular, $k\in n\cup\{n\}$ and $n\in k\cup\{k\}$.
Suppose if possible $k\neq n$.  Then, since $k\in n\cup\{n\}$ but $k\notin\{n\}$, we must have $k\in n$; similarly,
$n\in k$.  Hence $k\included n$ and $n\included k$ by the previous
lemma, and therefore $k=n$ by the Axiom of Extension,~\ref{axiom:extension}.
This contradicts the assumption that $k\neq n$; therefore the assumption is false, and $k=n$.
\end{proof}

We can call $m$ an \textbf{immediate
  predecessor}\dindex{immediate
  predecessor}\dindexsub{predecessor}{immediate ---} of $\vscr m$.  By
our recursive definition, every natural number that is not $0$ must be
$\vscr m$ for some $m$; that is, every natural number $n$ other than
$0$ has an immediate predecessor.  By the last theorem, this
predecessor is \emph{unique:}\index{unique} it is \emph{the} immediate
predecessor of $n$, and it can be denoted by 
\begin{equation*}
  n-1.\glossary{$n-1$}
\end{equation*}
The von-Neumann definition of the natural numbers is convenient,
because according to this definition, each natural number $n$ is just
the set that can be denoted by
\begin{equation*}
  \{0,\dots,n-1\}.
\end{equation*}\glossary{$\{0,\dots,n-1\}$}
If $n=0$, then this is the empty set.
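For instance, $4$ is just $\{0,1,2,3\}$, while $1$ is the singleton $\{0\}$.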

If we do not happen to care about whether each natural number is a
particular set, then we can denote the set of natural numbers by
\begin{equation*}
  \N;\glossary{$\mathbb N$}
\end{equation*}
this is the usual notation when one is not interested in
set-theory.  Then $\N$ is just a class that contains an element $0$,
and whose every element $n$ has a successor, which can be denoted by
\begin{equation}\label{eqn:scrn}
  \scr n\glossary{$\scr n$} 
\end{equation}
or $n+1$\glossary{$n+1$}, such that:
\begin{compactenum}[1)]
  \item
$0$ is not the successor of any element of $\N$;
\item
elements of $\N$ with the same successor are the same;
\item
$\N$ is included in every class that contains $0$ and that, for every $n$ in $\N$, contains $\scr n$ if it contains $n$.
\end{compactenum}
These conditions on $\N$ are the 
\textsl{Peano Axioms;}\tindexsub{Peano}{---
  Axioms}\tindexsub{axiom}{Peano A---s} 
we shall show in
Chapter~\ref{ch:numbers} that all properties of $\N$ 
follow from them.

\subsection*{Exercises}

\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Prove by induction that every element of $\vnn$ either \emph{is} $0$
or \emph{contains} $0$. 
\item
What is wrong with the following proof that every element of $\vnn$ is
equal to each of its elements? 
\begin{quote}
For all $n$ in $\vnn$, if $k\in n$, we show $k=n$.  We use induction on $n$.
The claim is trivially true when $n=0$, since $0$ has no elements.
Suppose the claim is true when $n=m$.  Suppose $k\in\vscr m$.  Then
either $k\in m$ or $k=m$.  If $k\in m$, then by inductive hypothesis,
$k=m$.  Therefore, in any case, $k=m$.  That is, every element of
$\vscr m$ is $m$.  But by inductive hypothesis, $m$ is equal to its
immediate predecessor (since this  is an element of $m$).  Let the
immediate predecessor of $m$ be $\ell$.  Then $\ell=m$, so
$\vscr{\ell}=\vscr m$.  But $\vscr{\ell}=m$, since $\ell$ is the
immediate predecessor of $m$.  Therefore $m=\vscr m$.  If $k\in\vscr
m$, we have already shown $k=m$; now we can conclude $k=\vscr m$.
This completes the induction. 
\end{quote}
\item
From the `theorem' in the preceding exercise, prove that all natural
numbers are equal to $0$. 
\end{enumerate}

\section{Algebra of the integers}\label{algebra}

Now that we have, from the previous section, a precise definition of the
natural numbers, I want to review some things that we know about them from
school.  We cannot yet define all of these things precisely, or prove
them: this will happen in Chapter~\ref{ch:numbers}.  Meanwhile, we
just have a set called $\N$, whose members form the list
\begin{equation*}
  0,1,2,3,\dots.
\end{equation*}
As we have seen, every natural number $n$ has a
successor, which is usually denoted by $n+1$.  Some 
mathematicians start the list of natural numbers at $1$ instead of $0$;
but I shall just say that the members of the set $\{1,2,3,\dots\}$ are the
\textbf{positive}\dindexsub{number}{positive
  ---}\dindexsub{positive}{--- number} natural numbers.

The number $0$ does not have an immediate predecessor that is a
natural number; but it does have the 
immediate 
predecessor called
$-1$.\glossary{$-1$}  This is not a natural number, but it is an
\textsl{integer}.
The set of \textbf{integers}\dindex{integer}\dindexsub{number}{integer}
comprises every natural number, along with a 
\textbf{negative,}%
\dindex{negative}
denoted by $-n$,\glossary{$-n$} for each
positive natural number $n$.  Then $-n$ has the successor $-(n-1)$
and the immediate predecessor $-(n+1)$.  The integers that are not
natural numbers are also called
\textbf{negative}%
\dindexsub{integer}{negative ---}%
\dindexsub{number}{negative integer}%
\dindexsub{negative}{--- integer} 
integers.  \emph{Every}
integer $n$ has a
\textbf{negative,} denoted by $-n$, although this number is itself negative
only if $n$ is positive.

The set of
integers is commonly denoted by\footnote{Here the letter zed or zee stands for
the German 
$\mathfrak{Zahl}$, \Eng{number.}  In English, the integers are also
called \textbf{whole numbers.}\dindex{whole
  number}\tindexsub{number}{whole ---}  In fact,  
the English word
\Eng{integer} comes from the Latin \Lat{integer,} which means whole.
This Latin word developed in France into the French word 
\textsf{entier,} 
which entered English and became \Eng{entire}.  Thus two English
words---\Eng{integer} and \Eng{entire}---represent the same Latin
word.
People interested in such matters may refer to such pairs of words as
\textsl{doublets.}\tindexsub{double}{---t}\tindexsub{word}{doublet}} 
\begin{equation*}
  \Z.\glossary{$\mathbb Z$}
\end{equation*}
This set is equipped with three
\textsl{operations,}\tindex{operation} namely
\textbf{addition,}\dindex{addition}\dindexsub{operation}{addition} 
\textbf{additive inversion,}\dindexsub{additive}{---
  inversion}\dindexsub{inversion}{additive
  ---}\dindexsub{operation}{additive inversion} and 
\textbf{multiplication.}\dindex{multiplication}\dindexsub{operation}{multiplication}
 (Operations are
\textsl{functions;}\tindex{function} functions in general and operations
in particular are
defined formally in \S~\ref{sect:functions}.)  In particular, if $x$
and~$y$ are 
integers, then so are
\begin{compactenum}[1)]
  \item
  $x+y$\glossary{$x+y$} (the \textbf{sum}\dindex{sum} of $x$ and $y$, which here are
  \textbf{addends}\dindex{addend}), 
  \item
  $-x$\glossary{$-x$} (\textbf{minus-}$x$\dindex{minus}, the \textbf{additive
  inverse}\dindexsub{additive}{---
  inverse}\dindexsub{inverse}{additive ---} or
  \textbf{negative}%
\dindex{negative} of $x$), and 
  \item
  $x\cdot y$\glossary{$x\cdot y$} (the \textbf{product}\dindex{product} of the
  \textbf{factors}\dindex{factor}
  $x$ and $y$).
\end{compactenum}
By convention, multiplication is also indicated by
\textbf{juxtaposition;}\dindex{juxtaposition} that is, the product $x\cdot y$ is also denoted by
\begin{equation*}
  xy.\glossary{$xy$}
\end{equation*}
Something like the symbol for additive
inversion is also used for a fourth operation,
\textbf{subtraction,}\dindex{subtraction} which
can be defined in terms of the other operations.
\emph{Subtracting}\annot{The English verb \Eng{subtract} is sometimes
  pronounced as if it were \Eng{substract}.  The
  English verb comes from a participle of the Latin
  verb whose infinitive is \Lat{subtrahere}.  This verb is in turn
  built up from \Lat{trahere} (meaning draw or carry) and the preposition
  \Lat{sub} (meaning from below or away).  According to the OED
  \cite{OED}, in
  medieval times, an \letter s was inserted between \Lat{sub} and
  \Lat{trahere}, yielding \Lat{substrahere}, from which came
  \Eng{substract} in English; but this formation is considered
  incorrect.  The English word \Eng{abstract} is from the Latin
  \Lat{abstrahere}, but here the \letter s belongs properly to the
  preposition \Lat{abs}, although the preposition is more commonly
  seen as \Lat{ab} or even \Lat{a}.}
 $y$ from
$x$ produces a \textbf{difference,}\dindex{difference} which is denoted by 
\begin{equation*}
  x-y,
\end{equation*}
and which is
just the sum of $x$ and $-y$.  Note that $x-y$ is not generally the same
as $y-x$.  If we want to assign names, then, in the difference $x-y$,
we can call
$x$ the \textbf{minuend}\dindex{minuend} (from the Latin, with the meaning of \Eng{that
  which is to be diminished}), and we can call $y$ the
\textbf{subtrahend}\dindex{subtrahend} (\Eng{that which is to be subtracted}).

Subtraction is thus a \textbf{composition}\dindex{composition} of two other operations.  The
process of computing $x-y$ can be indicated by a
\textbf{tree,}\dindex{tree}\annot{Trees as such are covered in a later course, Math 112.}
thus:
%\input{first-trees.tex}
\begin{equation*}
  \xymatrix@!0{
*+[F]{x} &                               &   & *+[F]{y}               \\
     &                                      & *+[o][F]{-} \ar@{-}[ur] &\\
     & *+[o][F]{+} \ar@{-}[uul] \ar@{-}[ur] &                  &
}
\end{equation*}
More complicated compositions and trees are possible.  For
example, the tree
\begin{equation*}
  \xymatrix@!0{
*+[F]{x} & & & *+[F]{y} & & *+[F]{z} & & *+[F]{w}\\
&& *+[o][F]{-} \ar@{-}[ur] &&&& *+[o][F]{+} \ar@{-}[ul] \ar@{-}[ur] & \\ 
&&&& *+[o][F]{\cdot} \ar@{-}[ull] \ar@{-}[urr] &&&\\
& *+[o][F]{+} \ar@{-}[uuul] \ar@{-}[urrr] &&&&&&
}
\end{equation*}
indicates the sum of $x$ and the product of minus-$y$ and the sum of $z$
and $w$.  Usually this sum is written on one line, as
\begin{equation}\label{eqn:arith-term}
  x+-y\cdot(z+w),
\end{equation}
or more simply as
\begin{equation*}
x-y(z+w).
\end{equation*}
I shall refer to such a \textbf{string}\dindex{string} of symbols as an
\textsl{arithmetic term.}%
%\tindexsub{term}{arithmetic ---}\tindexsub{arithmetic}{--- term}
\footnote{Here the word 
  \Eng{arithmetic} is an 
  adjective and is pronounced with the stress on the penultimate
  (next-to-last) syllable.}
  (The 
Greek word\footnote{Strictly, the Greek word \Gk{>arijm'oc} refers to
  \emph{a number of
    things}, in particular, more than one---certainly not zero or
  `fewer' than zero.  See \cite{MR1215482}.} for \Eng{number} is
\Gk{>arijm'oc}, which is \Lat{arithmos} in Latin letters.  Our general
definition\footnote{In another context, Aristotle's definition of
  \Eng{term} is in Appendix~\ref{Aristotle}.} of \Eng{term} comes in
\S~\ref{sect:1st}.)

Officially, \textbf{(arithmetic) terms}\dindexsub{arithmetic}{---
  term}\dindexsub{term}{arithmetic ---} will be certain strings
composed of 
\begin{compactenum}[1)]
 \item
 the symbols $+$, $-$ and ${}\mathrel{\cdot}{}$ (a dot);
 \item
 \textbf{variables,}\dindex{variable} such\footnote{The convention of
 using letters from 
 the end of the Latin alphabet for `unknown quantities' dates back to
 Descartes; see \cite{Descartes-Geometry}.  Since we don't want any
 limit on the number of variables we can use, and yet we want to
 define things precisely, we could declare
 officially that our variables must come from the list $x$, $x'$,
 $x''$, and so forth.} as $x$, $y$ and $z$;
 \item
 symbols for certain integers, such as $12$, $0$ and $-137$---such
 symbols can be called
 \textbf{numerals}\dindex{numeral}\footnote{It is
 probably
 simplest to think of a numeral as a single symbol, even though,
 typographically, it may be a string of digits, possibly preceded by
 a minus-sign.  For example, the numeral $-137$ should be thought of as
 a single symbol like $c_{-137}$ (that's $c$ with the subscript~$-137$).
 Our decimal convention for writing numerals is just
 that, a convention; it has no essential relation to our
 definition of arithmetic terms.  See also Footnote~\ref{footnote:+}
 below.} or
 \textbf{(numeral) constants}%
\dindex{constant}%
\dindexsub{constant}{numeral ---}%
\footnote{Letters from the front of the Latin
 alphabet are used to denote such constants; again the convention is
 found in Descartes.  Used in this way, the letters can be called
 \textbf{literal constants,}\dindex{literal
 constant}\dindexsub{constant}{literal ---} where the word
 \Eng{literal} is just the
 adjectival form of \Eng{letter}.  But for us, literal constants are not
 \emph{literally} parts of terms; they just \emph{stand} for parts of
 terms---namely, numerals.};
 \item
the parentheses $($ and $)$.\glossary{$(\quad)$}
\end{compactenum}
The formal definition of arithmetic terms is
{recursive}\index{recursive!--- definition}\index{definition!recursive
---}, in the 
sense of the previous section:
\begin{compactenum}
  \item
Every variable is an arithmetic term.
\item
Every numeral is an arithmetic term.
\item
If $t$ is an arithmetic term, then so is $-t$.
\item
If $t_0$ and $t_1$ are arithmetic terms, then so are $(t_0+t_1)$ and
$(t_0\cdot t_1)$.
\end{compactenum}
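For example, the string $(x+(-y\cdot(z+w)))$ from above is an arithmetic term under this definition: $z$ and $w$ are terms, so $(z+w)$ is a term; $y$ is a term, so $-y$ is a term, and then so is $(-y\cdot(z+w))$; finally, $x$ is a term, so $(x+(-y\cdot(z+w)))$ is a term.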
Our definition of arithmetic terms is recursive in the following way.
Suppose $A$ is \emph{some} set of strings of symbols such that each of
the following conditions holds:
\begin{compactenum}
  \item
Every variable is in $A$.
\item
Every numeral is in $A$.
\item
If $t$ is in $A$, then $-t$ is in $A$.
\item
If $t_0$ and $t_1$ are in $A$, then so are $(t_0+t_1)$ and
$(t_0\cdot t_1)$.
\end{compactenum}
Then $A$ contains all arithmetic terms.  Therefore, {proof by
  induction}\index{proof!--- by
  induction}\index{induction!proof by ---} on arithmetic terms is
  possible; here is an example: 

\begin{proposition}\label{prop:arith-terms}
  Every arithmetic term has as many left parentheses as right
  parentheses. 
\end{proposition}

\begin{proof}
  Let $A$ be the set of arithmetic terms that have as many left parentheses
  as right parentheses.  Then $A$ contains all variables and constants
  (since these have no parentheses).  Suppose $A$ contains $t$.  Then
  $t$ has as many left as right parentheses (just because it is in
  $A$), so the same is true of
  $-t$.  This means $-t$ is in $A$.  Similarly, if $t_0$ and $t_1$
  are in $A$, then each of them has as many left as right parentheses,
  so the same is true of $(t_0+t_1)$ and $(t_0\cdot t_1)$;
\begin{comment}
 (for
  example, if $t_0$ has $n_0$ left parentheses, and $t_1$ has $n_1$
  left parentheses, then $(t_0+t_1)$ has $n_0+n_1+1$ left
  parentheses)
\end{comment}
this means 
  these terms are also in $A$.  By the recursive definition of
  arithmetic terms, every term is in $A$.
\end{proof}

By the formal definition of arithmetic terms,
string~\eqref{eqn:arith-term} above is not strictly a
term; to satisfy the definition, the term should be written as
\begin{equation*}
  (x+(-y\cdot(z+w))).
\end{equation*}
By convention, we can leave out the dot between $-y$ and $(z+w)$,
and we can remove some of the parentheses.  But we can do this only
because we have a conventional \textbf{order of
  operations}\dindexsub{order}{--- of
  operations}\dindexsub{operation}{order of ---s} in terms.
By this convention,
expressions in brackets are evaluated before all else, and then
multiplication is performed
before addition (and subtraction), but otherwise operations are
performed as they are read from left to right.  So, $(x+y)z$ means
something different from $x+yz$: the former is an informal version of
the term $((x+y)\cdot z)$; the latter, of $(x+(y\cdot z))$.

The formal definition of arithmetic terms should ensure that each term
indicates uniquely how to
calculate an integer, once integral values are assigned to the
variables.  In short, arithmetic terms should be \textbf{uniquely
  readable.}\dindexsub{unique}{---ly
  readable}\dindexsub{readable}{uniquely ---}  As we have defined
them, they \emph{are}
uniquely readable: this is a theorem with a proof
like that of Theorem~\ref{thm:UR} below.

An arithmetic term is not exactly the same thing as a
\textsl{polynomial.}  For example, the terms $(x\cdot(y+z))$ and
$((x\cdot y)+(x\cdot z))$ are different.
However, they always yield the same number if $x$, $y$ and $z$ are
respectively replaced by the same three integers.  We therefore write
\begin{equation}\label{eqn:identity}
  x(y+z)=xy+xz,
\end{equation}
and we shall say that the two members of this equation
\textbf{represent}\dindex{represent} the same
\textbf{polynomial.}\dindex{polynomial}
  Also, Equation (\ref{eqn:identity}) is called an
\textbf{(arithmetic) identity.}\dindexsub{arithmetic}{---
  identity}\dindexsub{identity}{arithmetic ---}  

An equation of arithmetic terms can be called a 
\textbf{Diophantine equation,}%
\dindex{Diophantine equation}%
\dindexsub{equation}{Diophantine ---} in memory of the
  ancient Alexandrian mathematician  
  Diophantus, who studied such equations.\footnote{Diophantus wrote
  the \emph{Arithmetica,} in thirteen books, of which six have come down to us
  \cite[pp.~516, n.~$a$]{MR13:419b}.  One problem that he considers,
  for example,
  is, in our notation, to find rational solutions to the pair
  \begin{gather*}
    8x+4=y^2,\\
6x+4=y^2
  \end{gather*}
of equations \cite[pp.~526--535]{MR13:419b}.}  A Diophantine equation is
  an example of an
\textsl{(arithmetic) formula}\tindexsub{arithmetic}{---
  formula}\tindexsub{formula}{arithmetic ---}.  For example, the equation
\begin{equation}\label{eqn:elliptic}
  y^2=4x^3-ax-b
\end{equation}
(where $a$ and $b$ are understood to be integers) is an
arithmetic formula.  Its \textbf{solutions}\dindex{solution} are those
pairs of integers
that \textbf{satisfy}\dindex{satisfy} the equation: those pairs $(c,d)$ of
integers such that $d^2=4c^3-ac-b$.  Formula~\eqref{eqn:elliptic} is
not an identity, because not every pair of integers satisfies it.  (For
example, if $(c,d_0)$ and $(c,d_1)$ satisfy it, then we must have $d_1=\pm
d_0$; there is no other possibility.)\footnote{Equations like
  \eqref{eqn:elliptic} are of ongoing interest to number-theorists.
  It is a twentieth-century result that the equation $y^2=x^3+17$ has
  two solutions, $(-2,3)$ and $(2,5)$, from which all rational
  solutions can be found by certain rules; and only eight of these
  solutions are integral \cite[Example~III.2.4, pp.~59~f.]{MR817210}.}
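For a concrete instance of a solution: taking $a=0$ and $b=-17$ in~\eqref{eqn:elliptic}, the equation becomes $y^2=4x^3+17$, and the pair $(2,7)$ satisfies it, since $7^2=49=4\cdot2^3+17$.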

By our definition, a polynomial is an abstraction from the notion of a
term.  It is an
\textsl{equivalence-class}\tindexsub{equivalence}{---{}-class} of terms,
in the sense of 
\S~\ref{sect:eq}.  You can think of a polynomial as
an operation.  Then a term is a set of instructions---a recipe for how
to perform the 
operation.  The point then is that the same operation can be
performed in different ways.  This is why different terms can
represent the same polynomial; this is why we have nontrivial identities like~\eqref{eqn:identity}.

For example, the term $x+y$ says, `Start with $x$, and add $y$'; the
term $y+x$ says, `To $y$, add $x$.'  These are different activities,
but they yield the same result; so we write $x+y=y+x$.

How can you tell when two terms represent the same polynomial?  It is
often easy to show that two terms represent \emph{different} polynomials.  For example,
$x^2$\glossary{$x^2$}
(that is, $xx$) represents a different polynomial from $x$, since
$(-1)^2\neq-1$. 
But how do we know that the two members of
Equation~(\ref{eqn:identity}) represent the same polynomial? 
As an identity, the equation expresses the \textbf{distributive}\dindex{distributive} property of
multiplication over addition.  So how do we know that multiplication
\emph{has} this property with respect to addition?  We 
can check it for certain integers, say $x=5$ and $y=17$ and $z=-14$:
\begin{gather*}
5(17+-14)=5\cdot 3=15;\\
5\cdot 17+5\cdot-14=85-70=15.
\end{gather*}
But we cannot check the property for all integers in this way, since there are
infinitely many integers.

Strictly speaking, if one wants to use the distributive property with
full understanding, then one should give precise definitions of the
integers and their operations, and then one should 
\textsl{prove}%
\tindex{prove}
the distributive property.  We shall be able to do this in
Chapter~\ref{ch:numbers}: see Theorem~\ref{thm:mult}. 
However, we did not need to know all of the properties like the
distributive property, just to be able to \emph{define} the notion of a
polynomial.

As we have discussed them so far, the integers form the
\textsl{structure}%
\tindex{structure}
\begin{equation}\label{eqn:Z}
(\Z,-,+,\cdot).
\end{equation}
Structures are defined generally in \S\S~\ref{sect:relations}
and~\ref{sect:1st}.  The structure in~\eqref{eqn:Z} is the set $\Z$
equipped with
certain specified operations, namely addition, additive inversion and
multiplication.  Now, $\Z$
also has the named\footnote{\label{footnote:+}In fact, \emph{every}
integer can be given a name in decimal notation.  Alternatively we can just
write every positive integer as the appropriate sum $1+1+\dots+1$,
write zero as $0$, and write every negative integer as
$-(1+\dots+1)$.} elements $0$ and $1$.  Moreover, $\Z$ is
equipped with  
the \textsl{ordering}\tindexsub{order}{---ing} denoted by $<$\glossary{$x<y$}.
An ordering is a kind of \textsl{relation;}\index{relation} relations are 
defined generally in \S~\ref{sect:relations}.  So we may think of the
integers as composing the structure 
\begin{equation}\label{eqn:Z<}
  (\Z,0,1,-,+,\cdot,<).
\end{equation}
The ordering on $\Z$ allows us to write some new
arithmetic formulas, one of the simplest being
\begin{equation*}
  x<y,
\end{equation*}
read as \Eng{$x$ is less than $y$}.
There are some `derivative' relations: 
\begin{compactenum}
  \item
$x>y$ is read as \Eng{$x$ is greater than $y$}, and means $y<x$.
\item\label{page:leq}
$x\leq y$ means $x<y$ or $x=y$: that is, $x\leq y$ is satisfied by
  those $(a,b)$ such that $a<b$ or $a=b$.
\item
$x\geq y$ is read as \Eng{$x$ is greater than or equal to $y$}, and means
  $y\leq x$. 
\end{compactenum}
These are all \textsl{(arithmetic)
  inequalities}\tindexsub{arithmetic}{---
  inequality}; as such, they are new examples of arithmetic 
  formulas.  In general, an \textbf{inequality}\dindex{inequality} is an expression
\begin{equation*}
  t_0*t_1,
\end{equation*}
where $t_0$ and $t_1$ are terms, and $*$ is one of the symbols, $<$,
$>$, $\leq$, and $\geq$.  In this context, we may also speak of the
\textbf{inequation}\dindex{inequation} 
\begin{equation*}
  t_0\neq t_1,
\end{equation*}
which is satisfied in $\Z$ by just those integers that do \emph{not} satisfy
the equation $t_0=t_1$.
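For example, the inequation $x\neq x+1$ is satisfied by every integer, since no integer satisfies the equation $x=x+1$; while the inequation $x\neq x$ is satisfied by no integer at all.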

The positive integers\index{positive!---
  integer}\index{integer!positive ---} are just the
positive natural numbers;
symbolically, these are the integers that satisfy the inequality $0<x$.
The negative integers%
\index{negative!--- integer}%
\index{integer!negative ---} are those integers that satisfy
  $x<0$.  The
non-negative integers\index{non-negative
  integer}\index{integer!non-negative ---}
satisfy $0\leq x$ and are the natural numbers, composing the set $\N$
as we said in \S~\ref{sect:sets}.

An integer $x$ is a \textbf{factor}\dindex{factor} or \textbf{divisor}\dindex{divisor} of the integer $y$
if
$xz=y$
for some integer~$z$.  In this case, if $x\neq0$, then $z$ is unique;
we may then say that $z$ is the
\textbf{quotient}\dindex{quotient} of $y$ by $x$; this quotient is denoted by
\begin{equation*}
\frac yx\glossary{$y/x$}  
\end{equation*}
or $y/x$.
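For example, $3$ is a divisor of $12$, and the quotient $12/3$ is $4$, since $3\cdot4=12$.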
In general, for any integer $y$ and non-zero
integer $x$, there is a quotient $y/x$, but this quotient may only be an
element of
the set of \textbf{rational numbers;}\dindex{rational
  number}\dindexsub{number}{rational ---} it may not be an integer.
The set of rational numbers is denoted by
\begin{equation*}
  \Q;\glossary{$\mathbb Q$}
\end{equation*}
but I prefer to work only with integers for now.

If $x$ is a divisor of $y$, we write
\begin{equation*}
x\divides y,\glossary{$x\mid y$}
\end{equation*}
and we say that $x$ \textbf{divides}\dindex{divides} $y$, or $y$ is \textbf{divisible}\dindex{divisible} by $x$.  So the symbol $\divides$
denotes a relation, just as $<$ denotes a relation.

A positive integer is called \textbf{prime}\dindex{prime
  number}\dindexsub{number}{prime
  ---} if its only positive factors 
are $1$ and itself, and these are distinct.  So $1$ itself is not
prime.  A positive integer that is not $1$ and is not prime is
\textbf{composite.}%
\dindex{composite}%
\dindexsub{number}{composite ---}  
The list of prime numbers begins: 
\begin{equation*}2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,
59,61,67,71,73,79,83,89,97.\end{equation*} 
Does the list end?  That the list does \emph{not} end is Proposition
IX.20 of Euclid's \emph{Elements}; a version of Euclid's
proof is in the next section.

\subsection*{Exercises}

\begin{enumerate}\renewcommand{\labelenumi}{\theenumi.}
\item 
Is there a way to define arithmetic terms without using brackets?
(See~\S~\ref{sect:unique} for some ideas.)
\item
Which of the following equations are arithmetic identities?
\begin{enumerate}
\renewcommand{\labelenumii}{(\theenumii)}
\item
$xy=yx$,
\item
$x(yz)=xyz$,
\item
$(x+y)^2-2xy-y^2=x^2$,
\item
$2x+3=4$,
\item
$2x+3y=4$,
\item
$x^2+y^2=2xy$,
\item
$x^4+y^4=(x^2+y^2)^2-2x^2y^2$,
\item
$(x^2-y^2)^2+(2xy)^2=(x^2+y^2)^2$,
\item
$x^4+4y^4=(x^2+2xy+2y^2)(x^2-2xy+2y^2)$.
\end{enumerate}
\item
There are terms like $x+(y+z)$; are there formulas like $x<(y<z)$?  Explain.
\end{enumerate}

\section{Some classical theorems}\label{proofs}

We have a few proofs so far, as of Theorem~\ref{thm:Russell} and of
Proposition~\ref{prop:arith-terms}.  
What constitutes a proof in general?  It is hard to say.  By means of reason
alone, a proof should persuade any (sufficiently knowledgeable) reader
that a certain proposition is true.  This is the ideal.  In practice, the
standards for what is `reasonable' in a proof can vary.  

I said in the last section that we shall prove the distributive
property of the integers in Chapter~\ref{ch:numbers}.  By some
standards---ultimately, the standards of this book---such basic
properties of the integers were not proved until about a century ago.
On the other hand, by taking for granted these basic
properties, mathematicians have known for over two thousand years
how to prove important propositions about the integers.  Many of these
propositions are stated and proved in Euclid's \emph{Elements}
\cite{MR17:814b}.

Here I shall
offer proofs of three of these propositions, namely:
\begin{compactenum}[1)]
  \item
that there are infinitely many prime numbers;
\item
that the diagonal and side of a (geometrical) square have no common
measure (that is, they are not both integral multiples of the same unit);
\item
that there is a method for determining the greatest common divisor of
two positive integers.
\end{compactenum}
  The proofs of these propositions rely
on claims that should be plausible, but that we have not yet fully
justified.   A goal of this entire collection of
notes is to provide some of the justification.

Of the three propositions named,
the first two might
be called theorems, and the last, a problem, in the ancient sense
given by Pappus in \S~\ref{sect:language}.

\subsection*{Infinity of primes}

Without more ado, we can state the following, and prove it by contradiction:

\begin{proposition}\label{thm:inf-primes}
There are infinitely many prime numbers.\footnote{Euclid puts it a bit
  differently: \Gk{O<i pr~wtoi >arijmo`i ple'iouc e>is`i pant`oc
  to~u protej'entoc pl'hjouc pr'wtwn >arijm~wn}: `The prime numbers
  are more than any given multitude of prime numbers.'  If for
  \Eng{multitude} we understand \Eng{set}, then, for Euclid, there is
  no such thing as an \emph{infinite} set; in particular, there is
  no set such as we have called $\N$.} 
\end{proposition}

\begin{proof}
Suppose there are only finitely many prime numbers.  Then there are
$n$ primes for some $n$ in $\N$.  We can now list the primes thus:
\begin{equation*}
p_0,p_1,\dots,p_{n-1}.
\end{equation*}
The product $p_0p_1\cdots p_{n-1}$ must
be divisible by each prime $p_i$ on our list, and therefore the sum
\begin{equation*}1+p_0p_1\cdots p_{n-1}\end{equation*}
is indivisible by each prime $p_i$ (why?).  Therefore this sum has
a prime factor not on our list of primes.  This contradicts
our assumption that our list contains all primes.  Therefore there
are infinitely many primes.
\end{proof}

Are you satisfied with the proof of Proposition \ref{thm:inf-primes}?
What details does it leave out?  We have not proved that
every positive integer 
(besides $1$) \emph{has} prime factors.  (However, this fact
is Euclid's Proposition VII.32; see also Example~\ref{example:si} below.)
Nor have we defined what `infinitely many' means.  (We shall
in \S~\ref{Peano}.)

Still, by some standards, we \emph{have} given a proof: a proof by
contradiction.\annot{A proof with a similar level of detail is offered
  to the general reader by Hardy \cite[\S~12]{MR92j:01070}.}

\subsection*{Incommensurability of diagonal and side}

The next proposition is also proved by contradiction.  We first need a
definition and some {lemmas}.

An integer is \textbf{even}\dindex{even number}\dindexsub{number}{even
  ---} if $2$ divides
it; otherwise, the integer is 
\textbf{odd}\dindex{odd number}\dindexsub{number}{odd ---}; so $2n$ is
even, but $2n+1$ is odd.

\begin{lemma}\label{lem:even-odd}
  The product of two integers is
\begin{compactenum}[1)]
  \item
\emph{even}, if one of the integers is even;
\item
\emph{odd}, otherwise.
\end{compactenum}
\end{lemma}

\begin{proof}
Let the two integers be $a$ and $b$.  If $a$ is even, so that
$2\divides a$, then $a=2c$ for some integer $c$, so $ab=2cb$, which
means $ab$ is even.  If $a$ and $b$ are odd, then they are $2c+1$ and
$2d+1$ for some integers $c$ and $d$, so that
$ab=(2c+1)(2d+1)=4cd+2c+2d+1=2(2cd+c+d)+1$, which is odd.
\end{proof}

%The proof used standard facts about arithmetic, which however we shall
%not be able to state and prove \emph{precisely} until
%Chapter~\ref{ch:numbers}.  

The following is a fundamental property\footnote{Born around 1601,
 Pierre Fermat developed the method of 
\textsl{infinite descent}%
\tindexsub{infinite}{--- descent}%
\tindexsub{descent}{infinite ---}%
\dindexsub{method}{--- of infinite descent} 
to prove such theorems as that no right triangle whose sides are
 integral has an area that is the \emph{square} of an integer:  If
 there were such a triangle, 
 then there would be a smaller one, and so on.  See Weil \cite[II.IX,
 pp.~75~ff.]{MR734177}.}
 of $\N$; we shall use it here
and there before proving it in Chapter~\ref{ch:numbers}.  (It is a
 consequence of the 
\textsl{Peano Axioms}\tindexsub{Peano}{---
  Axioms}\tindexsub{axiom}{Peano A---s} 
given at the end of \S~\ref{sect:sets}, but
 it cannot be proved by induction alone.)

\begin{lemma}[Infinite Descent]%
\label{lem:inf-desc}%
\index{infinite!proof by --- descent}%
\index{proof!--- by infinite descent}%
\index{descent!proof by infinite ---} 
Every 
\textsl{strictly decreasing}%
\tindexsub{strict}{---ly decreasing}%
\tindexsub{decreasing}{strictly ---} 
\textsl{sequence}%
\tindex{sequence} 
of positive integers must be finite: that is, if there is a sequence
$(a_0,a_1,a_2,a_3, \dots)$
of positive integers such that
\begin{equation*}
  a_0>a_1>a_2>a_3>\dotsb,
\end{equation*}
then the sequence must stop---must have a final entry $a_n$ for some
$n$ in $\N$. 
\end{lemma}

\begin{proof}
The claim follows because $\N$ is
\textsl{well-ordered}\tindexsub{well}{---{}-ordered}\tindexsub{order}{well-{}---ed},
which means that every non-empty subset of $\N$
has a least element; we shall discuss this in
\S~\ref{sect:well-ordered}.  The set of terms in a strictly decreasing
sequence $(a_0,a_1,\dots)$ of positive integers must have a least
element, $a_n$; then there can be no term after this, since it would
be less than $a_n$.
  \end{proof}

We can now state and prove the following.  Its geometric
interpretation is that there is no unit length into which
 the diagonal and side of a square can be divided.
 Aristotle\footnote{In the \emph{Prior Analytics;} the passage is
   quoted and discussed at \cite[pp.~110 f.]{MR13:419a}.} alludes to a
 proof similar to ours.

\begin{proposition}\label{thm:2}
The Diophantine equation
\begin{equation}\label{eqn:Dioph-2}
  x^2=2y^2
\end{equation}
has no non-zero integral solution.
\end{proposition}

\begin{proof}
  Suppose, if possible, that $(a_0,a_1)$ satisfies the equation, where
  $a_0$ and $a_1$ are non-zero 
  integers.  In particular then, 
  \begin{equation}\label{eqn:root-2}
          {a_0}^2=2{a_1}^2.
  \end{equation}
Hence ${a_0}^2$ is even, so $a_0$ is even by Lemma~\ref{lem:even-odd}
(since if $a_0$ were odd, then ${a_0}^2$ would be odd\footnote{Here,
  in the notation of \S~\ref{sect:entailment}, we use $\sv P\lto\sv
  Q,\lnot\sv P\lto\lnot\sv Q\models\sv Q\lto\sv P$.});
say $a_0=2a_2$.  
  Then
${a_0}^2=4{a_2}^2$; this, with~\eqref{eqn:root-2}, implies\footnote{The
    properties of equality that allow this conclusion are discussed in
    detail in \cite[Ch.~III, pp.~54--67]{Tarski-Intro}.}
    that
  $2{a_1}^2=4{a_2}^2$, hence
\begin{equation*}\label{eqn:root-2next}
  {a_1}^2=2{a_2}^2.
\end{equation*}
Thus $(a_1,a_2)$ is also a solution of~\eqref{eqn:Dioph-2}.
In short, given the solution $(a_0,a_1)$, we can find a solution
$(a_1,a_2)$.  Continuing, we can
find an integer $a_3$ such that ${a_2}^2=2{a_3}^2$, and so forth.  That
is, there is an infinite sequence
\begin{equation*}
  a_0, a_1, a_2, a_3, \dots
\end{equation*}
of integers $a_k$ such that $(a_k,a_{k+1})$ is a solution
of~\eqref{eqn:Dioph-2} 
for each natural number $k$.  (Strictly, the existence of such a
sequence is only justified by the Recursion
Theorem,\index{recursion!R--- Theorem} which
is~\ref{thm:recursion} below.)
But we may also assume (why?) that each
integer $a_k$ is
\emph{positive.}  Then
\begin{equation*}
  a_0>a_1>a_2>a_3>\dotsb,
\end{equation*}
which is absurd: no such sequence can be infinite, by
Lemma~\ref{lem:inf-desc}.  Therefore such $a_0$ and $a_1$ cannot
exist. 
\end{proof}

\subsection*{Euclidean algorithm}

An alternative proof of the last proposition is given in
\S~\ref{sect:anthyphaeresis} in terms of the
\textsl{Euclidean algorithm}\tindex{Euclidean
  algorithm}\tindexsub{algorithm}{Euclidean ---} for 
finding the greatest common divisor of 
two positive integers.

Suppose $a$ and $b$ are positive integers.  Then there is a unique
natural number $k$ such that
\begin{equation}\label{eqn:goes-into}
  ka\leq b<(k+1)a.
\end{equation}
We say that $k$ is the \textbf{number of times}\dindexsub{number}{--- of
  times}\dindexsub{times}{number of ---} that $a$ goes into $b$.
Then $b-ka$ is the \textbf{remainder}\dindex{remainder} after division of $b$ by $a$.  Let
us denote this remainder by 
\begin{equation*}
\rem ba.
\end{equation*}
So we have $b=ka+\rem ba$ for some integer $k$, and $0\leq\rem ba<a$,
and these rules determine $\rem ba$.
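For example, if $a=4$ and $b=13$, then $4$ goes into $13$ three times, since $3\cdot4\leq13<4\cdot4$; so
\begin{equation*}
  \rem{13}{4}=13-3\cdot4=1.
\end{equation*}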

For the sake of completeness, we can extend this analysis to arbitrary
integers.
Every integer $a$ has an \textbf{absolute
  value}\dindex{absolute value}\dindexsub{value}{absolute
  ---}, which is denoted by $\abs
a$\glossary{$\left|a\right|$} and is given by the following rule:
\begin{equation*}\abs a=\begin{cases}a,& \text{ if }0\leq a;\\
-a,& \text{ if }a<0.
\end{cases}\end{equation*}
If $a\neq0$, and $b$ is any integer, then there is a unique natural
 number $\rem ba$ satisfying two requirements:
 \begin{compactenum}
   \item
$0\leq \rem ba<\abs a$;
\item
$b=ka+\rem ba$ for some integer $k$.
 \end{compactenum}
Here $k$ is also uniquely determined.  If $a$ and $b$ are positive,
then $\rem ba$ and $k$ are as before.  We can now say that $a\divides
b$ just in case $\rem ba=0$.
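For example, if $a=4$ and $b=-13$, then $k=-4$ and $\rem{-13}{4}=3$, since $-13=(-4)\cdot4+3$ and $0\leq3<\abs{4}$.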

The following is similar to Euclid's Proposition VII.2.  The proof
omits some details; supplying them is left as an exercise.

\begin{proposition}\label{thm:gcd}
Any two integers that are not both zero have a greatest common
divisor.  This divisor is found by alternately replacing each number
with its remainder after division by the other, until one of the
numbers becomes $0$; then the other number is the greatest common
divisor. 
\end{proposition}

\begin{proof}
Let $a$ and $b$ be integers, not both zero.  
If $\abs a=\abs b$, then $\abs a$ is the greatest common divisor of
$a$ and $b$.  Suppose now $\abs b<\abs a$.  We
\textsl{recursively}\tindexsub{recursive}{---ly}
define a sequence of natural numbers 
in the following way.  Let $a_0=\abs a$
  and $a_1=\abs b$.  Suppose
$a_0,\dots,a_{i+1}$ have been defined.  Then let
  \begin{equation*}
    a_{i+2}=
    \begin{cases}
          \rem{a_i}{a_{i+1}},&\text{ if }a_{i+1}\neq0;\\
0,&\text{ if }a_{i+1}=0.
    \end{cases}
  \end{equation*} 
The sequence is strictly decreasing until it reaches $0$; therefore, 
by Lemma~\ref{lem:inf-desc}, the sequence \emph{must}
reach $0$.  Let $c$ be its last non-zero entry.  Then $c$ is
positive and divides each $a_i$; in 
particular, it divides $a$ and $b$.  Also, if
$d\divides a$ and $d\divides b$, then $d$ divides each $a_i$; so
$d\divides c$.  Thus $c$ is the greatest of the common divisors of
$a$ and~$b$.
\end{proof}

The greatest common divisor of $a$ and $b$ can be denoted by
\begin{equation*}
\gcd(a,b).\glossary{$\gcd(a,b)$}
\end{equation*}
The technique of Proposition \ref{thm:gcd} for calculating this number is the
\textbf{Euclidean algorithm.}\dindex{Euclidean
  algorithm}\dindexsub{algorithm}{Euclidean
  ---}\footnote{The word \Eng{algorithm} is an
  `erroneous refashioning' \cite{OED}, apparently influenced by
  \Gk{>arijm'oc}, of the earlier English \Eng{algorism}, which was
  adapted from \Eng{al-Kow\=arasm\=\i}, the surname of Abu Ja'far
  Mohammed Ben Musa, whose work in algebra gave the so-called Arabic
  numerals to Europe.}  A modern formulation of this algorithm is
found in \cite{Dries-Mosch}:
\begin{equation*}
  \gcd(a,b)=
  \begin{cases}
    b,&\text{ if }\rem ab=0;\\
\gcd(b,\rem ab),&\text{ otherwise;}
  \end{cases}
\end{equation*}
assuming $0<b\leq a$.
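This formulation passes directly into a recursive program; a sketch, under the same assumption $0<b\leq a$:
\begin{verbatim}
# Sketch of the displayed recursion (assumes 0 < b <= a).
def gcd(a, b):
    r = a % b                         # rem(a, b)
    return b if r == 0 else gcd(b, r)

assert gcd(48, 18) == 6    # 48 = 2*18+12, 18 = 1*12+6, 12 = 2*6
\end{verbatim}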


There is a set of \textbf{real numbers,}\dindexsub{real}{---
  number}\dindexsub{number}{real ---} denoted by
\begin{equation*}
  \R, \glossary{$\mathbb R$}
\end{equation*}
which contains all
of the integers and rational numbers, and more.  The real numbers can
be thought of as corresponding to points on a geometrical line, once
distinct points corresponding to~$0$ and~$1$ are chosen.  Richard
Dedekind \cite[p.~2]{MR0159773} claims to have discovered a rigorous
formulation of this correspondence only in 1858; in
\S~\ref{sect:reals} below is a formal definition of the real numbers based
ultimately on Dedekind's work.
One of the real numbers is a positive number,
denoted by\annot{This number is also written $\sqrt 2$.  However, the
  symbol $\sqrt{\rule{0ex}{1.5ex}\hspace{0.7em}}$ is strictly made up of
  two parts: a 
\textbf{radical,}%
\dindex{radical} 
$\surd$, and a 
\textbf{vinculum,}%
\dindex{vinculum} 
$\overline{\rule{0ex}{1.5ex}\hspace{0.7em}}$. 
  The vinculum serves merely as a grouping-symbol.  So writing $\sqrt
  2$ is like writing $\surd(2)$; that is, the vinculum is
  unnecessary.  Note the properly omitted vincula in the facsimile
  from a 1637 publication at \cite[p.~77]{Descartes-Geometry}.  Note
  also that $\surd(4+5)=\sqrt{4+5}=3$, while $\radix 4+5=7$.}
\begin{equation*}
\radix 2,\glossary{$\sqrt{\rule{0ex}{1.5ex}}2$}
\end{equation*}
whose 
\textsl{square,}%
\tindex{square}
$(\radix 2)^2$, is $2$.  Real numbers that are
not rational are 
\textbf{irrational.}%
\dindex{irrational}%
\dindexsub{number}{irrational ---}
From Proposition \ref{thm:2}
then, we have the following consequence. 

\begin{corollary}\label{cor:root-2}
The real number $\surd 2$ is irrational.
\end{corollary}

I proposed in \S~\ref{sect:language} that
propositions are sentences that, in context, are either true or false.
In Chapter~\ref{ch:logic}, we shall
develop a formal way to 
work with propositions, merely with regard to whether they are true
or false.  (We have already worked with them \emph{informally} in this
way, as in defining $\leq$ on p.~\pageref{page:leq}: the proposition $a\leq b$ is true if and only if one of the propositions $a<b$ and $a=b$ is true.)
Our formal \label{page:01} method will be to think of a true
proposition as having 
the value $1$, and to think of a false proposition as having the
value $0$.  Then we shall be able to do computations involving
these values; we shall have a \textbf{propositional
  calculus.}\dindexsub{proposition}{---al
  calculus}\dindexsub{calculus}{propositional ---}

This is a reason why we looked at the structure $(\Z,+,-,\cdot)$.  In
\S~\ref{sect:connectives}, we shall develop a similar structure, based on the
set $\{0,1\}$ instead of $\Z$.

\subsection*{Exercises}

\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Using Lemma~\ref{lem:inf-desc} (and standard facts about $(\Z,<)$),
prove that every integer different from $1$ and $-1$ has prime factors. 
\item
Suppose $x$ and $p$ are integers, and $p$ is prime.  If $p\divides x$,
prove that $p\ndivides 1+x$.
\item
Use the Euclidean algorithm to find $\gcd(136,-192)$.
\item
Prove that $\radix 3$ is irrational.
\item
Prove that $\radix p$ is irrational, whenever $p$ is prime.
\item
Prove that $\radix n$ is irrational, unless $n$ is a square.
\item
Prove that $\sqrt[3]{\rule{0ex}{1.5ex}}2$ is irrational.
\item
In the proof of Proposition~\ref{thm:2}, why may we assume that $a_k>0$?
\item
Supply the missing details of the proof of Proposition~\ref{thm:gcd}; specifically, for all $n$ in $\N$, prove:
\begin{enumerate}
\renewcommand{\labelenumii}{(\theenumii)}
\item
$a_{n+1}<a_n$ if $a_n\neq0$;
\item
$c\divides a_n$;
\item
if $d\divides a$ and $d\divides b$, then $d\divides a_n$.
\end{enumerate}
\end{enumerate}

\section{Excursus on anthyphaeresis}\label{sect:anthyphaeresis}

We have now proved three important propositions about integers.  In this
section, an alternative proof of Proposition~\ref{thm:2}
is developed; a version of this proof may have been known in ancient
times, even before the proof given above. 
Suppose $a$, $b$, $c$ and $d$ are integers such that $ad=bc$.
Let us then write\footnote{Why not write $a/b=c/d$?  Just because I
  prefer to work only with integers for now.}
\begin{equation*}
  a:b::c:d\glossary{$a:b::c:d$}
\end{equation*}
and say that \textbf{$a$ is to $b$ as $c$ is to $d$}\dindex{is to\dots
  as \dots is to \dots}.  This expresses
the relation called \textbf{proportionality}\dindex{proportionality} among the four numbers.

\begin{lemma}\label{lem:prop}
  If $a:b::c:d$, and $k$ is an integer, then 
\begin{equation*}
a:b::a-kc:b-kd.
\end{equation*}
\end{lemma}

\begin{proof}
  If $a:b::c:d$, then $ad=bc$, so $ab-kad=ab-kbc$, that is,
  \begin{equation*}
    a(b-kd)=b(a-kc),
  \end{equation*}
so $a:b::a-kc:b-kd$.
\end{proof}

\begin{lemma}\label{lem:div}
  Suppose $a$, $b$, $c$ and $d$ are positive integers such that
  $a:b::c:d$.  Then $b$ goes into $a$ just as many times as $d$ goes
  into $c$.
\end{lemma}

\begin{proof}
  The assumption is that $ad=bc$.  Then $nad=nbc$, that is,
  \begin{equation*}
    a(nd)=(nb)c,
  \end{equation*}
for all natural numbers $n$.  Hence $a<nb$ if and only if $c<nd$, and
$nb\leq a$ if and only if $nd\leq c$.  Consideration of~\eqref{eqn:goes-into} yields the claim.  
\end{proof}

\begin{proposition}
  There are no positive integers $a$ and $b$ such that 
  \begin{equation*}
b:a::a:\rem ba.
  \end{equation*}
\end{proposition}

\begin{proof}
  Suppose $a_0$ and $a_1$ are positive integers, and let
  $a_2=\rem{a_0}{a_1}$.  Suppose if possible
  \begin{equation}\label{eqn:prop}
    a_0:a_1::a_1:a_2.
  \end{equation}
  We shall derive a contradiction.
Now, $a_2<a_1$, so we may assume $a_1<a_0$
(otherwise~\eqref{eqn:prop} is false).  We may also assume
$a_2\neq0$.  By Lemma~\ref{lem:div}, if $a_0=ka_1+a_2$, then
$a_1=ka_2+a_3$, where $a_3=\rem{a_1}{a_2}$; hence, by Lemma~\ref{lem:prop},
\begin{equation*}
  a_0:a_1::a_2:a_3.
\end{equation*}
Thus, applying the Euclidean algorithm yields a strictly decreasing
sequence $a_0$, $a_1$, $a_2$, \dots, such that $a_0:a_1::a_n:a_{n+1}$
for all natural numbers $n$; this is absurd.  Therefore
Proportion~\eqref{eqn:prop} fails.
\end{proof}

For another proof of Proposition \ref{thm:2}, suppose $2a^2=b^2$.  Then
$a^2=b^2-a^2=(b+a)(b-a)$, so
\begin{equation*}
  b+a:a::a:b-a.
\end{equation*}
But also, $a<b<2a$; so $a$ goes into $a+b$ exactly twice, leaving the
remainder $b-a$.  This contradicts the last proposition.

This proof of the irrationality of $\radix 2$ can be recast as a
\emph{positive} result.  Suppose we take two positive real numbers $a_0$
and $a_1$; we can apply a version of the Euclidean algorithm to them
(as Euclid himself does in his Propositions~X.2 and~3).  Then $a_1$ goes into
$a_0$ some number $n_0$ (possibly zero) of
times, leaving a remainder $a_2$; so $0\leq a_2<a_1$.  If $a_2$ is not $0$,
then it goes into $a_1$ some number $n_1$ of times,
leaving a remainder $a_3$; so $0\leq a_3<a_2$.  We can continue this
process of \textbf{alternating subtraction}\dindexsub{alternating}{---
  subtraction}\dindexsub{subtraction}{alternating ---} or
\textbf{anthyphaeresis,}\dindex{anthyphaeresis}\footnote{\Gk{>anjuya'iresic}; see
  \cite[pp.~504--509]{MR13:419a}.} generating a sequence
$a_0$, $a_1$, $a_2$, \dots, possibly finite, of non-negative real numbers,
and a corresponding sequence, $n_0$, $n_1$, \dots, of natural numbers.  Call
the latter sequence the \textbf{anthyphaeretic
  sequence}\dindexsub{sequence}{anthyphaeretic ---} of
$(a_0,a_1)$.  Then we have shown that the anthyphaeretic sequence of
$(1+\radix 2,1)$ is $2$, $2$, $2$, \dots, never ending.
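The reader with a computer can watch this happen.  The following sketch carries out anthyphaeresis on $(1+\radix 2,1)$ in floating-point arithmetic; as floating-point numbers only approximate real numbers, rounding error must eventually spoil the pattern, but the first ten terms come out as claimed.
\begin{verbatim}
from math import sqrt, floor
# Sketch: the first terms of the anthyphaeretic sequence of
# (1 + sqrt(2), 1); prints 2 2 2 2 2 2 2 2 2 2.
a0, a1 = 1 + sqrt(2), 1.0
for _ in range(10):
    n = floor(a0 / a1)          # a1 goes into a0 n times
    a0, a1 = a1, a0 - n * a1    # the remainder becomes the new a1
    print(n, end=' ')
\end{verbatim}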

That the Ancients found interest in such sequences can be inferred from
certain old texts: see the brief discussion at
\cite[pp.~508~f.]{MR13:419a}.  In modern notation, we have
\begin{gather*}
a_k=n_k\cdot a_{k+1}+a_{k+2},\\
    \frac{a_k}{a_{k+1}}=n_k+\frac{a_{k+2}}{a_{k+1}}=
    n_k+\cfrac 1{\Bigl(\cfrac{a_{k+1}}{a_{k+2}}\Bigr)},\\
\frac{a_0}{a_1}=n_0+\cfrac 1{n_1+\cfrac 1{n_2+\cfrac 1{n_3+\cfrac1{\ddots}}}}.
\end{gather*}
Thus we can express quotients of real numbers as  \textbf{continued
  fractions.}\dindex{continued fraction}\dindexsub{fraction}{continued ---}
In particular, we have
\begin{equation*}
  \radix 2=1+\cfrac 1{2+\cfrac 1 {2+\cfrac 1{2+\cfrac 1{\ddots}}}},
\end{equation*}
although we cannot here say exactly what this \emph{means.}
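Still, truncating the continued fraction at any finite depth yields an ordinary rational number, and such truncations can be computed exactly; a sketch (the function name is mine):
\begin{verbatim}
from fractions import Fraction
# Sketch: cut the continued fraction off after k further 2s and
# evaluate it from the bottom up, in exact rational arithmetic.
def convergent(k):
    x = Fraction(2)
    for _ in range(k):
        x = 2 + 1 / x
    return 1 + 1 / x

print([convergent(k) for k in range(4)])   # 3/2, 7/5, 17/12, 41/29
\end{verbatim}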

\subsection*{Exercises}

\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Give a \emph{geometrical} argument for the incommensurability of the
diagonal and side of a square.  (One way to start is to let $ABCD$ be
a square.  Draw a circle with center $A$ and radius $AC$.  Extend $AB$ to meet
the circle at $E$; extend $BA$ to meet the circle at $F$.  Then
$FB:BC::BC:BE$.) 
\item
The expression for $\radix 2$ as a continued fraction determines a
sequence of rational numbers that approaches $\radix 2$ as a limit.
Calculate a few terms of this sequence, and find a recursive definition of the sequence. 
\end{enumerate}

\section{Parity}\label{sect:parity}

Here I propose one
possible approach to the so-called \textsl{Boolean
  connectives}\tindexsub{Boolean}{---
  connective}\tindexsub{connective}{Boolean ---}, which
will be defined in \S~\ref{sect:connectives}.  I
also give a warning about 
how \emph{not} to write a proof.

Every integer has a \textbf{parity,}\dindex{parity} which is $0$ if the integer is even,
and $1$ if it is odd.  Let the parity of the integer
$x$ be denoted by 
\begin{equation*}
  \parity x.\glossary{$\operatorname{p}(x)$}
\end{equation*}
Some basic facts about evenness and oddness can be expressed in terms
of this:
\begin{lemma}\label{lem:par2}
  The equation
$\parity{x+2}=\parity x$
is an identity.
\end{lemma}

\begin{proof}
  Let $a$ be an arbitrary integer.  If $a$ is even, then so is $a+2$,
  so each member of the equation, evaluated at $a$, is $0$.  If $a$ is
  odd, then so is $a+2$, so each member is $1$.  Hence
  the equation is satisfied by all integers.
\end{proof}

The taking of parities respects multiplication in the following
sense:
\begin{lemma}\label{lem:par.}
  The equation
$\parity{xy}=\parity x\parity y$
is an identity.
\end{lemma}

Parity respects addition too, but in a more complicated sense:

\begin{lemma}\label{lem:par+}
  The equation
 $\parity{x+y}=\parity{\parity x+\parity y}$
is an identity.
\end{lemma}

Finally, applying the parity-operation twice is
the same as applying it once:\footnote{Therefore parity can be called
  \textbf{idempotent.}\dindex{idempotent}}

\begin{lemma}\label{lem:par-id}
  The equation $\parity{\parity x}=\parity x$
is an identity.
\end{lemma}

I have introduced parity so as to be able to define two new
operations on $\Z$ in the following way.  \emph{By definition} of the
operations $\odot$\glossary{$x\odot y$} and $\oplus$,\glossary{$x\oplus y$}
the following equations are
identities:
\begin{align*}
  x\odot y&=\parity{xy},\\
x\oplus y&=\parity{x+y}.
\end{align*}
These symbols are  not standard, and they will not be used beyond the next section.
I define two more operations.  The following are also
identities, by definition:\glossary{$x\ominus y$}\glossary{$x\sqcup y$}
%\label{defn:lnot}
\begin{align}\label{eqn:lnot}
  \ominus x&=x\oplus 1,\\
x\sqcup y&=(x\odot y)\oplus(x\oplus y).
\end{align}
For $\sqcup$, an alternative (but equivalent) definition is possible:


\begin{theorem}\label{thm:id}
The equation
  \begin{equation}\label{eqn:lor}
    x\sqcup y=\ominus(\ominus x\odot\ominus y)
  \end{equation}
is an identity.
\end{theorem}

There are two ways we can proceed.  

\begin{proof}[Proof 1.]
We reduce everything to the
  ordinary arithmetic operations.  By the definitions and Lemma
  \ref{lem:par+}, we have the following chain of identities:
  \begin{align*}
    x\sqcup y&=(x\odot y)\oplus(x\oplus y)\\
&=\parity{(x\odot y)+(x\oplus y)}\\
&=\parity{\parity{xy}+\parity{x+y}}\\
&=\parity{xy+x+y}.
  \end{align*}
 Similarly,
  \begin{align*}
    \ominus(\ominus x\odot\ominus y)&=((x\oplus 1)\odot(y\oplus
    1))\oplus 1&&\\
&=\parity{\parity{\parity{x+1}\parity{y+1}}+1}&&\\
&=\parity{\parity{\parity{(x+1)(y+1)}}+1}&&\text{[by Lemma \ref{lem:par.}]}\\
&=\parity{\parity{(x+1)(y+1)}+1}&& \text{[by Lemma \ref{lem:par-id}]}\\
&=\parity{\parity{(x+1)(y+1)}+\parity 1}&& \text{[by definition of parity]}\\
&=\parity{(x+1)(y+1)+1}&&\text{[by Lemma \ref{lem:par+}]}\\
&=\parity{xy+x+y+2}&&\text{[by arithmetic]}\\
&=\parity{xy+x+y}&&\text{[by Lemma \ref{lem:par2}]}.
  \end{align*}
Our computations show that $x\sqcup y$ and $\ominus(\ominus x\odot\ominus y)$
are equal to the same thing (namely $\parity{xy+x+y}$); so they are
equal to each other.
This completes one possible proof.
\end{proof}

Alternatively, we show that all that matters is the parities of $x$ and $y$.

\begin{proof}[Proof 2.]
By definition of $\oplus$ and by Lemma \ref{lem:par+}, we have
\begin{equation*}
  \parity x\oplus\parity y=\parity{\parity x+\parity y}=
  \parity{x+y}= x\oplus y.
\end{equation*}
By definition of $\odot$ and by Lemmas~\ref{lem:par.}
and~\ref{lem:par-id}, we have 
\begin{equation*}
  \parity x\odot\parity y=\parity{\parity x\parity y}=
  \parity{\parity{xy}}= \parity {xy}=x\odot y.
\end{equation*}
Therefore, to verify any identity involving only $\odot$ and
$\oplus$ (and operations derived from them, like $\ominus$ and
$\sqcup$), it suffices to replace each variable with its parity.  More
precisely, to verify \eqref{eqn:lor}, it is enough to check the four
possibilities when $x$ and $y$ are chosen from the set $\{0,1\}$.  We
have the following computations:
\begin{equation*}
  \begin{array}{c|c||c|c|c||c|c|c|c}
x & y & x\odot y & x\oplus y & x\sqcup y & \ominus x &
    \ominus y & \ominus x\odot\ominus y & \ominus(\ominus
    x\odot\ominus y)\\ 
    \hline
0&0&0&0&0&1&1&1&0\\
1&0&0&1&1&0&1&0&1\\
0&1&0&1&1&1&0&0&1\\
1&1&1&0&1&0&0&0&1
  \end{array}
\end{equation*}
%\begin{center}
%  \begin{tabular}{c|c|c|c|c|c|c|c|c}
%x & y & x\odot y & x\oplus y & x\sqcup y & \ominus x &
%    \ominus y & \ominus x\odot\ominus y & \ominus(\ominus x\odot\ominus y)\\
%    \hline
%0&0&0&0&0&1&1&1&0\\
%1&0&0&1&1&0&1&0&1\\
%0&1&0&1&1&1&0&0&1\\
%1&1&1&0&1&0&0&0&1
%  \end{tabular}
%\end{center}
The columns headed by the two members of~\eqref{eqn:lor} are
identical, so this equation is an identity.
\end{proof}
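The four-case check in the second proof invites mechanization; indeed, nothing prevents checking the identity over a whole range of integers.  A sketch (the function names are mine; Python's \texttt{\%} gives a remainder of $0$ or $1$ even for negative arguments):
\begin{verbatim}
# Sketch: check x sqcup y = ominus(ominus x odot ominus y)
# for all integers x and y in a range.
def par(x):        return x % 2     # parity, 0 or 1
def odot(x, y):    return par(x * y)
def oplus(x, y):   return par(x + y)
def ominus(x):     return oplus(x, 1)
def sqcup(x, y):   return oplus(odot(x, y), oplus(x, y))

for x in range(-20, 21):
    for y in range(-20, 21):
        assert sqcup(x, y) == ominus(odot(ominus(x), ominus(y)))
\end{verbatim}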

\emph{Either} of the two proofs just offered should be sufficient to
establish the theorem as true.  Note well the \emph{format} of the
first proof.  The aim was to arrive at~\eqref{eqn:lor}.  The
proof did not \emph{begin} with this equation; it began with one of
the \emph{members} of the equation, and it showed that this member was equal to a
new term.  Then the \emph{other} member of~\eqref{eqn:lor}
was shown to be equal to the same term. 
To write the proof as follows would \emph{not} be good style:
\begin{equation}\label{eqn:bad-proof}
\begin{aligned}
x\sqcup y&\overset?=\ominus(\ominus x\odot\ominus y),\\
(x\odot y)\oplus(x\oplus y)&\overset?=((x\oplus
1)\odot(y\oplus 1))\oplus 1,\\
\parity{(x\odot y)+(x\oplus y)}&\overset?=
\parity{\parity{\parity{x+1}\parity{y+1}}+1},\quad\\
\dots &\overset?=\dots,\\
\parity{xy+x+y}&= \parity{xy+x+y}.
\end{aligned}
\end{equation}
Do not write proofs this way!  It does
not show the connexion between consecutive 
lines.  Equations~\eqref{eqn:bad-proof} don't tell the
reader that, for example, $\ominus(\ominus x\odot\ominus y)= ((x\oplus
1)\odot(y\oplus 1))\oplus 1$.  In fact, the equations tell us
\emph{nothing} that can be assumed to be correct.

Think of the following example:
\begin{equation}\label{eqn:1=1}
\begin{aligned}
  -1&\overset?=1\\
(-1)^2&\overset?=(1)^2\qquad\\
1&=1.
\end{aligned}
\end{equation}
It certainly does not show that $-1=1$.

If you are \emph{searching} for a proof of
\eqref{eqn:lor}, then you might well write something like
Equations \eqref{eqn:bad-proof}.
Then, after you have found a correct line of argument, you should
rewrite your findings before presenting them to 
somebody else as a proof.  The next chapter will make this point again
with the notion of \textsl{formal proof}\tindexsub{formal}{---
  proof}\tindexsub{proof}{formal ---}:
What one writes down when 
\emph{looking} for a formal proof is generally quite different from
the formal proof itself.

\subsection*{Exercises}
\begin{enumerate}\renewcommand{\labelenumi}{\theenumi.}
\item
  Prove Lemmas~\ref{lem:par.}, \ref{lem:par+} and~\ref{lem:par-id}.
\item
Explain why the equations~\eqref{eqn:1=1} do not constitute a
valid proof of the equation~$-1=1$. 
\item
Suppose $\rightsquigarrow$ is a new arithmetic operation defined on
the set $\{0,1\}$ as follows:
\begin{equation*}
  \begin{array}{c|c||c}
  x&y&x\rightsquigarrow y\\ \hline
  0&0&1 \\
  1&0&1 \\
  0&1&0 \\
  1&1&1
  \end{array}
\end{equation*}
Find an arithmetic term $t$ such that the equation $\parity t=\parity
  x\rightsquigarrow\parity y$ is an identity.
\end{enumerate}



\section{Boolean connectives}\label{sect:connectives}

In memory of George Boole,\footnote{See Boole himself~\cite[III.12, \textbf{[47]}, p.~51]{Boole}.} let us refer to the set
  $\{0,1\}$ as 
  $\B$.  In the last section, I defined some operations that
  convert integers into elements of $\B$.  Considering the elements of
  $\B$ as integers, I want to restrict those
  operations on $\Z$ so as to apply \emph{only} to elements of $\B$.
  In so doing, I change their names:
\glossary{$P\land Q$}\glossary{$P\eor Q$}
  \glossary{$\lnot P$}\glossary{$P\lor Q$}
  \begin{center}
    \begin{tabular}{c|c|c|c|c}
      on $\Z$: & $\odot$ & $\oplus$ & $\ominus$ & $\sqcup$ \\ \hline
      on $\B$: & $\land$   & $\eor$   & $\lnot$   & $\lor$
    \end{tabular}
  \end{center}
I shall not use the four operations $\odot$, $\oplus$, $\ominus$
and $\sqcup$ anymore.
Operations on $\B$ can be called \textbf{(Boolean)
  connectives.}\dindexsub{Boolean}{---
  connective}\dindexsub{connective}{Boolean ---}
Specific English names can be given as follows:
\begin{compactenum}[1)]
  \item
$\land$ is \textbf{conjunction;}\dindex{conjunction}
\item
$\lnot$ is \textbf{negation;}\dindex{negation}
\item
$\lor$ is \textbf{(inclusive) disjunction;}\dindex{inclusive
  disjunction}\dindexsub{disjunction}{inclusive ---}
\item
$\eor$ is 
\textbf{exclusive disjunction}%
\dindex{exclusive disjunction}%
\dindexsub{disjunction}{exclusive ---} or
\textbf{(material) non-equivalence.}%
\dindexsub{material}{--- non-equivalence}%
\dindexsub{non-equivalence}{material ---}
\end{compactenum}
Since $\B$ is finite, the definitions of connectives can be given in
tables like the table in the last section:
\begin{align*}
&  \begin{array}{c|c||c||c||c}
P & Q & P\land Q & P\lor Q & P\eor Q\\ \hline
0 & 0 & 0        & 0       & 0\\
1 & 0 & 0        & 1       & 1\\
0 & 1 & 0        & 1       & 1\\
1 & 1 & 1        & 1       & 0  
  \end{array}&&
  \begin{array}{c||c}
P & \lnot P\\ \hline
0 & 1 \\
1 & 0
  \end{array}
\end{align*}
It will be convenient to have two more connectives, namely:
\begin{compactenum}[1)]\setcounter{enumi}4
  \item\label{page:imp}\glossary{$P\lto Q$}
  \textbf{(material) implication}%
\dindexsub{material}{--- implication}%
\dindexsub{implication}{material ---} or the
\textbf{conditional:}%
\dindex{conditional}
$\lto$;   
\item\glossary{$P\liff Q$}
\textbf{(material) equivalence}%
\dindexsub{material}{--- equivalence}%
\dindexsub{equivalence}{material ---} 
or the
\textbf{biconditional:}%
\dindex{biconditional}
$\liff$.
\end{compactenum}
Again the definitions can be given in a table:
\begin{center}
  \begin{tabular}{c|c||c||c}
$P$ & $Q$ & $P\lto Q$ & $P\liff Q$\\ \hline
$0$ & $0$ & $1$        & $1$ \\
$1$ & $0$ & $0$        & $0$ \\
$0$ & $1$ & $1$        & $0$ \\
$1$ & $1$ & $1$        & $1$
  \end{tabular}
\end{center}
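Since the connectives are merely operations on $\{0,1\}$, their tables can also be written out in ordinary integer arithmetic, in the spirit of \S~\ref{sect:parity}.  A sketch (the names are mine):
\begin{verbatim}
# Sketch: the six connectives as integer arithmetic on B = {0, 1}.
def land(p, q): return p * q               # conjunction
def lnot(p):    return 1 - p               # negation
def lor(p, q):  return p + q - p * q       # inclusive disjunction
def eor(p, q):  return (p + q) % 2         # exclusive disjunction
def lto(p, q):  return lor(lnot(p), q)     # material implication
def liff(p, q): return lnot(eor(p, q))     # material equivalence

assert lto(0, 0) == 1 and lto(1, 0) == 0   # agrees with the table
\end{verbatim}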
Certain identities should be evident:  For example, $P\eor Q$ seems to
mean the same thing as $\lnot(P\liff Q)$.  Here though, we shall
\emph{not} put a sign of equality%
\index{sign of equality}%
\index{equality!sign of ---} between the two expressions.
Rather, as
will be discussed more fully in \S~\ref{equivalent}, we shall write
\begin{equation}\label{eqn:first-sim}
  P\eor Q\sim \lnot(P\liff Q),
\end{equation}
using the \textbf{swung dash}\dindex{swung dash}\dindexsub{dash}{swung
    ---}\dindexsub{symbol}{swung dash}
    $\sim$
rather than the sign $=$ of equality.
Why?  First, by analogy with the definition of arithmetic terms in
\S~\ref{algebra}, we
define \textbf{Boolean terms}\dindexsub{Boolean}{---
  term}\dindexsub{term}{Boolean ---}\label{Boolean-terms} recursively as
follows.  Boolean 
terms are certain strings containing (some of) the following symbols:
\begin{compactenum}[1)]
  \item
$\land$, $\lnot$, $\lor$, $\eor$, $\lto$, $\liff$ (or other connectives,
    should we choose to define them);
\item
the \textbf{constants}\dindex{constant}
$0$ and $1$;
\item
\textbf{variables}\dindex{variable} from the list $P_0$, $P_1$, $P_2$, \dots;
\item
the parentheses $($ and $)$.
\end{compactenum}
Then the Boolean terms are determined by the following rules:
\begin{compactenum}
  \item
Variables and constants are Boolean terms;
\item
If $\sv F$ is a Boolean term, then so is $\lnot {\sv F}$;
\item
If ${\sv F}$ and ${\sv G}$ are Boolean terms, then so is $(\sv F*\sv
G)$, where $*$ is one of the connectives $\land$, $\lor$, $\eor$,
$\lto$, $\liff$. 
\end{compactenum}
Note that the constants $0$ and $1$ can also be considered as Boolean
connectives, since they give values (namely, themselves) in $\B$.
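The recursive definition tells a machine how to handle Boolean terms.  Here is a hedged sketch of one possible representation, by nested tuples (the representation and the names are mine, no part of the official definition); the function evaluates \emph{closed} terms by recursion on the three formation rules.
\begin{verbatim}
# Sketch: a closed Boolean term is a constant, or a tuple whose
# first entry names its outermost connective.
def val(t):
    if t in (0, 1):                  # a constant names itself
        return t
    if t[0] == 'not':                # rule 2
        return 1 - val(t[1])
    op, f, g = t                     # rule 3: (F * G)
    x, y = val(f), val(g)
    return {'and': x * y,
            'or':  x + y - x * y,
            'xor': (x + y) % 2,
            'imp': 1 - x + x * y,
            'iff': 1 - (x + y) % 2}[op]

assert val(('imp', 0, 1)) == 1       # the term 0 -> 1 has value 1
\end{verbatim}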

We could now define \Eng{Boolean polynomials,} and we could make from them what
we might call \Eng{Boolean polynomial
equations;} these would be examples of so-called \Eng{Boolean
    formulas.}  We shall 
\emph{not} use such expressions however, since our main interest will
lie in Boolean terms \emph{as such.}  To suggest this, we shall refer to
Boolean terms mainly as \textbf{(propositional)
  formulas.}\dindexsub{proposition}{---al
  formula}\dindexsub{formula}{propositional ---}

As with arithmetic terms, so with propositional formulas, we can
establish a conventional order of operations so as to avoid writing
too many parentheses.  We can always leave out an outer pair of
parentheses.  Then:
%In formulas whose only non-constant connectives are $\land$, $\lnot$,
%$\lor$, $\lto$ and $\liff$: 
\begin{compactenum}[1)]
  \item
$\lnot$ has priority over all other connectives;
\item
$\land$ and $\lor$ have priority over $\lto$, $\liff$, and $\eor$;
\item
in case of two instances of $\lto$, the one on the \emph{right} has
priority---we shall use this convention, because propositional
  formulas like $(P_0\lto(P_1\lto P_2))$ are more common than $((P_0\lto
  P_1)\lto P_2)$; so it will be convenient to let $P_0\lto P_1\lto P_2$
  stand for the \emph{former;}
\item
in case of two instances of $\land$ or of $\lor$ or of $\eor$, the one
on the right
has priority---we could just as well give priority to the one
  on the left; we just want to allow ourselves to let strings like
  $P_0\land P_1\land P_2$ denote Boolean terms.
\end{compactenum}
Also, instead of writing variables $P_k$,
we may use $\sv P$, $\sv Q$ and $\sv R$ instead.  Similarly, we may use letters
like ${\sv F}$, ${\sv G}$ and ${\sv H}$ to stand for formulas.  

The symbols
  $P_0$, $P_1$, 
  and so on are the variables that can appear officially
  in propositional formulas.  The symbols $\sv P$, $\sv Q$ are
  \textbf{syntactical variables;}\dindexsub{syntactic}{---al
    variable}\dindexsub{variable}{syntactic
    ---}\label{note:syntactical} in the sense of
  \cite[\S~08]{MR18:631a}
we use them to \emph{refer} to the variables in formulas.
    Likewise, ${\sv F}$ and so on are not literally
  formulas; we use them as syntactical variables for formulas.

\begin{examples}\label{first-example}
By the order of operations,
  \begin{compactenum}[1)]
    \item
the propositional formula denoted by ${\sv P}\lto {\sv Q}\lor {\sv R}$ is $({\sv P}\lto({\sv Q}\lor {\sv R}))$;
\item
$\lnot {\sv P}\land {\sv Q}$ is $((\lnot {\sv P})\land {\sv Q})$;
\item
${\sv P}\land {\sv Q}\lor {\sv R}$ is ambiguous; the writer must say whether $({\sv P}\land
  {\sv Q})\lor {\sv R}$ or ${\sv P}\land ({\sv Q}\lor {\sv R})$ is intended;
\item
${\sv P}\land {\sv Q}\land {\sv R}$ is $({\sv P}\land({\sv Q}\land {\sv R}))$;
\item
${\sv P}\land {\sv Q}\land {\sv R}\lor {\sv P}$ is ambiguous;
\item
${\sv P}\lto {\sv Q}\lto {\sv R}$ is ${\sv P}\lto({\sv Q}\lto {\sv R})$;
\item
${\sv P}\eor {\sv Q}\eor {\sv R}$ is $({\sv P}\eor({\sv Q}\eor {\sv R}))$;
\item
${\sv P}\lto {\sv Q}\land {\sv R}\lto {\sv S}$ is $({\sv P}\lto (({\sv
  Q}\land {\sv R})\lto {\sv S}))$. 
  \end{compactenum}
\end{examples}

A propositional formula like $0\lto 1$ can be called
\textbf{closed,}\dindex{closed formula}
because it has no variables.  By definition of the connective $\lto$,
this formula $0\lto 1$ has the \textbf{value}\dindex{value}~$1$.  The formulas $0\lto 1$ and~$1$
are not equal \emph{as formulas}; but the former can be considered as
a \textbf{name}\dindex{name} for the latter (considered as an element of $\B$).

Propositional formulas are so defined that every \emph{closed} formula
is the name of a \emph{unique} element of $\B$.  We shall prove this
in \S~\ref{sect:unique}; meanwhile, some applications are in
the following exercises.

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
By the order of operations, which propositional formulas, if any, are denoted by the following?
\begin{enumerate}
\renewcommand{\labelenumii}{(\theenumii)}
  \item
${\sv P}\land\lnot {\sv Q}\eor {\sv R}\lor {\sv P}$;
\item
${\sv P}\lto {\sv Q}\eor {\sv R}$;
\item
$P_0\lto P_1\lto P_2\lto P_3$;
\item
$P_0\lto P_1\lto\dots\lto P_n$.
\end{enumerate}
\item
The following closed formulas are names of which elements of $\B$?
\begin{enumerate}
\renewcommand{\labelenumii}{(\theenumii)}
\item
$1\lto 1\lto 1$,
\item
$1\lto 0\lto 1$,
\item
$(0\lto 1)\liff 1$,
\item
$\lnot(0\eor 1)\liff(0\liff 1)$,
\item
$\lnot\lnot\lnot 0$,
\item
$(1\lor 0)\land 0$,
\item
$1\lor (0\land 0)$.
\end{enumerate}
\end{enumerate}



\section{Propositional formulas and language}\label{sect:p-formulas}

In one sense of the word, a \emph{model}\index{model} is a representation or
description of something that one wants to build or understand.  Think
of an architect's model,\index{architect} or an orrery\index{orrery}
(a model of the solar system).
In this sense, symbolic logic can be seen as a model of ordinary
language.  In propositional logic, the Boolean connectives represent words such as \Eng{and, but, or, if,} and \Eng{not,}
some of which are traditionally called
\textsl{conjunctions}\tindex{conjunction}\dindexsub{word}{conjunction} (\Tur{ba\u gla\c clar}).
Our main interest here is how such words affect the truth of
statements, especially statements in mathematics.  

\subsection*{Truth}

Aristotle defines
truth in the \emph{Metaphysics} (IV, vii, 1: 1011 b 26).  A literal
translation of his words\footnote{The words are in \cite{Aristotle-XVII}:
\Gk{t`o m`en g`ar l'egein t`o >`on m`h e>~inai >`h t`o m`h >`on e>~inai ye~udos, t`o d`e t`o >`on e>~inai ka`i t`o m`h >`on m`h e>~inai >alhj'es} (\Tur{Varl\i\u g\i n var olmad\i\u g\i n\i{} veya varolmayan\i n var oldu\u gunu s\"oylemek yanl\i\c st\i r.  Buna kar\c s\i l\i k varl\i\u g\i n var oldu\u gunu, var olmayan\i n var olmad\i\u g\i n\i{} s\"oylemek do\u grudur}).} is:
\begin{quote}
  To declare the being not to be, or the not being to be, is
  false;---the being to be, and the not being not to be, is true.
\end{quote}
Alternatively, `It is false to say that what is, is not, or what is not,
is; it is true to say that what is, is, and what is not, is not.'  

I propose
(inspired by Alfred Tarski \cite{Tarski-T&P}) to refine this definition as
follows:  Let $A$ be a statement.  Then:
\begin{quote}
  $A$ is true if $A$, and $A$ is false if not $A$.  
\end{quote}
This is a \emph{definition}; implicitly then, $A$
is true \emph{only} if $A$, and $A$ is false only if not~$A$.  
The definition is obscure.  It becomes slightly less cryptic in an
  example where we can use the typographical convention established in
  the Preface: 
  \begin{verse}
      \Eng{Grass 
  is green} is true if grass is green;\\ 
  \Eng{Grass is green} is false if grass is not green.  
  \end{verse}
Note what
  happens when we translate this into Turkish:  
  \begin{verse}
  {\c Cimen ye\c silse, \Eng{Grass is green} do\u grudur;\\ 
\c cimen ye\c sil de\u gilse, \Eng{Grass is green} yanl\i\c st\i r.}
  \end{verse}

\subsection*{Compound statements}

We can now analyse certain compound statements.  
Let $A$ and $B$ be
statements.

\subsubsection*{Conjunctions, disjunctions, and negations}

  The statement \Eng{$A$ and $B$} is true if and only
if $A$ and
$B$; hence \Eng{$A$ and $B$} is true if and only if $A$ is true and
$B$ is true.  Compare this with the observation that ${\sv P}\land {\sv Q}$ takes
the value $1$ if and only if ${\sv P}$ takes the value $1$ and ${\sv Q}$ takes the
value $1$.  If $1$ represents truth, then the connective $\land$
represents the conjunction \Eng{and}.  The proposition
\Eng{$A$~and~$B$} and the
propositional formula ${\sv F}\land {\sv G}$ can alike be called
\textbf{conjunctions.}\dindex{conjunction}  Note well, however, that
the proposition
\Eng{$A$~and~$B$} belongs to \emph{our} ordinary language, while the
formula ${\sv F}\land {\sv G}$ belongs to propositional logic.

Similarly, \Eng{$A$ or $B$} is true if and only if $A$ is true or $B$
is true.  Also, ${\sv P}\lor {\sv Q}$ takes the value $1$ if and only if ${\sv P}$ takes
the value $1$, or
${\sv Q}$ takes the value $1$.  So the connective $\lor$ represents the
conjunction \Eng{or}.  The
proposition \Eng{$A$ or $B$} and the propositional formula ${\sv F}\lor {\sv G}$
can alike be called \textbf{disjunctions.}\dindex{disjunction}

More precisely, $\lor$ represents \Eng{or} in its
\emph{inclusive} sense.  The
\emph{exclusive} sense of \Eng{or} is 
intended in a sentence like \Eng{You may have tea or coffee after your
meal}, if this means that you are allowed to have tea, and you are
allowed to have coffee, but you are not allowed to have both.  The
exclusive \Eng{or} is represented by the connective $\eor$.

The sentence \Eng{Not-$A$} is true if and only if $A$ is false; and
$\lnot {\sv P}$ takes the value $1$ if and only if ${\sv P}$ takes the value $0$.
If now $0$ represents falsity, then $\lnot$ represents \Eng{not}.
Both \Eng{Not-$A$} and $\lnot {\sv F}$ can be called
\textbf{negations.}\dindex{negation}  (In
fact the negation of an English statement is almost never formed
simply by the prefixing of the word \Eng{not}; the \Eng{not} goes
inside, perhaps with some other changes.)

Mathematics often involves ignoring certain distinctions.  From the
propositions $A$ and $B$, we can form several
compound propositions:
\begin{center}
 \Eng{$A$ and $B$}\\
  \Eng{$A$, but $B$}\\
\Eng{$A$; $B$}
\end{center}
Each of these may have its own rhetorical coloration, but we shall
take them all to
have the same truth-value.  We may use for any of them the
abbreviation
\begin{equation*}
  A\amp B.\glossary{$A\amp B$}
\end{equation*}
(Note the slight typographical distinction between $\&$ and $\land$.)
The
sentence $A\amp B$ here is not a propositional formula; it is just a
proposition or sentence of ordinary language.

\subsubsection*{Implications}

We can form some more compounds, all having the same truth-value:
\begin{center}
  \Eng{If $A$, then $B$}\\
  \Eng{When $A$, then $B$}\\
\Eng{$A$ implies $B$}\\
\Eng{$B$ if $A$}\\
\Eng{$B$, provided $A$}\\
\Eng{$A$ only if $B$}
\end{center}
These can be called \textbf{implications}\dindex{implication} and
\textbf{conditional}\dindex{conditional} 
statements.  Each of them has the \textbf{antecedent}\dindex{antecedent} $A$ and the
\textbf{consequent}\dindex{consequent} $B$.
We shall understand the compounds to be true if $B$ is true or $A$ is
false (or both); otherwise, the compounds are false.  We may use the
abbreviation\glossary{$A\implies B$}
\begin{equation*}
  A\implies B.
\end{equation*}
The
propositional formula ${\sv P}\lto {\sv Q}$ can be analysed similarly, and we can
apply the same terminology. 

The formulation
\Eng{$B$ if $A$} can be understood as emphasizing that $A$ is a 
\textbf{sufficient condition}%
\dindex{sufficient condition}%
\dindexsub{condition}{sufficient ---}
for $B$.
The formulation
\Eng{$A$ only if $B$} emphasizes that $B$ is a 
\textbf{necessary condition}%
\dindex{necessary condition}%
\dindexsub{condition}{necessary ---}
for $A$.


In ordinary language, the
sentence \Eng{If $A$, then $B$}
suggests causation. \Eng{If you drop that {\.Iznik} vase, then it will
break}---you will cause the vase to break by dropping it.  In mathematics
though, the sentence \Eng{If $A$, then $B$} means no more than
\Eng{$B$ is true or 
  $A$ is false.}  This is why the connective $\lto$ is called 
\textbf{material implication;}%
\dindexsub{material}{--- implication}%
\dindexsub{implication}{material ---}%
\footnote{See the discussions in Church \cite[\S~05, n.~89,
    pp.~37f.]{MR18:631a} and Tarski \cite[\S\S~8,~9]{Tarski-Intro};
  but these sources do not discuss the origin of the terminology.} 
it is to be distinguished from 
\textbf{formal implication,}%
\dindexsub{formal}{--- implication}%
\dindexsub{implication}{formal ---}
that is, the implication suggested by a sentence like 
 \Eng{If $A$, then $B$} in ordinary language.
I suggest the following mnemonic device.
In Platonic philosophy, the \emph{form} of something has a higher
level of reality than its \emph{matter.}\footnote{I suspect Descartes
  alludes to this distinction 
  when he says in the third of the \emph{Meditations on First
    Philosophy} (p.~41) 
\begin{quote}
That this idea contains this or that objective reality rather than some
other one results from the fact that the idea gets its objective [that
  is, material?] reality from a cause in which there is at least as
much formal reality as there is objective reality contained in the
idea. \cite{Descartes-Med} 

Ama bu idea belirli bir nesnel olgusall\i k kapsad\i\u g\i{} i\c cin,
onu hi\c c ku\c skusuz en az\i ndan kendisinin kapsad\i\u g\i{} nesnel
olgusall\i k denli bi\c cimsel olgusall\i k kapsayan bir nedenden
t\"uretiyor olmal\i d\i r. \cite{Descartes-Med-Tur} 
\end{quote}}
In the sentence about a vase, there
is a \emph{formal} connexion between antecedent and consequent: they both
refer to the same vase, for example.  Such a
connexion is missing in a sentence like \Eng{If water is wet, then
  Constantine founded Constantinople}; but we count the sentence as
`materially' true if we accept the consequent as true.  (In this case,
it is irrelevant that the antecedent is true.)\footnote{Elsewhere in
  mathematics, the term \emph{formal} is used to denote what might be
  called a \emph{lower} level of reality than usual.  An expression $a+b$ may
  be called a \emph{formal sum} if $a$ and $b$ cannot `really' be
  added, except to produce the expression $a+b$.}

There is a saying in English, \Eng{If wishes were horses, then beggars would
  ride.}  We cannot analyse this as a material implication, simply
  because the antecedent and consequent are not propositions.  We
  can try to recast the sentence as, \Eng{If wishes are horses, then
  beggars ride.}  Then we can argue that the sentence is true,
  simply because the antecedent is false: wishes are \emph{not}
  horses.  This observation says
  nothing about the truth of the original saying.

In some mathematical writing, one sees statements like
\begin{equation*}
  A\implies B\implies C.
\end{equation*}
This should be understood as an abbreviation for
\begin{equation*}
  (A\implies B)\amp (B\implies C).
\end{equation*}
This conjunction is \emph{not} the same statement as the
implication 
\begin{equation*}
  A\implies(B\implies C), 
\end{equation*}
even though we understand the
formula ${\sv F}\lto {\sv G}\lto {\sv H}$ as an abbreviation for the formula ${\sv F}\lto ({\sv G}\lto
{\sv H})$. 

In mathematics, we often have occasion to write sentences like \Eng{$A$ is true, and therefore $B$ is true,} or more simply, \Eng{$A$, therefore $B$.}  Logically, the truth-value of the sentence is the same as the truth-value of $A\amp B$; so please resist the temptation to write the sentence as $A\implies B$, or as
\begin{gather*}
	\phantom{\implies{}}A\\
	\implies B.
\end{gather*}
Instead of an arrow, just use words, such as \Eng{therefore, hence, consequently,} or \Eng{as a result.}

\subsubsection*{Equivalences}

In ordinary language, we can
write indifferently
\begin{center}
  \Eng{$A$ if and only if $B$}\\
\Eng{$A$ just in case $B$}
\end{center}
These are \textbf{equivalences}\dindex{equivalence} and
\textbf{biconditional}\dindex{biconditional} statements, and
for them we can use the abbreviation\glossary{$A\iff B$}
\begin{equation*}
  A\iff B.
\end{equation*}
The formula ${\sv P}\liff {\sv Q}$ has a similar analysis and description.  In mathematical writing, one may see statements like
\begin{equation*}
A\iff B\iff C;
\end{equation*}
this should be understood as an abbreviation for
\begin{equation*}
(A\iff B)\amp (B\iff C).
\end{equation*}

\subsection*{Reasoning with compounds}

Some fundamental rules of reasoning can be abbreviated thus:
\begin{gather}\label{eqn:MP-ord}
  A\amp (A \implies B)\implies B;\\ \label{eqn:not-imp}
\Enot(A\implies B)\iff A\amp\Enot B.
\end{gather}
(We are using a convention like that established in \S~\ref{sect:connectives}:
the expression $\amp$ has priority over $\implies$ and $\iff$.)

The operations of \textbf{conversion}%
\dindex{conversion} and
\textbf{contraposition}%
\dindex{contraposition}%
\dindexsub{proof}{contraposition}
can be performed on implications:
\begin{compactenum}[1)]
  \item
the \textbf{converse}\dindex{converse} of $A\implies B$ is $B\implies A$;
\item
the \textbf{contrapositive}\dindex{contrapositive} of $A\implies B$ is $\Enot B\implies \Enot A$.
\end{compactenum}
The contrapositive of an implication is true if
and only if the original implication is true: 
\begin{equation*}
  (A\implies B)\iff(\Enot B\implies\Enot A).
\end{equation*}
This observation is of
great value in the proving of mathematical propositions.  In
particular, it often allows for proofs that are superior in style to
proofs by contradiction.  The stylistic problem with a proof by contradiction is
that it contains false or even meaningless statements.  The proof may still be correct, but it is inelegant.  For example,
in the proof of the Russell Paradox (Theorem~\ref{thm:Russell}), since
$\Russell$ turns out not to be a set, the expressions
$\Russell\in\Russell$ and $\Russell\notin\Russell$ turn out to be
meaningless.  A proof that avoids this problem is the following. 

\begin{proof}[Alternative proof of the Russell Paradox.]\label{Rus-alt}
Let $A$ be an arbitrary set.  Then either $A\in A$, or $A\notin A$.
If $A\in A$, then $A\notin\Russell$, so $A\neq\Russell$.  If $A\notin
A$, then $A\in\Russell$, so again $A\neq\Russell$.  Thus $A\neq\Russell$.  That is, no set is equal to $\Russell$; so $\Russell$ is not a set. 
\end{proof}

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
  \item
Find a true implication whose converse is true.
  \item
Find a true implication whose converse is false.
\item
Recast all foregoing proofs-by-contradiction to avoid any contradictions.
\end{enumerate}

\section{Quantifiers}\label{sect:quantifiers}

As the Boolean connectives are used to model\index{model} the
conjunctions of
ordinary language, so the symbols called
\textsl{quantifiers}\tindex{quantifier} can be
used to model certain so-called \textsl{determiners,} especially
\Eng{all} and
\Eng{some.}  Quantifiers are a part of \textsl{predicate
  logic.}\tindexsub{predicate}{--- logic}\tindexsub{logic}{predicate
  ---}  
  
In a section of the `XVII. Meditation' of his \emph{Devotions upon Emergent
  Occasions} of 1624, the clergyman and so-called metaphysical
  poet John Donne uses the determiners \Eng{no, every,} and \Eng{any,} in addition to the indefinite article \Eng{a(n).}  The Meditation begins as follows (and here I preserve
  Donne's original spelling and   typography, as found in
  \cite[pp.~440f.]{Donne}): `\textsc{Perchance} hee for whom this
  \emph{Bell} tolls, may be so ill, 
    as that he knowes not it tolls for him;' later, the Meditation
  continues: 
  \begin{quote}
    No man is an \emph{Iland,} intire of it selfe; every man is a
    peece of the \emph{Continent,} a part of the \emph{maine;} if a
    \emph{Clod} bee washed away by the \emph{Sea, Europe} is the
    lesse, as well as if a \emph{Promontorie} were, as well as if a
    \emph{Mannor} of thy \emph{friends} or of \emph{thine owne} were;
    any mans \emph{death} diminishes \emph{me,} because I am involved in
    \emph{Mankinde;} And therefore never send to know for whom the
    \emph{bell} tolls; It tolls for \emph{thee.}
  \end{quote}
This text contains the following three clauses (and now I modernize
the spelling):
\begin{quote}
  No man is an island.\\
  Every man is a piece of the continent.\\
  Any man's death diminishes me.
\end{quote}
The first clause is contradicted some 350 years later by a verse of a
popular song by Simon and Garfunkel \cite{S&G}:
\begin{quote}
  I am a rock, I am an island.
\end{quote}
Donne says that the proposition \Eng{I am an island} is false, no
matter who says it: it is
false that some man is an island.  (I take Donne's man to be a \emph{human
being,} male or female.)  So we can abbreviate the first two
of Donne's clauses above by:
\begin{quote}
  Not-(some $x$ is an island) \&\ (every $x$ is a piece of the
  continent),
\end{quote}
where the variable $x$ is understood to range over humanity.
We can \emph{expand} this to
\begin{quote}
  Not-(there is some $x$ such that $x$ is an island) \&\ (for
  every $x$, $x$ is a piece of the 
  continent).
\end{quote}
The reason for this expansion is that the predicate \Eng{[is] an
  island} might be denoted by $P$, and \Eng{[is] a piece of the
  continent} might be denoted by $Q$.  For the phrase \Eng{there is
  some $x$ such that}, we write
\begin{equation*}
  \exists x;\glossary{$\exists x$}
\end{equation*}
for the phrase \Eng{for all $x$}, we write
\begin{equation*}
  \forall x.\glossary{$\forall x$}
\end{equation*}
Then Donne's two clauses can be written
\begin{equation*}
  \lnot\Exists xPx\amp \Forall xQx.
\end{equation*}
The symbol $\exists$ is the \textbf{existential
  quantifier;}\dindexsub{existential}{---
  quantifier}\dindexsub{quantifier}{existential ---} 
  the symbol
$\forall$ is the \textbf{universal quantifier.}\dindexsub{universal}{---
  quantifier}\dindexsub{quantifier}{universal ---}\footnote{With
    Chiswell and Hodges \cite{MR2319486}, one may prefer to say that the compound
    symbols $\Exists x$ and $\Forall x$ are the quantifiers.}   
  We have just seen that
these correspond respectively to the determiners \Eng{some} and
\Eng{every,} and $\lnot\exists$ corresponds to \Eng{no.}  We shall
discuss, by and~by, what $\lnot\forall$ corresponds to.

Let $\universe$ be some universal set as in \S~\ref{sect:sets}, let $P$ be a predicate, and
let $A$ be the resulting set $\{x\in \universe\colon Px\}$.  We can
form several equations and inequations whose members are $\emptyset$,
$A$ and $\universe$; with quantifiers, we can describe them.
\begin{compactenum}
  \item
$\Forall xPx$ means $A=\universe$.
\item
$\Exists xPx$ means $A\neq\emptyset$.
\item
$\lnot\Exists xPx$ means $A=\emptyset$.
\item
$\lnot\Forall xPx$ means $A\neq\universe$.
\end{compactenum}
The set denoted by
\begin{equation*}
  \{x\in\universe\colon \lnot Px\}
\end{equation*}
consists of those elements of $\universe$ that are \emph{not} in $A$:
it is the set
\begin{equation}\label{eqn:Ac}
  A\comp,\glossary{$A\comp$}
\end{equation}
called the \textbf{complement}\dindex{complement} of $A$ (in $\universe$).  Then we can form more
equations, inequations and propositions on the pattern of those above:
\begin{compactenum}
  \item
$\Forall x\lnot Px$ means $A\comp=\universe$.
\item
$\Exists x\lnot Px$ means $A\comp\neq\emptyset$.
\item
$\lnot\Exists x\lnot Px$ means $A\comp=\emptyset$.
\item
$\lnot\Forall x\lnot Px$ means $A\comp\neq\universe$.
\end{compactenum}
But we have, for example,
\begin{gather*}
    A\comp=\universe\iff A=\emptyset;\\
A\comp\neq\emptyset\iff A\neq\universe.
\end{gather*}
Correspondingly, we also have
\begin{gather}\label{eqn:notE}
  \lnot\Exists x Px\iff\Forall x\lnot Px;\\ \label{eqn:notA}
\lnot\Forall x Px\iff\Exists x\lnot Px.
\end{gather}
These equivalences are valuable tools for understanding propositions
written with quantifiers.
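Over a \emph{finite} universe, the equivalences \eqref{eqn:notE} and \eqref{eqn:notA} can even be tested mechanically, with Python's \texttt{any} and \texttt{all} in the roles of $\exists$ and $\forall$.  A sketch (the universe and the predicate are toy choices of mine):
\begin{verbatim}
# Sketch: not-exists P iff for-all not-P, and
#         not-for-all P iff exists not-P, on a finite universe.
U = range(-5, 6)                     # a toy universal set
def P(x): return x > 0               # a toy singulary predicate

assert (not any(P(x) for x in U)) == all(not P(x) for x in U)
assert (not all(P(x) for x in U)) == any(not P(x) for x in U)
\end{verbatim}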

\begin{example}
  In calculus,
\index{calculus!infinitesimal ---}%
\index{infinite!---simal calculus} a
  function $f$ on $\R$ is said to be
\textsl{continuous}%
\tindex{continuous function}
at a real number $a$ if, for every positive real
number $\epsilon$, there is a positive real number $\delta$ such that,
for every real number $x$, if $\abs{x-a}<\delta$, then
$\abs{f(x)-f(a)}<\epsilon$.  In our new symbolism, we can write the
definition as
\begin{equation}\label{eqn:ed}
  \Forall{\epsilon}(\epsilon>0\implies\Exists {\delta}(\delta>0\amp
  \Forall x(\abs{x-a}<\delta\implies\abs{f(x)-f(a)}<\epsilon))).
\end{equation}
Some people abbreviate this proposition to
\begin{equation*}
  \Forall{\epsilon>0}\Exists {\delta>0}
  \Forall x(\abs{x-a}<\delta\implies\abs{f(x)-f(a)}<\epsilon).
\end{equation*}
By~\eqref{eqn:notE} and~\eqref{eqn:notA} above,
by~\eqref{eqn:not-imp} in \S~\ref{sect:p-formulas}, and by the observation that, in $\R$, the proposition $x<y$ fails if and only if $x\geq y$, the negation of~\eqref{eqn:ed} is
\begin{equation*}
  \Exists{\epsilon}(\epsilon>0\amp\Forall{\delta}(\delta>0\implies
  \Exists x(\abs{x-a}<\delta\amp\abs{f(x)-f(a)}\geq\epsilon))),
\end{equation*}
which some people write as
\begin{equation*}
  \Exists{\epsilon>0}\Forall{\delta>0}\Exists
  x(\abs{x-a}<\delta\amp\abs{f(x)-f(a)}\geq\epsilon). 
\end{equation*}
For a specific example, let $f$ be the function given by
\begin{equation*}
  f(x)=
  \begin{cases}
    \sin\displaystyle\frac 1x,&\text{ if }x\neq0;\\
0,&\text{ if }x=0;
  \end{cases}
\end{equation*}
and $a=0$.  We can show that $f$ is not continuous at $a$ as follows.
The function $x\mapsto\sin x$ is periodic, with
period $2\pi$: that is, 
\begin{equation*}
  \Forall x \sin(x+2\pi)=\sin x.
\end{equation*}
Also, $\sin(\pi/2)=1$.  Let $\epsilon=1/2$.  Say $\delta>0$.  There is
some integer $n$
greater than $1/(2\pi\delta)$.  Then $2n\pi+\pi/2>2n\pi>1/\delta$.  Let
$x=1/(2n\pi+\pi/2)$.  Then $\abs{x-a}=x<\delta$, but
$\abs{f(x)-f(a)}=\abs{f(x)}=\sin(2n\pi+\pi/2)=1 \geq\epsilon$.  This
proves that 
$f$ is not continuous at $0$.
\end{example}
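The witnesses appearing in this example can be exhibited numerically; a sketch (floating-point, so the last value is only approximately $1$):
\begin{verbatim}
from math import sin, pi
# Sketch: the points x = 1/(2n*pi + pi/2) approach 0, while
# f(x) = sin(1/x) stays (approximately) 1.
for n in (1, 10, 100, 1000):
    x = 1 / (2 * n * pi + pi / 2)
    print(n, x, sin(1 / x))
\end{verbatim}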

In~\eqref{eqn:ed}, note that the expression 
\begin{equation}\label{eqn:ed-core}
\abs{x-a}<\delta\implies\abs{f(x)-f(a)}<\epsilon
\end{equation}
can be understood as a predicate with \emph{three} subjects---let's call them \textbf{arguments:}\dindex{argument} $x$, $\delta$, and $\epsilon$.  If we wanted to abbreviate~\eqref{eqn:ed-core}, we might write it as $Sx\delta\epsilon$.  Then $S$ is a \textbf{ternary}\dindex{ternary} predicate.  Ternary predicates are common in mathematics, at least implicitly; for example, the equations
\begin{align*}
x+y&=z,&xy&=z
\end{align*}
can be understood as featuring ternary predicates.  The signs $=$ and $<$ are \textbf{binary}\dindex{binary} predicates.
\textbf{Singulary}%
\dindex{singulary}
predicates---predicates that take a single argument---are uncommon in mathematics, though they are needed in general treatments such as ours.

\subsection*{Quantifier elimination and introduction}

Let us return to the general setting where $\universe$ is some set, $P$ is a singulary predicate, and $A=\{x\in\universe\colon Px\}$.  In proofs, there are several moves we might make that involve introducing or eliminating quantifiers from known propositions.
\begin{compactdesc}
\item[$\forall$-elimination:]
If we know $\Forall xPx$, and $b$ is some element of $\universe$, then $b\in A$, so we can conclude
\begin{equation*}
Pb.
\end{equation*}
\item[$\exists$-introduction:]
If $c$ is an element of $\universe$ such that $Pc$, then $c\in A$, so $A\neq\emptyset$, and therefore
\begin{equation*}
\Exists xPx.
\end{equation*}
\item[$\forall$-introduction:]
If $b$ is an \emph{arbitrary} element of $\universe$, and we can show $Pb$, then it must be the case that
\begin{equation*}
\Forall xPx.
\end{equation*}
Of course it is essential that $b$ be \textbf{arbitrary.}\tindex{arbitrary}  This means, in the proof of $Pb$, nothing about $b$ can be used, except its membership in $\universe$.
\item[$\exists$-elimination:]
Suppose $\Exists xPx$.  If, by assuming $Pb$ for some
unknown element $b$ of $\universe$, we are able to prove a proposition $\sigma$ that says nothing about $b$, then we can conclude that $\sigma$ is true.
\end{compactdesc}
These rules are illustrated in the next subsection.

\subsection*{Prenex forms}

In compound propositions involving quantifiers, it may be desirable to move all of the quantifiers to the front, in order to better understand the complexity of the proposition, or simply to avoid confusion.  The result is said to be in
\textbf{prenex}\dindex{prenex} form.
For example,~\eqref{eqn:ed} can be rewritten in prenex form as
\begin{equation*}
  \Forall{\epsilon}\Exists {\delta}
  \Forall x(\epsilon>0\implies(\delta>0\amp(\abs{x-a}<\delta\implies\abs{f(x)-f(a)}<\epsilon))).
\end{equation*}
This is a consequence of the following lemmas, where
$\sigma$ is a statement and $P$ is a singulary predicate.


\begin{lemma}
$(\sigma\implies\Exists xPx)\iff\Exists x(\sigma\implies Px)$.
\end{lemma}

\begin{proof}
\begin{compactdesc}
\item[($\bm{\Rightarrow}$)]
Suppose $\sigma\implies\Exists xPx$.  We consider two cases.  

Suppose first $\sigma$ is true.  Then so is $\Exists xPx$, and hence for some $a$ in $\universe$ we have $Pa$ and therefore $\sigma\implies Pa$.  By $\exists$-introduction, we can conclude $\Exists x(\sigma\implies Px)$.

On the other hand, if $\sigma$ is false, then for all $a$ in $\universe$, we have $\sigma\implies Pa$.  Since in particular there is \emph{some} $a$ in $\universe$, again by $\exists$-introduction we can conclude $\Exists x(\sigma\implies Px)$.

\item[($\bm{\Leftarrow}$)]
Suppose $\Exists x(\sigma\implies Px)$.  By $\exists$-elimination, $\sigma\implies Pa$ for some $a$ in $\universe$.  If $\sigma$ is false, then $\sigma\implies\Exists xPx$ is true.  If $\sigma$ is true, then so is $Pa$, and therefore $\Exists xPx$ is true; hence also $\sigma\implies\Exists xPx$ is true.\qedhere
\end{compactdesc}
\end{proof}

\begin{lemma}
$(\sigma\amp\Forall xPx)\iff\Forall x(\sigma\amp Px)$.
\end{lemma}

\begin{proof}
\begin{compactdesc}
\item[($\bm{\Rightarrow}$)]
Say $\sigma\amp\Forall xPx$.  Let $a$ be arbitrary.  Then $Pa$ (by $\forall$-elimination), so $\sigma\amp Pa$, hence $\Forall x(\sigma\amp Px)$ by $\forall$-introduction (since $a$ was arbitrary).

\item[($\bm{\Leftarrow}$)]
Say $\Forall x(\sigma\amp Px)$.  Let $a$ be arbitrary.  Then $\sigma\amp Pa$ (by $\forall$-elimination), so $\Forall xPx$ by $\forall$-introduction (since $a$ was arbitrary) and hence $\sigma\amp\Forall xPx$.
\qedhere
\end{compactdesc}
\end{proof}

\begin{lemma}\label{lem:imp-all}
$(\sigma\implies\Forall xPx)\iff\Forall x(\sigma\implies Px)$.
\end{lemma}

Now, writing $Sx\delta\epsilon$ for~\eqref{eqn:ed-core}, we can rewrite~\eqref{eqn:ed} as
\begin{gather*}
\Forall{\epsilon}(\epsilon>0\implies\Exists{\delta}(\delta>0\amp\Forall xSx\delta\epsilon)),\\
\Forall{\epsilon}\Exists{\delta}(\epsilon>0\implies(\delta>0\amp\Forall x Sx\delta\epsilon)),\\
\Forall{\epsilon}\Exists{\delta}(\epsilon>0\implies\Forall x(\delta>0\amp Sx\delta\epsilon)),\\
\Forall{\epsilon}\Exists{\delta}\Forall x(\epsilon>0\implies(\delta>0\amp Sx\delta\epsilon)).
\end{gather*}

The various rules must be applied with sensitivity to variables:

\begin{lemma}
$(\Forall xPx\implies\Forall xQx)\iff\Forall y\Exists x(Px\implies Qy)$.
\end{lemma}

\begin{proof}
The following are equivalent.
\begin{align*}
&\Forall xPx\implies\Forall xQx,&&\\
&\Forall x(\Forall xPx\implies Qx),&&\\
&\Forall xPx\implies Qa&&\text{ for arbitrary $a$,}\\
&\lnot Qa\implies\Exists x\lnot Px&&\text{ for arbitrary $a$,}\\
&\Exists x(\lnot Qa\implies\lnot Px)&&\text{ for arbitrary $a$,}\\
&\Exists x(Px\implies Qa)&&\text{ for arbitrary $a$,}\\
&\Forall y\Exists x(Px\implies Qy).&&\qedhere
\end{align*}
\end{proof}
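Like the equivalences \eqref{eqn:notE} and \eqref{eqn:notA}, the foregoing lemmas can be spot-checked by brute force on a small finite universe, by ranging over every predicate $P$ on that universe and both truth-values of $\sigma$.  A sketch (the universe is a toy choice of mine; of course, such a check is no proof):
\begin{verbatim}
from itertools import product
# Sketch: test the three prenex lemmas over the universe U, for
# every predicate P on U and each truth-value of sigma.
U = [0, 1, 2]
def imp(p, q): return (not p) or q

for sigma in (False, True):
    for bits in product((False, True), repeat=len(U)):
        P = dict(zip(U, bits))
        assert imp(sigma, any(P[x] for x in U)) == \
            any(imp(sigma, P[x]) for x in U)
        assert (sigma and all(P[x] for x in U)) == \
            all(sigma and P[x] for x in U)
        assert imp(sigma, all(P[x] for x in U)) == \
            all(imp(sigma, P[x]) for x in U)
\end{verbatim}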

\subsection*{Models}

The assertion that the Diophantine equation $x^2-y^2=(x+y)(x-y)$ is an
identity is the proposition
\begin{equation*}
  \Forall x\Forall y x^2-y^2=(x+y)(x-y),
\end{equation*}
where $x$ and $y$ are understood to range over $\Z$.  To express this
last qualification, we can write\glossary{$\models$}
\begin{equation*}
  \Z\models\Forall x\Forall y x^2-y^2=(x+y)(x-y)
\end{equation*}
(a notation to be developed in \S~\ref{sect:1st}).
The expression $\Z\models\sigma$ can be read as one of
\begin{center}
 \Eng{$\sigma$ is true in $\Z$,}\\
 \Eng{$\Z$ satisfies $\sigma$,}\\
 \Eng{$\Z$ is a model of $\sigma$;}
 \end{center}
  here $\Z$ is the
    \emph{context} in which $\sigma$ is true (see
    \S~\ref{sect:language}).  The symbol $\models$ can be called the
    \textbf{semantic turnstile:}%
\dindexsub{semantic}{--- turnstile}%
\dindexsub{turnstile}{semantic ---} 
\emph{semantic,}
    because it concerns the
    \emph{meaning} of propositions (rather than the form), and
    \emph{turnstile,} because that is roughly what it looks like: a
    gate with a horizontal bar that you can turn away if you are
    allowed to pass (as for example when leaving the METU library).
    The \textsl{syntactic turnstile}%
\tindexsub{syntactic}{--- turnstile}%
\tindexsub{turnstile}{syntactic ---} $\proves$ will be
    introduced in \S~\ref{sect:formal}.

The notation $\Z\nmodels\sigma$ means $\sigma$ is false in $\Z$, that is, $\Z\models\lnot\sigma$.

\begin{example}
The sentence $\Forall x(x\neq0\implies\Exists yxy=1)$ is false in $\Z$, but true in $\Q$, that is,
\begin{align*}
\Z&\nmodels\Forall x(x\neq0\implies\Exists yxy=1),&
\Q&\models\Forall x(x\neq0\implies\Exists yxy=1).
\end{align*}
Hence also $\Z\models\Exists x(x\neq0\land\Forall yxy\neq1)$; for example, $\Z\models(2\neq0\land\Forall y2y\neq1)$.
\end{example}

\subsection*{Ordinary language}

Look again at the equations
\begin{equation*}
  A=\universe,\quad A\neq\emptyset,\quad A=\emptyset,\quad A\neq\universe.
\end{equation*}
These can be verbalized respectively as
\begin{compactenum}[1)]
  \item
{every}thing is in $A$,
\item
{some}thing is in $A$,
\item
{no}thing is in $A$,
\item
{not every}thing is in $A$.
\end{compactenum}
The first three of these clauses are obtained from the clause
  \Eng{thing is in 
  $A$} by adding, respectively, a universal, an existential, and a negative determiner.  The last clause
  needs the addition of \Eng{not every}; alternatively, the clause
  could be written as \Eng{something is not in $A$}.  Apparently,
  English has no one-word
  expression with the meaning of \Eng{not every} or
  \Eng{some\dots not}.
  
Some people might
  write the last clause on the list as \Eng{Everything is not in $A$,} or \Eng{All things are not in $A$.}
  For example, there is a saying:
  \begin{center}
    All that glitters is not gold.
  \end{center}
It is pretty clear that what is meant is that \emph{some} things that
glitter are not gold: some shiny attractive things are not
worth much.  But the saying looks as if it could be
written as \Eng{All that glitters fails to be gold.}  This does not
have the intended meaning, since gold itself does glitter.  To avoid
possible misunderstanding, it
seems better to write 
\begin{center}
  Not all that glitters is gold,
\end{center}
with \Eng{not} moved to the beginning.

Turkish avoids the ambiguities possible from a misplaced
\Eng{not}.  In the Antalya \emph{otogar,} I once bought a bag of
bananas with the brand name Asal.  The bag displayed
the slogan
\begin{center}
  Her muz Asal muz de\u gildir.
\end{center}
This should be translated as
  \Eng{Not every banana is a Prime banana}.
According to our understanding, the sentence
\Eng{Every banana is not a Prime banana}
would be rendered in Turkish as
\begin{center}
  Hi\c cbir muz Asal muz de\u gildir.
\end{center}

The words \Eng{a(n)} and \Eng{any} are ambiguous.  If you
say \Eng{A dog has three legs}, you probably mean the \Eng{a}
existentially: there is a dog that has three legs.  But if you say
\Eng{A dog has four legs}, probably you are describing dogs in
general: every dog has four legs.  The sentence \Eng{Anybody can come}
could be a general invitation to everybody, or it could express a
worry over the possibility that somebody will come.

Still, the word \Eng{any} seems useful in ordinary life.  Again, Donne
writes: 
\begin{center}
  Any man's death diminishes me.
\end{center}
Could he write, instead, \Eng{Every man's death diminishes me}?  In a
mathematical context, the \Eng{every} is preferable; but \Eng{every
  man's death} suggests the image of all people dying at once;
\Eng{any man's death} takes the deaths one by one.  

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Prove Lemma~\ref{lem:imp-all}.
\item
Find (with proof) prenex forms for the following:
\begin{enumerate}
\item
$\sigma\amp\Exists xPx$,
\item
$\Forall xPx\implies\sigma$,
\item
$\Exists xPx\implies\sigma$,
\item
$\Exists xPx\implies\Forall xQx$,
\item
$\Forall xPx\implies\Exists xQx$.
\end{enumerate}
\item
Rewrite $\Forall x\Exists yRxy$ in a form that does
not use $\exists$.
\item
Write the negation of $\Exists x(Px\implies\Forall yRxy)$ in prenex form. 
\item
Write the following sentences $\sigma$ in symbolic form, with
quantifiers, and in each case, determine whether $\str
M\models\sigma$, where $\str M$ is $\N$, $\Z$, $\Q$, or $\R$: 
\begin{enumerate}
%\renewcommand{\labelenumii}{\theenumii)}
\item
every number has a square root;
\item
every positive number has a square root;
\item
for all coefficients $b$ and $c$, the equation $x^2+bx+c=0$ has two distinct solutions, provided $b^2\neq 4c$;
\item
there is no least number;
\item
between any two distinct numbers, there is another number.
\end{enumerate}
 \end{enumerate}


%\input{chapter-propositional.tex}

\chapter{Propositional logic}\label{ch:logic}
\setcounter{section}{-1}

\section{Truth-tables}\label{sect:tt}

Propositional formulas were defined in \S~\ref{sect:connectives}.  It
was suggested there that every \emph{closed} propositional formula
${\sv F}$ has a \textbf{value.}\dindex{value} Let us denote this value
by 
\begin{equation*}
  \named {\sv F};\glossary{$\widehat {\sv F}$}
\end{equation*}
it is an element of
$\B$ and can be found in the following way.  First note that ${\sv F}$
meets one of the following conditions:
\begin{compactenum}[1)]
\item
${\sv F}$ is a constant from $\B$ (that is, $0$ or $1$), or
\item
${\sv F}$ is $\lnot {\sv G}$ for some closed formula ${\sv G}$, or
\item
${\sv F}$ is $({\sv G}*{\sv H})$ for some closed formulas ${\sv G}$ and ${\sv H}$, where $*$ is one of the
connectives $\land$, $\lor$, $\lto$, $\liff$, and $\eor$.
\end{compactenum}
Then we can find $\named {\sv F}$ by the following \textbf{recursive}\dindex{recursive}
procedure:
\begin{compactenum}
\item
If ${\sv F}$ is in $\B$, then $\named {\sv F}$ is ${\sv F}$ itself.
\item
If ${\sv F}$ is $\lnot {\sv G}$, then $\named {\sv F}$ is the value of $\lnot\named {\sv G}$
as determined by the table in~\S~\ref{sect:connectives}.
\item
If ${\sv F}$ is $({\sv G}*{\sv H})$, then $\named {\sv F}$ is the value of $\named {\sv G}*\named {\sv H}$
 as determined by the tables in~\S~\ref{sect:connectives}.
\end{compactenum}
In the terminology introduced at the end of \S~\ref{sect:connectives},
${\sv F}$ is a 
\textbf{name}\dindex{name} for $\named {\sv F}$.
It is proved in the next section that $\widehat {\sv F}$ is
\emph{uniquely} determined by the procedure just given for finding it; we can
then indeed call $\widehat {\sv F}$ the \textbf{value,} or more precisely the
\textbf{truth-value,}%
\dindexsub{truth}{---{}-value}%
\dindexsub{value}{truth-{}---} of ${\sv F}$.  
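
This recursive procedure is easily made concrete.  Here is a sketch in
Python; it is merely an illustration, and no part of the formal
development.  A closed formula is encoded as a nested tuple: the
constants are the integers $0$ and $1$; \texttt{('not', F)} stands for
the negation of \texttt{F}; and a binary compound becomes a triple
such as \texttt{('imp', F, G)}.
\begin{verbatim}
BINARY = {
    'and': lambda a, b: a & b,
    'or':  lambda a, b: a | b,
    'imp': lambda a, b: (1 - a) | b,
    'iff': lambda a, b: 1 - (a ^ b),
    'xor': lambda a, b: a ^ b,
}

def value(F):
    # The three branches correspond to the three cases above.
    if F in (0, 1):              # a constant names itself
        return F
    if F[0] == 'not':            # negate the value of the sub-formula
        return 1 - value(F[1])
    op, G, H = F                 # a binary compound
    return BINARY[op](value(G), value(H))

# ((0 and 1) implies (0 or 1)) has the value 1:
assert value(('imp', ('and', 0, 1), ('or', 0, 1))) == 1
\end{verbatim}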

If a formula is not closed, then it does not have a value in $\B$.  However,
any formula can be made into a closed formula by 
\textsl{substitution}%
\tindex{substitution}
of values for its variables.

For each propositional formula $\sv F$, there is some $n$ in $\N$ such that, for each $k$ in $\N$, if the variable $P_k$ appears in $\sv F$, then $k<n$.
Then we can write ${\sv F}$ as
\begin{equation*}
{\sv F}(P_0,\dots,P_{n-1}),\glossary{${\sv F}(P_0,\dots,P_{n-1})$}
\end{equation*} 
and we may refer to ${\sv F}$ as an
\textbf{$n$-ary}\dindex{nary@$n$-ary}\dindexsub{arity}{nary@$n$-ary}
formula.  A $3$-ary formula is also called
\textbf{ternary;}\dindex{ternary}\dindexsub{nary@$n$-ary}{ternary}\dindexsub{arity}{ternary}
a $2$-ary formula,
\textbf{binary;}\dindex{binary}\dindexsub{nary@$n$-ary}{binary}\dindexsub{arity}{binary}
a $1$-ary formula,  
\textbf{singulary.}\dindex{singulary}\dindexsub{nary@$n$-ary}{singulary}\dindexsub{arity}{singulary}\footnote{The
  word 
  \Eng{unary}\index{unary}\dindexsub{nary@$n$-ary}{unary}\dindexsub{arity}{unary} is often used instead
  of \Eng{singulary}.  Following Quine,\index{Quine} Church\index{Church} \cite[\S~02, p.~12, n.~29]{MR18:631a} suggests 
  \Eng{singulary} as a more etymologically correct word than
  \Eng{unary}.  Indeed, whereas the first five Latin cardinal numbers are
  \Lat{un-}, \Lat{du-}, \Lat{tri-}, \Lat{quattuor}, \Lat{quinque}, the
  first five Latin \emph{distributive} numbers---corresponding to the
  Turkish \Tur{birer}, \Tur{iki\c ser}, \Tur{\"u\c cer},
  \Tur{d\"order}, \Tur{be\c ser} \cite{LatinDili}---are \Lat{singul-},
  \Lat{bin-}, \Lat{tern-}, \Lat{quatern-}, \Lat{quin-}.  It is the
  latter sequence that gives us \Eng{binary} and \Eng{ternary}---also
  \Eng{quaternary} and \Eng{quinary}, if these are desired.  So
  \Eng{singulary} appears to be a better word than \Eng{unary}.  In
  fact, \Eng{singulary} does not appear in the original \emph{Oxford English
Dictionary} \cite{OED}.  The word \Eng{unary} \emph{does} appear in
this dictionary, but it is considered
obsolete: only one use of the word, from 1576, was discovered in
English literature.  There, \Eng{unary} meant \emph{unit}, although
the word \Eng{unit} was not actually invented until 1570, when it was
introduced by [John] Dee\index{Dee, John} to correspond to the Greek \Gk{mon'ad-}.}
  A $0$-ary or \textbf{nullary}\dindex{nullary}\dindexsub{nary@$n$-ary}{nullary}\dindexsub{arity}{nullary}
  formula has 
\emph{no} variables: it is 
\textbf{closed}%
\dindex{closed formula} 
in the sense of
\S~\ref{sect:connectives}.  An $n$-ary formula is also $(n+1)$-ary,
$(n+2)$-ary, and so on.

\begin{examples}\mbox{}
  \begin{asparaenum}
  \item
Suppose ${\sv F}$ is $P_0\land P_1\lto P_0\lor P_1$ (that is, $((P_0\land
P_1)\lto(P_0\lor P_1))$, according to the convention established in
\S~\ref{sect:connectives}).   Then ${\sv F}$ is binary and can be described as
\begin{equation*}{\sv F}(P_0,P_1).\end{equation*}
It can also be considered as the ternary formula ${\sv F}(P_0,P_1,P_2)$, but
\emph{not} as the singulary ${\sv F}(P_0)$. 
\item
  By the convention established here, the formula
  $P_4\lor P_{21}$ is $22$-ary and $175$-ary; it is not $21$-ary, much
  less binary. 
  \end{asparaenum}
\end{examples}

If ${\sv F}$ is an $(n+1)$-ary formula, then it can be converted to an
$n$-ary formula in two different ways by \textbf{substitution.}\dindex{substitution}  Indeed,
if $e$ is one of the two elements of~$\B$, then each occurrence of the
variable $P_n$ in ${\sv F}$ can be 
replaced with $e$; all the remaining variables of ${\sv F}$ belong to
$\{P_0,\dots,P_{n-1}\}$, so ${\sv F}$ has become $n$-ary.
In turn, other elements of $\B$ can be substituted 
for other variables in ${\sv F}$, so that, in the end, a closed
formula results.

In general, if ${\sv F}$ is an $n$-ary formula, and $(e_0,\dots,e_{n-1})$ is
a list of $n$ elements of~$\B$, then there is a closed formula
\begin{equation*}
  {\sv F}(e_0,\dots,e_{n-1}),\glossary{${\sv F}(e_0,\dots,e_{n-1})$}
\end{equation*}
which is the result of substituting $e_k$ for $P_k$ in ${\sv F}$ for each $k$
that is less than $n$.
The list $(e_0,\dots,e_{n-1})$ can be called an
\textbf{$n$-tuple}%
\dindex{ntuple@$n$-tuple}%
\dindexsub{tuple}{n-{}---@$n$-{}---}
from
$\B$ and can be abbreviated by
\begin{equation*}
\tuple e.\glossary{$\vec e$}
\end{equation*}
(The definition
of {$n$-tuple} will be refined in \S~\ref{sect:relations}.)   Here the
tuple $\tuple e$ is an $n$-ary
\textbf{truth-assignment}%
\dindexsub{truth}{---{}-assignment}\dindexsub{assignment}{truth-{}---}
(or a truth-assignment for the $n$-ary formula ${\sv F}$).
The truth-value of
${\sv F}(\tuple e)$ can be denoted by\footnote{The notation is from
  \cite[Definition~2.1.8, p.~41]{Burris}.} 
\begin{equation*}
  \named {\sv F}(\tuple e).
\end{equation*}

\begin{example}
Again suppose ${\sv F}$ is $P_0\land P_1\lto P_0\lor P_1$; consider this as
${\sv F}(P_0,P_1)$.  If $\tuple e=(0,1)$, then ${\sv F}(\tuple e)$ is $0\land 1\lto
0\lor 1$; the value of this is the value of $0\lto 1$, which is $1$.
That is, $\named {\sv F}(0,1)=1$.
\end{example}

A
\textbf{truth-table}\dindexsub{truth}{---{}-table}\dindexsub{table}{truth-{}---}
is a list of the values attained by a propositional
formula under its possible truth-assignments.  If a formula is
$n$-ary, then its truth-table has $n+1$ columns: a column for each
variable, and one column for the formula itself; also, aside from the
headings of the columns, the table must have $2^n$ rows.

\begin{example}
Truth-tables defining certain connectives were given in
\S~\ref{sect:connectives}. 
\end{example}

If $k<2^n$, then 
\begin{equation*}
k=e^k_0+2e^k_1+4e^k_2+\dotsb+2^{n-1}e^k_{n-1}=\sum_{j<n}2^je^k_j
\end{equation*}
for some $e^k_j$ in $\B$; that is, $k$ is $e^k_{n-1}e^k_{n-2}\cdots e^k_1e^k_0$ in binary notation.  Then the truth-table for an arbitrary $n$-ary formula ${\sv F}(P_0,\dots,P_{n-1})$ has the form of
\begin{equation*}
\begin{array}{c|c|c|c|c||c}
 P_0 & P_1 & P_2 & \cdots & P_{n-1} & {\sv F}\\ \hline \rule{0pt}{3ex}
 0   & 0   & 0   & \cdots & 0       & \named{{\sv F}}(0,0,0,\dots,0)\\ \rule{0pt}{3ex}
 1   & 0   & 0   & \cdots & 0       & \named{{\sv F}}(1,0,0,\dots,0)\\ \rule{0pt}{3ex}
 0   & 1   & 0   & \cdots & 0       & \named{{\sv F}}(0,1,0,\dots,0)\\ \rule{0pt}{3ex}
 1   & 1   & 0   & \cdots & 0       & \named{{\sv F}}(1,1,0,\dots,0)\\ \rule{0pt}{3ex}
 0   & 0   & 1   & \cdots & 0       & \named{{\sv F}}(0,0,1,\dots,0)\\ \rule{0pt}{3ex}
 \vdots & \vdots & \vdots& \cdots & \vdots& \vdots \\ \rule{0pt}{3ex}
 e^k_0 & e^k_1 & e^k_2 & \cdots & e^k_{n-1} & \named{{\sv F}}(e^k_0,e^k_1,e^k_2,\dots,e^k_{n-1})\\ \rule{0pt}{3ex}
 \vdots & \vdots & \vdots& \cdots & \vdots& \vdots
\end{array}
\end{equation*}
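
The ordering of the rows is easily mechanized: the $k$-th row consists
of the binary digits $e^k_j$ of $k$, the least significant first, so
that the $P_0$ column alternates fastest.  A Python sketch
(illustrative only):
\begin{verbatim}
def rows(n):
    # Yield the 2**n truth-assignments in the order of the table
    # above; the j-th entry of row k is the digit (k >> j) & 1.
    for k in range(2 ** n):
        yield tuple((k >> j) & 1 for j in range(n))

print(list(rows(2)))   # [(0, 0), (1, 0), (0, 1), (1, 1)]
\end{verbatim}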

To be able to \emph{compute} the truth-table of a formula, we need to
know the truth-tables of the \textsl{proper
  sub-formula}s of the given
formula.  The
\textbf{sub-formulas}\dindexsub{sub}{---{}-formula}\dindexsub{formula}{sub-{}---}
of a formula are determined by the following conditions:
\begin{compactenum}
  \item
${\sv F}$ is a sub-formula of itself.
\item
${\sv F}$ is a sub-formula of $\lnot {\sv F}$.
\item
${\sv F}$ and ${\sv G}$ are sub-formulas of $({\sv F}*{\sv G})$
(where $*$ is $\land$, $\lor$, $\lto$, $\liff$ or $\eor$; remember that,
  by the convention established in \S~\ref{sect:connectives}, ${\sv F}$ and
  ${\sv G}$ here are not just strings, but \emph{formulas}).
\item
Every sub-formula of a sub-formula of ${\sv F}$ is a sub-formula
of ${\sv F}$.
\end{compactenum}
A sub-formula of ${\sv F}$ is a \textbf{proper sub-formula}\dindexsub{proper}{---
  sub-formula}\dindexsub{sub}{proper
  ---{}-formula}\dindexsub{formula}{sub-{}---} if it is not ${\sv F}$
itself. 
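
The four conditions just given yield a recursive enumeration of
sub-formulas.  In the tuple encoding of the earlier sketch (again
merely an illustration), with \texttt{('var', j)} standing for the
variable $P_j$:
\begin{verbatim}
def subformulas(F):
    # List the occurrences of sub-formulas of F; a repeated
    # sub-formula is listed once for each occurrence.
    if F in (0, 1) or F[0] == 'var':
        return [F]
    if F[0] == 'not':
        return [F] + subformulas(F[1])
    return [F] + subformulas(F[1]) + subformulas(F[2])

P0 = ('var', 0)
print(subformulas(('or', P0, ('not', P0))))   # four occurrences
\end{verbatim}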

The sub-formulas of a given formula can be arranged in a tree.  For
example, the sub-formulas of $P_0\lor\lnot P_0$ are the nodes of the
following tree:
\begin{equation*}
  \xymatrix@!0{
 & *+[F]{P_0\lor \lnot P_0} \ar@{-}[dl] \ar@{-}[dr]  &&\\
*+[F]{P_0}& & *+[F]{\lnot P_0} \ar@{-}[dr] &\\
 & &                     & *+[F]{P_0}
}
\end{equation*}
The sub-formulas of $P_0\lor\lnot P_0$ are thus
$P_0$, $P_0\lor \lnot P_0$ itself, $\lnot P_0$, and $P_0$ again.  I write $P_0$
twice because it appears twice as a sub-formula of $P_0\lor\lnot P_0$.
However, we can give the truth-table for $P_0\lor\lnot P_0$ (along with an
extra column for our computations) thus:
\begin{center}
  \begin{tabular}{c|c|c}
    $P_0$ & $\lnot P_0$ & $P_0\lor\lnot P_0$\\ \hline
$0$ & $1$ & $1$\\
$1$ & $0$ & $1$
  \end{tabular}.
\end{center}
Alternatively, we can include a column for each sub-formula (even if
it is the same as another sub-formula):
\begin{center}
  \begin{tabular}{c|c|c|c}
    $P_0$ & $P_0\lor\lnot P_0$ & $\lnot P_0$ & $P_0$\\ \hline
$0$ & $1$ & $1$ & $0$\\
$1$ & $1$ & $0$ & $1$
  \end{tabular}.
\end{center}
Why would we do this?
The sub-formulas of any formula are in one-to-one correspondence%
\index{one-to-one!--- correspondence}%
\index{correspondence!one-to-one ---} 
with the variables and the connectives in the formula (that is, there
is a bijection between them, in the sense of \S~\ref{sect:functions}).
Indeed, compare the previous tree with the following:
\begin{equation*}
  \xymatrix@!0{
 & *+[F]{\lor} \ar@{-}[dl] \ar@{-}[dr]  &&\\
*+[F]{P_0}& & *+[F]{\lnot} \ar@{-}[dr] &\\
 & &                     & *+[F]{P_0}
}
\end{equation*}
We have the following correspondence between sub-formulas and symbols:
\begin{center}
  \begin{tabular}{rcl}
$P_0$            & $\leftrightsquigarrow$ & $P_0$\\ 
$P_0\lor\lnot P_0$ & $\leftrightsquigarrow$ & $\lor$\\ 
$\lnot P_0$      & $\leftrightsquigarrow$ & $\lnot$\\ 
$P_0$            & $\leftrightsquigarrow$ & $P_0$
  \end{tabular}
\end{center}
Using this correspondence, we can rewrite the last truth-table thus:
\begin{equation*}
  \begin{array}{c|c|c|c}
    P_0 & \lor & \lnot & P_0\\ \hline
0&1&1&0\\
1&1&0&1
  \end{array}
\end{equation*}
I propose to call this the 
\textbf{\full{} truth-table}%
\dindexsub{truth}{full ---{}-table}%
\dindexsub{full}{--- truth-table}%
\dindexsub{table}{full truth-{}---} of $P_0\lor\lnot 
P_0$; from it we can extract the 
\textbf{proper truth-table}%
\dindexsub{proper}{--- truth-table}%
\dindexsub{truth}{proper ---{}-table}%
\dindexsub{table}{proper truth-{}---} of $P_0\lor\lnot
P_0$ by taking
only one column headed by $P_0$, along with the
column headed by
$\lor$ (which corresponds to the whole formula):
\begin{equation*}
  \begin{array}{c||c}
    P_0 & P_0\lor\lnot P_0\\ \hline
0&1\\ \hline
1&1
  \end{array}
\end{equation*}

For another example, let ${\sv F}$ be the formula $P_0\lto \lnot P_1\lor P_2$.  The
sub-formulas of~${\sv F}$ compose the tree
\begin{equation*}
\xymatrix@!0{
  & *+[F]{P_0\lto \lnot P_1\lor P_2}\ar@{-}[dl] \ar@{-}[drrr]&&&&\\
*+[F]{P_0}&&&&*+[F]{\lnot P_1\lor P_2}\ar@{-}[dll] \ar@{-}[dr] &\\
&& *+[F]{\lnot P_1}\ar@{-}[dr] &&&*+[F]{P_2}\\
&&&*+[F]{P_1}&&
}
\end{equation*}
The corresponding tree of variables and connectives is:
\begin{equation*}
\xymatrix@!0{
  & *+[F]{\lto}\ar@{-}[dl] \ar@{-}[drrr]&&&&\\
*+[F]{P_0}&&&&*+[F]{\lor}\ar@{-}[dll] \ar@{-}[dr] &\\
&& *+[F]{\lnot}\ar@{-}[dr] &&&*+[F]{P_2}\\
&&&*+[F]{P_1}&&
}
\end{equation*}
From this we can get the \full{} truth-table, as described below.
The table itself is:
\begin{equation*}
\begin{array}{c|c|c|c|c|c}
 P_0&\lto&\lnot&P_1&\lor&P_2\\ \hline
 0&1&1&0&1&0\\
 1&1&1&0&1&0\\
 0&1&0&1&0&0\\
 1&0&0&1&0&0\\
 0&1&1&0&1&1\\
 1&1&1&0&1&1\\
 0&1&0&1&1&1\\
 1&1&0&1&1&1
\end{array}.
\end{equation*}
We can construct this in stages, working our way through the trees
drawn above, starting with the variables:
\begin{align*}
&\begin{array}{c|c|c|c|c|c}
 P_0&\lto&\lnot&P_1&\lor&P_2\\ \hline
 0&&&0&&0\\
 1&&&0&&0\\
 0&&&1&&0\\
 1&&&1&&0\\
 0&&&0&&1\\
 1&&&0&&1\\
 0&&&1&&1\\
 1&&&1&&1
\end{array},
&&
\begin{array}{c|c|c|c|c|c}
 P_0&\lto&\lnot&P_1&\lor&P_2\\ \hline
 0&&1&0&&0\\
 1&&1&0&&0\\
 0&&0&1&&0\\
 1&&0&1&&0\\
 0&&1&0&&1\\
 1&&1&0&&1\\
 0&&0&1&&1\\
 1&&0&1&&1
\end{array},
\end{align*}
then
\begin{equation*}
\begin{array}{c|c|c|c|c|c}
 P_0&\lto&\lnot&P_1&\lor&P_2\\ \hline
 0&&1&0&1&0\\
 1&&1&0&1&0\\
 0&&0&1&0&0\\
 1&&0&1&0&0\\
 0&&1&0&1&1\\
 1&&1&0&1&1\\
 0&&0&1&1&1\\
 1&&0&1&1&1
\end{array}
\end{equation*}
and finally the complete table given earlier.
The column giving the values of ${\sv F}$ itself is the last to be
filled in: in this case, the second column, under $\lto$.
The \emph{proper} truth-table for ${\sv F}$ is then
\begin{equation*}
\begin{array}{c|c|c||c}
 P_0 & P_1 & P_2 & {\sv F}\\ \hline
 0&0&0&1\\
 1&0&0&1\\
 0&1&0&1\\
 1&1&0&0\\
 0&0&1&1\\
 1&0&1&1\\
 0&1&1&1\\
 1&1&1&1
\end{array}.
\end{equation*}

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item\label{exer:tt}
Write \full{} truth-tables and proper truth-tables for the formulas:
\begin{enumerate}
\renewcommand{\labelenumii}{(\theenumii)}
\item
$P_0\lto P_1\lto P_0$;
\item
$P_0\land P_1\land P_2$;
\item
$P_0\eor P_1\eor P_2$;
\item
$(P_0\lto P_1\lor P_2)\lto\lnot P_0\lor P_1$;
\item
 $(P_0\lto P_1\lor\lnot P_2)\land(P_1\lto P_0\land P_2)\lto P_0\lto P_2$;
\item
$\lnot(\lnot P_2\lto P_0\lto\lnot(P_2\lto P_1))$.
\end{enumerate}
How many columns has each table?
\item
What does the truth-table for a nullary (closed) formula look like?
\item
For each $n$ in $\N$, describe the $n$-ary formulas whose full
truth-tables have fewer columns than their proper truth-tables.
\end{enumerate}

\section{Unique readability}\label{sect:unique}

We have to justify our definition of $\named {\sv F}$ for closed formulas
${\sv F}$: that is, we have to confirm that only one value of $\named{\sv F}$ can be computed
for each ${\sv F}$.

We have called a propositional formula $n$-ary if its variables are
among the first $n$ variables on the list $(P_0,P_1,P_2,\dots)$.  The
notion of \textbf{arity}\dindex{arity} applies to connectives themselves:
\begin{compactenum}
  \item
$\land$, $\lor$, $\lto$, $\liff$ and $\eor$ are
    \textbf{binary}\dindex{binary}\dindexsub{arity}{binary}\dindexsub{nary@$n$-ary}{binary},
    because 
    they are used to join \emph{two} formulas.
\item
$\lnot$ is
  \textbf{singulary}\dindex{singulary}%
\dindexsub{arity}{singulary}%
\dindexsub{nary@$n$-ary}{singulary}. 
\item
The constants $0$ and $1$ are
\textbf{nullary.}%
\dindex{nullary}%
\dindexsub{arity}{nullary}%
\dindexsub{nary@$n$-ary}{nullary}
\end{compactenum}
Although, by our convention, an $n$-ary formula is also $(n+1)$-ary, a
connective has a unique arity: since $\lnot$ is singulary, it is not
binary. 

The formulas joined by a connective in a formula are the
\textbf{arguments}\index{argument} of the connective.  In the formula
\begin{equation*}
  {\sv P}\lto\lnot {\sv Q}\land 1
\end{equation*}
(which stands for $({\sv P}\lto(\lnot {\sv Q}\land 1))$),
the arguments of $\lto$ are ${\sv P}$ and $\lnot {\sv Q}\land 1$ (in that order);
the arguments of $\land$ are $\lnot {\sv Q}$ and $1$; the argument of
$\lnot$ is ${\sv Q}$; and $1$ has no argument.

By definition, each propositional formula ${\sv F}$ meets one of the
following conditions:
\begin{compactenum}[1)]
  \item
${\sv F}$ is a variable;
\item
${\sv F}$ is a nullary connective;
\item
${\sv F}$ is $\lnot {\sv G}$ for some ${\sv G}$;
\item
${\sv F}$ is $({\sv G}*{\sv H})$ for some ${\sv G}$ and ${\sv H}$ and some binary connective $*$. 
\end{compactenum}
It is obvious that ${\sv F}$ can meet \emph{only} one of these conditions.
It is \emph{not} obvious that a formula $({\sv G}*{\sv H})$ cannot also be written
$({\sv G}'*'{\sv H}')$, where ${\sv G}'$ is a \emph{different} formula from ${\sv G}$.

Let ${\sv G}$ be $(\sv P\land {\sv Q})$, and let ${\sv H}$ be ${\sv R}$.  Then
$({\sv G}\lor {\sv H})$ is $((\sv P\land\sv Q)\lor\sv R)$, which
can be written as $(\sv U\land \sv V)$, where $\sv U$ is $(\sv P$, and $\sv V$
is ${\sv Q})\lor {\sv R}$.  But $\sv U$ is not a formula (why not?); neither is $\sv V$.

How do we know that, if ${\sv G}$ and ${\sv H}$ are more complicated, $({\sv G}*{\sv H})$
\emph{still} cannot be analyzed as a different application of a binary
connective?   How do we know that $({\sv G}*{\sv H})$ is \textbf{uniquely
  readable?}\dindexsub{unique}{---ly readable}
Our definition of $\named {\sv F}(\tuple e)$ requires unique readability.
To \emph{prove} unique readability, we can use the notion of an
\textsl{initial segment} of a formula.

Every formula is a string of symbols, written left to right.  If we
cut the string, then it is divided into two segments: an
\textbf{initial}%
\dindex{initial segment}%
\dindexsub{segment}{initial ---} 
and a 
\textbf{final}%
\dindex{final segment}%
\dindexsub{segment}{final ---} 
segment.  I allow the cut to come 
at an end: that is, I allow one of the two segments to be empty, so
that the other segment is the whole string.

\begin{example}
  The initial segments of $({\sv P}\lor\lnot {\sv P})$ are  $({\sv P}\lor\lnot {\sv P})$
  itself,  $({\sv P}\lor\lnot {\sv P}$,  $({\sv P}\lor\lnot{}$,  $({\sv P}\lor{}$,
  $({\sv P}$,  $($, and the empty string.
\end{example}

An initial segment of ${\sv F}$ that is not ${\sv F}$ itself is a \textbf{proper
  initial segment}\dindexsub{proper}{--- initial
  segment}\dindexsub{segment}{proper initial ---} of~${\sv F}$.

\begin{lemma}\label{lem:()}
\mbox{}
\begin{compactenum}
\item\label{item:fLR}
  Every propositional formula has \emph{just as many} left parentheses as right
  parentheses.
\item
If ${\sv F}$ is a variable, a constant, or a negation, then every
  initial segment of ${\sv F}$ has \emph{at least as many} left parentheses as
  right parentheses.
\item
  If ${\sv F}$ is a propositional
  formula that is not a variable, a 
  constant, or a negation, then every non-empty proper initial segment
  of ${\sv F}$ has \emph{more} left parentheses
  than right parentheses.
\end{compactenum}
\end{lemma}

\begin{proof}
  To prove the first claim, follow the pattern of
  Proposition~\ref{first-proof}.  

To prove the second and third claims, let $A$
  be the set of formulas ${\sv F}$ that do satisfy those claims.
Then, trivially, $A$ contains all variables and constants.  If $A$
  contains ${\sv F}$, then ${\sv F}$ has at least as many left as right
  parentheses, hence so does $\lnot {\sv F}$, which is a negation, so $\lnot {\sv F}$ is in $A$.
Finally, suppose $A$ contains ${\sv F}$
  and ${\sv G}$, and $*$ is a binary connective.  Every non-empty proper
  initial segment of
  $({\sv F}*{\sv G})$ is either $({\sv F}*{\sv U}$ for some initial segment ${\sv U}$ of ${\sv G}$, or $({\sv V}$
  for some initial segment ${\sv V}$ of ${\sv F}$.  But then ${\sv U}$ and ${\sv V}$ must have
  \emph{at least} as many left as right parentheses, since ${\sv F}$ and ${\sv G}$
  are in $A$; so $({\sv F}*{\sv U}$ and
  $({\sv V}$ have \emph{more} left than right parentheses.  Therefore
  $({\sv F}*{\sv G})$ is in $A$.  By the recursive definition of propositional
  formulas, $A$ contains all propositional formulas.
\end{proof}


\begin{lemma}\label{lem:pis}
  No proper initial segment of a propositional formula is a
  propositional formula.
\end{lemma}

\begin{proof}
Let $A$ comprise all formulas ${\sv F}$ such that no proper initial segment
of ${\sv F}$ is a formula.  Then $A$ contains all variables and constants.
Suppose $A$ contains ${\sv F}$, and $\sv U$ is an initial segment of $\lnot {\sv F}$
that is a formula.  Then $\sv U$ is $\lnot\sv V$ for some initial segment $\sv V$ of
${\sv F}$ that is also a formula; so $\sv V$ is ${\sv F}$; hence $\sv U$ is $\lnot {\sv F}$.
Therefore $\lnot {\sv F}$ is in $A$.

Finally, suppose ${\sv F}$ and ${\sv G}$ are in $A$, and $*$ is a binary
connective.  Every proper initial segment of $({\sv F}*{\sv G})$ is either empty
or has more left than right parentheses, by Lemma~\ref{lem:()}, so it
is not a formula.  Thus $({\sv F}*{\sv G})$ is in $A$.  By definition of
propositional formulas, $A$ contains all of them.
\end{proof}


An alternative proof of this lemma is by the method of \textbf{infinite
  descent:}\dindexsub{infinite}{---
  descent}\dindexsub{descent}{infinite ---}\dindexsub{method}{--- of
  infinite descent} that
is, it relies on something like
Lemma~\ref{lem:inf-desc}.  Suppose some proper initial segment of a
formula is also a formula.  Then the original formula is either $\lnot
{\sv F}$ or $({\sv F}*{\sv G})$.  If it is $\lnot {\sv F}$, then its proper initial segment is
  $\lnot {\sv F}'$, where ${\sv F}'$ is a formula that is a proper initial segment
  of ${\sv F}$.  If the original formula is $({\sv F}*{\sv G})$, then its proper initial
  segment must have the form $({\sv F}'*'{\sv G}')$, and then there are two
  possibilities:
  \begin{compactenum}[1)]
    \item
one of ${\sv F}$ and ${\sv F}'$ is a proper initial 
segment of the other, or
\item
${\sv F}$ and ${\sv F}'$ are the same formula, and ${\sv G}'$ is a
  proper initial segment of ${\sv G}$. 
  \end{compactenum}
Thus, for every formula with a proper initial
segment that is a formula, there is a \emph{shorter} formula with the
same property.  In this way, we get an infinite sequence of formulas,
each one strictly shorter than the preceding, which is absurd.

\begin{theorem}[Unique Readability]\label{thm:UR}
If $({\sv F}*{\sv G})$ and
$({\sv F}'*'{\sv G}')$ are the same propositional formula, then
${\sv F}$ and ${\sv F}'$ are the same (hence $*$ is $*'$, and ${\sv G}$ is ${\sv G}'$).
\end{theorem}

\begin{proof}
  If  $({\sv F}*{\sv G})$ and
$({\sv F}'*'{\sv G}')$ are the same formula, then one of ${\sv F}$ and ${\sv F}'$ is an
  initial segment of the other, so they are the same by
  Lemma~\ref{lem:pis}. 
\end{proof}

Now we know that $\named {\sv F}(\tuple e)$ is well defined, so truth-tables
are uniquely determined.
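
Unique readability also has a computational face.  In a formula
$({\sv F}*{\sv G})$, written as a string, the main connective is the
unique binary connective standing at parenthesis-depth $1$; for, by
Lemma~\ref{lem:()}, every binary connective inside ${\sv F}$ or
${\sv G}$ lies at depth at least $2$.  A Python sketch, with
one-character stand-ins for the five binary connectives:
\begin{verbatim}
CONNECTIVES = '&|>=^'   # stand-ins for the binary connectives

def main_connective(s):
    # Recover (F, *, G) from a string s of the form (F*G): scan for
    # the unique binary connective at parenthesis-depth 1.
    depth = 0
    for i, c in enumerate(s):
        if c == '(':
            depth += 1
        elif c == ')':
            depth -= 1
        elif c in CONNECTIVES and depth == 1:
            return s[1:i], c, s[i + 1:-1]
    raise ValueError('not a binary compound')

print(main_connective('((P&Q)|R)'))   # ('(P&Q)', '|', 'R')
\end{verbatim}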

It may seem as if parentheses are required to ensure unique readability.
We do have a convention that allows us to dispense with some parentheses:
we can write ${\sv P}\lto {\sv Q}\lto {\sv R}$ for $({\sv P}\lto ({\sv Q}\lto {\sv R}))$.  But we cannot
dispense with the parentheses in $({\sv P}\lto {\sv Q})\lto {\sv R}$, unless we come up with
a completely new system of notation.

\subsection*{Polish notation}

When we move into a second dimension and write formulas as
trees, then 
\begin{compactenum}[1)]
  \item
${\sv P}\lto {\sv Q}\lto {\sv R}$ becomes 
$\xymatrix@!0{
        & *+[F]{\lto} \ar@{-}[ddl] \ar@{-}[drr]  &&&\\
        &          && *+[F]{\lto} \ar@{-}[dl] \ar@{-}[dr]&\\
*+[F]{{\sv P}}&          & *+[F]{{\sv Q}}&& *+[F]{{\sv R}}}$
\item
$({\sv P}\lto {\sv Q})\lto {\sv R}$ becomes
  $\xymatrix@!0{
     &&& *+[F]{\lto} \ar@{-}[dll] \ar@{-}[ddr]  &\\
        & *+[F]{\lto} \ar@{-}[dl] \ar@{-}[dr]&&&\\
*+[F]{{\sv P}}&          & *+[F]{{\sv Q}}&& *+[F]{{\sv R}}}$
\end{compactenum}
The arrangement of the branches takes the place of parentheses.  Now
convert the trees back into strings, but write the symbols in the
following orders, respectively:
\begin{equation*}
  \xymatrix@!0{
        & *+[F]{0} \ar@{-}[ddl] \ar@{-}[drr]  &&&\\
        &          && *+[F]{2} \ar@{-}[dl] \ar@{-}[dr]&\\
*+[F]{1}&          & *+[F]{3}&& *+[F]{4}}
\qquad\qquad
\xymatrix@!0{
     &&& *+[F]{0} \ar@{-}[dll] \ar@{-}[ddr]  &\\
        & *+[F]{1} \ar@{-}[dl] \ar@{-}[dr]&&&\\
*+[F]{2}&          & *+[F]{3}&& *+[F]{4}}
\end{equation*}
The resulting strings are
\begin{equation*}
\mathord{\lto} {\sv P} \mathord{\lto} {\sv Q}{\sv R};  \qquad\qquad
\mathord{\lto}\mathord{\lto}{\sv P}{\sv Q}{\sv R}.
\end{equation*}
These are formulas written in \textsl{\L
  ukasiewicz}\tindex{Lukasiewicz@\L
  ukasiewicz}\tindexsub{notation}{Lukasiewicz ---@\L ukasiewicz ---}
  or \textsl{Polish
  notation}.\footnote{Church
  \cite[p.~38, n.~91]{MR18:631a} 
  calls it {\L ukasiewicz notation}, after its inventor---who was
  Polish; the common term today seems to be \Eng{Polish notation}.}  

A \textbf{signature}\dindex{signature} is a set of connectives.
Our definition of propositional formulas in \S~\ref{sect:connectives}
is a definition of the formulas of the signature
$\{0,1,\lnot,\land,\lor,\lto,\liff,\eor\}$ in
\textbf{infix notation.}\dindex{infix
  notation}\dindexsub{notation}{infix ---}  Infix notation makes sense
only 
when the connectives in use are $0$-, $1$- or $2$-ary.  Of a
signature $\lang$ containing connectives of possibly higher arities,
the formulas in \textbf{Polish notation}\dindexsub{Polish}{---
  notation}\dindexsub{notation}{Polish 
  ---} can be defined as follows: 
\begin{compactenum}
  \item
All variables are formulas of $\lang$ in Polish notation;
\item
if $n\in\N$, and $*$ is an $n$-ary connective in $\lang$, and if ${\sv F}_0$,
${\sv F}_1$, \dots, ${\sv F}_{n-1}$ are formulas of $\lang$ in Polish notation,
then
\begin{equation*}
  \mathrel* {\sv F}_0 {\sv F}_1\cdots {\sv F}_{n-1}
\end{equation*}
is a formula of $\lang$ in Polish notation.
\end{compactenum}
(The latter condition includes the case $n=0$; in this case, the list
$({\sv F}_0, \dots, {\sv F}_{n-1})$ is empty, so the nullary connective by itself 
is a formula.)  Thus, in Polish notation, every connective is followed
by the list of its arguments.
In \textbf{reverse Polish notation}\dindexsub{reverse}{--- Polish
  notation}\dindexsub{Polish}{reverse ---
  notation}\dindexsub{notation}{reverse Polish ---} (or \textbf{RPN}\dindex{RPN}),
the connective comes \emph{after} its 
arguments.  The corresponding RPN for arithmetic can be
convenient for electronic calculators, and it 
bears some resemblance to Turkish word-order.  Compare:
\begin{center}
    \begin{tabular}{r|ccccc|} \hline
 & \Eng{One} & \Eng{plus} & \Eng{two} & \Eng{is} & \Eng{three.} \\ 
infix notation: &$1$ & $+$ & $2$ & $=$ & $3$\\ \hline  
 & \Tur{Bir} & \Tur{iki} & \Tur{daha} & \Tur{\"u\c c} & \Tur{-t\"ur.}\\
RPN: &$1$ & $2$ & $+$  & $3$  & $=$ \\ \hline
  \end{tabular}
\end{center}
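
The convenience for calculators can be made precise: an RPN expression
is evaluated in a single left-to-right pass with a stack, and no
parentheses are ever needed.  A Python sketch (illustrative only):
\begin{verbatim}
OPS = {'+': lambda a, b: a + b,
       '-': lambda a, b: a - b,
       '*': lambda a, b: a * b}

def eval_rpn(tokens):
    # Push numbers; on an operator, pop its two arguments, which
    # have already been evaluated, and push the result.
    stack = []
    for t in tokens:
        if t in OPS:
            b = stack.pop()
            a = stack.pop()
            stack.append(OPS[t](a, b))
        else:
            stack.append(t)
    [result] = stack   # exactly one value must remain
    return result

print(eval_rpn([1, 2, '+']))   # 3, as in the table above
\end{verbatim}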


\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Prove part~\ref{item:fLR} of Lemma~\ref{lem:()}.
\item
For each symbol in the formula
 $({\sv P}\lto {\sv Q}\lor\lnot
{\sv R})\land(1\lto {\sv P}\land {\sv R})\lto(0\lto {\sv R})$, give the list of arguments, if it
 exists.  Write the formula in Polish notation.
\item
Prove that formulas in Polish notation have unique readability.  (You
can use infinite descent; but can you \emph{avoid} using this
technique?) 
\item\label{exer:tern}
Letting $\triangledown$ be the \emph{ternary} operation on $\B$ that
converts a triple $(x,y,z)$ to $\parity{(x+1)(y+1)(z+1)}$ (where $\mathrm p$ is as in \S~\ref{sect:parity}), construct a
truth-table for $\triangledown {\sv P}{\sv Q}{\sv R}$.
\end{enumerate}

%input{uniq-read-app}

\section{Logical equivalence}\label{equivalent}

Recall the distinction, stated in \S~\ref{algebra}, between terms and
polynomials.  Suppose ${\sv F}$ and ${\sv G}$ are two $n$-ary Boolean terms, that is,
propositional formulas.  They represent the same Boolean polynomial if
\begin{equation*}
  \named {\sv F}(\tuple e)=\named {\sv G}(\tuple e)
\end{equation*}
for all truth-assignments $\tuple e$.  In this case, as suggested in
\S~\ref{sect:connectives}, we shall write
\begin{equation*}
  {\sv F}\sim {\sv G};\glossary{${\sv F}\sim {\sv G}$}
\end{equation*}
and we shall
say that ${\sv F}$ and ${\sv G}$ are
\textbf{logically equivalent}
\dindexsub{logic}{---ally equivalent}\dindexsub{equivalent}{logically ---}
(or just \textbf{equivalent}\dindex{equivalent}).
Here we have a clear test for equivalence:  \emph{Two formulas are
equivalent if and only if they have the same proper truth-table;} more
precisely, they must have the same truth-table when both are
treated as $n$-ary for the same~$n$.  Let us call this test for
equivalence the \textbf{truth-table method.}\dindexsub{truth}{---{}-table
  method}\dindexsub{table}{truth-{}---
  method}\dindexsub{proof}{truth-table
  method}\dindexsub{method}{truth-table ---}
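
The truth-table method is entirely mechanical, so a machine can carry
it out.  Here is a sketch in Python (merely illustrative, and no part
of the formal development), using the tuple encoding of formulas from
the sketches of \S~\ref{sect:tt}: the constants are the integers $0$
and $1$, \texttt{('var', j)} stands for $P_j$, \texttt{('not', F)} for
a negation, and a triple such as \texttt{('imp', F, G)} for a binary
compound.
\begin{verbatim}
from itertools import product

BINARY = {'and': lambda a, b: a & b, 'or': lambda a, b: a | b,
          'imp': lambda a, b: (1 - a) | b,
          'iff': lambda a, b: 1 - (a ^ b), 'xor': lambda a, b: a ^ b}

def value_at(F, e):
    # The value of F under the truth-assignment e, found recursively.
    if F in (0, 1):
        return F
    if F[0] == 'var':
        return e[F[1]]
    if F[0] == 'not':
        return 1 - value_at(F[1], e)
    op, G, H = F
    return BINARY[op](value_at(G, e), value_at(H, e))

def equivalent(F, G, n):
    # The truth-table method: F ~ G iff, treated as n-ary formulas,
    # they agree under all 2**n truth-assignments.
    return all(value_at(F, e) == value_at(G, e)
               for e in product((0, 1), repeat=n))
\end{verbatim}
The example below is confirmed by \texttt{equivalent(P0, ('imp',
('or', P1, ('not', P1)), P0), 2)}, with \texttt{P0} and \texttt{P1}
standing for \texttt{('var', 0)} and \texttt{('var', 1)}.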

\begin{example}
Are the formulas $P_0$ and $(P_1\lor\lnot P_1)\lto P_0$ equivalent?   Their
\full{} truth-tables are
\begin{align*}
&  \begin{array}{c}
P_0\\ \hline
0\\1    
  \end{array}&&
  \begin{array}{c|c|c|c|c|c}
 (P_1 & \lor & \lnot & P_1) & \lto & P_0\\ \hline
 0&1&1&0&0&0\\
 0&1&1&0&1&1\\
 1&1&0&1&0&0\\
 1&1&0&1&1&1
 \end{array}.
\end{align*}
Considered as binary formulas, both have the same proper truth-table
\begin{equation*}
\begin{array}{c|c||c}
 P_0 & P_1 & {\sv F} \\ \hline
 0&0&0\\
 1&0&1\\
 0&1&0\\
 1&1&1
\end{array};
\end{equation*}
so the formulas are equivalent.
\end{example}

The truth-table method is a method of \emph{proving} that two formulas
are equivalent.  The method is highly specific:  For example, it cannot
obviously\footnote{A one-variable nonzero polynomial of degree $n$ has at most
  $n$ zeros; so if $f(x)$ and $g(x)$ are polynomials of degree $n$ at most, and
\begin{equation*}
0=f(x_0)-g(x_0)=f(x_1)-g(x_1)=\dots=f(x_n)-g(x_n), 
\end{equation*}
where all of the $x_k$ are distinct, then $\Forall xf(x)=g(x)$.  This method
  does not work for polynomials in more than one variable.}
be used to prove the arithmetic identities mentioned in
\S~\ref{algebra}, or to prove trigonometric identities like
\begin{equation*}
  \tan^2x+1=\sec^2x.
\end{equation*}
To prove \emph{this} identity, we can write a chain of recognizable
identities:
\begin{equation*}
  \tan^2x+1=\frac{\sin^2x}{\cos^2x}+1=
  \frac{\sin^2x}{\cos^2x}+\frac{\cos^2x}{\cos^2x}=
  \frac{\sin^2x+\cos^2x}{\cos^2x}= \frac 1{\cos^2x}=
\sec^2x.
\end{equation*}
This proof is an example of the \textsl{method of
  simplification}\tindexsub{method}{--- of
  simplification}\tindexsub{proof}{method of
  simplification}\tindexsub{simplification}{method of ---}.  This
method can also be used for propositional formulas.
In this context, we shall develop the
theoretical background of simplification in the next
section; the method itself is
developed in \S~\ref{simplify}, but will rely on the lemma below.  A proof by simplification, suitably
expressed, will be an example of a \textsl{formal
  proof}\tindexsub{proof}{formal ---}\tindexsub{formal}{--- proof}.

\begin{lemma}\label{lem:simp}
  \mbox{}
  \begin{compactenum}
\item\label{item:def}
\textbf{Definitions:}\dindex{definition}
  \begin{align*}
    {\sv P}\lto {\sv Q}&\sim \lnot {\sv P}\lor {\sv Q},\\
{\sv P}\liff {\sv Q}&\sim({\sv P}\lto {\sv Q})\land ({\sv Q}\lto {\sv P}),\\ 
{\sv P}\eor {\sv Q}&\sim \lnot ({\sv P}\liff {\sv Q}).
  \end{align*}
    \item
\textbf{Double negation:}\dindexsub{double}{---
  negation}\dindexsub{negation}{double ---} 
\begin{equation*}
  \lnot\lnot {\sv P}\sim {\sv P}.
\end{equation*}
\item
\textbf{De Morgan's Laws:}\dindex{De Morgan's
  Laws}\dindexsub{law}{De Morgan's L---s} 
\begin{equation*}
  \lnot ({\sv P}\lor {\sv Q})  \sim \lnot {\sv P}\land \lnot {\sv Q}, \qquad
  \lnot ({\sv P}\land {\sv Q})  \sim \lnot {\sv P}\lor \lnot {\sv Q}.
\end{equation*}
\item
\textbf{Commutativity:}\dindex{commutativity}\dindexsub{law}{L--- of
  Commutativity} 
\begin{equation*}
  {\sv P}\land {\sv Q}  \sim {\sv Q}\land {\sv P},\qquad
  {\sv P}\lor {\sv Q}  \sim {\sv Q}\lor {\sv P}.
\end{equation*}
\item
\textbf{Associativity:}\dindex{associativity}\dindexsub{law}{L--- of
  Associativity} 
\begin{equation*}
  ({\sv P}\land {\sv Q})\land {\sv R}  \sim {\sv P}\land ({\sv Q} \land {\sv R}),\qquad
  ({\sv P} \lor {\sv Q})\lor {\sv R}  \sim {\sv P}\lor ({\sv Q}\lor {\sv R}).
\end{equation*}
\item
\textbf{Distributivity:}\dindex{distributivity}\dindexsub{law}{L--- of
  Distributivity} 
\begin{equation*}
 {\sv P}\land({\sv Q}\lor {\sv R})\sim ({\sv P}\land {\sv Q})\lor({\sv P}\land {\sv R}), \qquad
 {\sv P}\lor({\sv Q}\land {\sv R})\sim ({\sv P}\lor {\sv Q})\land({\sv P}\lor {\sv R}).
\end{equation*}
\item
\textbf{Redundancies:}\dindex{redundancy}
\begin{align*}
  {\sv P}\land {\sv P} & \sim {\sv P}, & {\sv P}\land\lnot {\sv P} & \sim 0, & {\sv P}\land 1 & \sim {\sv P}, &
  {\sv P}\land 0 & \sim 0,\\ 
  {\sv P}\lor  {\sv P} & \sim {\sv P}, & {\sv P}\lor \lnot {\sv P} & \sim 1, & {\sv P}\lor  0 & \sim {\sv P}, &
  {\sv P}\lor  1 & \sim 1.
\end{align*}
\item
\textbf{New variables:}\dindexsub{variable}{new
  ---}\dindex{new variable}
\begin{equation*}
  {\sv P}\sim ({\sv P}\land {\sv Q})\lor ({\sv P}\land \lnot {\sv Q}), \qquad
  {\sv P}\sim ({\sv P}\lor {\sv Q})\land ({\sv P}\lor \lnot {\sv Q}).
\end{equation*}
  \end{compactenum}
\end{lemma}

The proof of the lemma is an exercise.
(The label \Eng{Definitions} in part~\ref{item:def} of the lemma is not a
literal account of how the connectives were defined in
\S~\ref{sect:connectives}.)

The problem of checking for equivalence can be formulated
in other ways. 
If ${\sv F}\sim 1$, then we write
\begin{equation}\label{eqn:semantic}
 \models {\sv F},
\end{equation}
and we say that ${\sv F}$ is a 
\textbf{tautology.}%
\dindex{tautology}%
\footnote{From the Greek \Gk{to
  a>uto}, meaning \Eng{the same}.  Originally a tautology was a
  redundant expression, such as \Eng{cease and desist}.}  (The
  semantic turnstile%
\index{semantic!--- turnstile}%
\index{turnstile!semantic ---} $\models$ was introduced in
  \S~\ref{sect:quantifiers}.  To be consistent with the notation in
  that earlier section, we might write~\eqref{eqn:semantic} as
  $\B\models {\sv F}$; but the variables in propositional formulas will
  always range over $\B$.) 
If ${\sv F}\sim
  0$, we call ${\sv F}$ a
\textbf{contradiction.}%
\dindex{contradiction}
We say ${\sv F}$ is
\textbf{satisfiable}%
\dindex{satisfiable} if it is not a
contradiction.  If both ${\sv F}$ and $\lnot {\sv F}$ are satisfiable,
then ${\sv F}$ is
  a 
\textbf{contingency.}%
\dindex{contingency}
Hence, in the truth-table for ${\sv F}$, if the
  column for ${\sv F}$ 
itself contains:
\begin{compactenum}[1)]
  \item
  only $1$s, then ${\sv F}$ is a tautology;
  \item
  only $0$s, then ${\sv F}$ is a contradiction;
  \item
  at least one $1$, then ${\sv F}$ is satisfiable;
\item
at least one $1$, and at least one $0$, then ${\sv F}$ is a contingency.
\end{compactenum}
Also, the following statements mean the same thing:
\begin{compactenum}[1)]
  \item
  ${\sv F}\sim {\sv G}$;
  \item
  $\models {\sv F}\liff {\sv G}$;
  \item
  $\lnot({\sv F}\liff {\sv G})$ is not satisfiable.
\end{compactenum}
Thus, in effect, a test for equivalence is a test for tautology, which is a test
for satisfiability.
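
The classification is read off mechanically from the column of values.
Continuing the Python sketch given earlier in this section (with
\texttt{value\_at} and \texttt{product} as defined there):
\begin{verbatim}
def classify(F, n):
    # Inspect the column for F in its truth-table.
    column = [value_at(F, e) for e in product((0, 1), repeat=n)]
    if all(column):
        return 'tautology'       # only 1s
    if not any(column):
        return 'contradiction'   # only 0s
    return 'contingency'         # a 1 and a 0: both F and its
                                 # negation are satisfiable

P0 = ('var', 0)
print(classify(('or', P0, ('not', P0)), 1))   # tautology
\end{verbatim}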

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Test for the equivalence of the following pairs of formulas by the
truth-table method:
\begin{enumerate}
  \item
${\sv P}$ and ${\sv Q}\lto {\sv P}$;
\item
${\sv P}$ and ${\sv Q}\lto({\sv P}\land {\sv Q})$;
\item
${\sv P}\lto({\sv Q}\lto {\sv R})$ and ${\sv P}\lto {\sv Q}\lto({\sv P}\lto {\sv R})$.
\end{enumerate}
\item
Give examples of tautologies, contradictions, and contingencies.
\item\label{exer:simp}
Prove Lemma \ref{lem:simp}.
\item\label{exer:and+1}
Establish the following equivalences:
\begin{enumerate}
\renewcommand{\labelenumii}{(\theenumii)}
\item\label{exer:not=1+}
$\lnot {\sv P}\sim 1\eor {\sv P}$;
\item
${\sv P}\lor {\sv Q}\sim {\sv P}\eor {\sv Q}\eor {\sv P}\land {\sv Q}$;
  \item
${\sv P}\eor {\sv Q}\sim {\sv Q}\eor {\sv P}$;
\item
$({\sv P}\eor {\sv Q})\eor {\sv R}\sim {\sv P}\eor {\sv Q}\eor {\sv R}$;
\item
${\sv P}\land({\sv Q}\eor {\sv R})\sim {\sv P}\land {\sv Q}\eor {\sv P}\land {\sv R}$;
\item
${\sv P}\eor {\sv P}\sim 0$.
\end{enumerate}
\item
Is there a formula ${\sv F}$ such that 
\begin{equation*}
\models ({\sv F}\lto ({\sv P}\liff
{\sv Q}))\land({\sv P}\lor({\sv Q}\lor {\sv F}))?
\end{equation*}
(One way to solve this problem is to write out a truth table for 
$({\sv R}\lto ({\sv P}\liff
{\sv Q}))\land({\sv P}\lor({\sv Q}\lor {\sv R}))$, then try to write a truth-table for $\sv F$.  An alternative is to use the next two sections to write the original formula in an equivalent form $(\sv G\lto\sv F)\land(\sv F\lto\sv H)$, then check whether $\models\sv G\lto\sv H$.)
\end{enumerate} 

\section{Substitution and replacement}

If ${\sv F}$ is a formula for which $(e_0,\dots,e_{n-1})$ is a
truth-assignment, then the constant formula
${\sv F}(e_0,\dots,e_{n-1})$ is obtained by
\textbf{substitution.}\dindex{substitution}  In this substitution, it is not essential that
each $e_i$ be in the 
set $\B$, that is, $\{0,1\}$; if $({\sv G}_0,\dots,{\sv G}_{n-1})$ is a list of
$n$ formulas,
then from ${\sv F}$ we can obtain the
formula\glossary{${\sv F}({\sv G}_0,\dots,{\sv G}_{n-1})$} 
\begin{equation*}
  {\sv F}({\sv G}_0,\dots,{\sv G}_{n-1})
\end{equation*}
by \emph{substitution} of ${\sv G}_j$ for each instance of $P_j$ in ${\sv F}$, for each
$j$ less than $n$.
Note that, if we are using the usual infix notation (see
\S~\ref{sect:unique}), but have removed parentheses as allowed by our
conventions, then the substitutions must be done with parentheses as
necessary to
ensure that each substituted formula becomes a \emph{sub-formula} of
the new formula.

\begin{example}
  Suppose ${\sv F}$ is $P_0\land (P_1\lto P_0)$, and ${\sv G}_0$ is $P_0\lto P_1$,
  and ${\sv G}_1$ is 
  $P_1\lto(P_0\lor P_2)$.  Then ${\sv F}({\sv G}_0,{\sv G}_1)$ is
  \begin{equation*}
    (P_0\lto P_1)\land((P_1\lto(P_0\lor
  P_2))\lto P_0\lto P_1),
  \end{equation*}
rather than $P_0\lto P_1\land(P_1\lto(P_0\lor
  P_2)\lto P_0\lto P_1)$.
\end{example}
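
In a tree-like encoding of formulas, such as the tuple encoding of the
earlier sketches, this care over parentheses is automatic: the trees
are nested whole, so each substituted formula becomes a sub-formula of
the result.  A sketch (illustrative only):
\begin{verbatim}
def substitute(F, Gs):
    # Replace each variable P_j in F by the formula Gs[j].
    if F in (0, 1):
        return F
    if F[0] == 'var':
        return Gs[F[1]]
    if F[0] == 'not':
        return ('not', substitute(F[1], Gs))
    op, G, H = F
    return (op, substitute(G, Gs), substitute(H, Gs))

# The example above: F is P0 and (P1 -> P0).
P0, P1, P2 = ('var', 0), ('var', 1), ('var', 2)
F  = ('and', P0, ('imp', P1, P0))
G0 = ('imp', P0, P1)                 # P0 -> P1
G1 = ('imp', P1, ('or', P0, P2))     # P1 -> (P0 or P2)
print(substitute(F, [G0, G1]))
\end{verbatim}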

Substitution is \textbf{associative}\dindex{associative} in that, if we substitute some
formulas ${\sv G}_i$ into ${\sv F}$, and then substitute some formulas ${\sv H}_j$ into
the result, we get the same formula as if we substitute the ${\sv H}_j$
first into the ${\sv G}_i$, and then the results into ${\sv F}$.  
Likewise, if you put a book in a box, then put the box on a table, you get the same result as if you first put the box on the table before putting the book in the box.
The formal
statement is the following:

\begin{lemma}[Associativity]%
\label{lem:sub}%
\dindexsub{associativity}{A--- Lemma}%
\dindexsub{lemma}{Associativity L---}
Suppose ${\sv F}$ is an $n$-ary formula, and 
\begin{equation*}
  ({\sv G}_0, \dots, {\sv G}_{n-1})
\end{equation*}
is a
  list of $n$ formulas, each one of them being $\ell$-ary.
  Let
  ${\sv H}$ be the formula ${\sv F}({\sv G}_0,\dots,{\sv G}_{n-1})$.  Then ${\sv H}$ is $\ell$-ary.
  Suppose $(\sv K_0, \dots, \sv K_{\ell-1})$ is a list of
  $\ell$ formulas.  Then the formula
  \begin{equation*}
      {\sv H}(\sv K_0,\dots,\sv K_{\ell-1})
  \end{equation*}
is the formula
  \begin{equation*}
    {\sv F}({\sv G}_0(\sv K_0,\dots,\sv K_{\ell-1}),\dots,{\sv G}_{n-1}(\sv K_0,\dots,\sv K_{\ell-1})).
  \end{equation*}
Finally, suppose $\tuple e$ is a truth-assignment for the ${\sv G}_j$.  Then
  $\tuple e$ is a truth-assign\-ment for ${\sv H}$.  If also
  \begin{equation*}
\named {\sv G}_j(\tuple e)= f_j
  \end{equation*}
for each $j$ in $\{0,\dots,n-1\}$, then $(f_0,\dots,f_{n-1})$ is a
truth-assignment $\tuple f$ for ${\sv F}$, and
\begin{equation*}
  \named {\sv H}(\tuple e)=\named {\sv F}(\tuple f).
\end{equation*}
\end{lemma}

\begin{proof}
I claim that the proposition is obvious,\footnote{However, Church
  \cite[\S~15, p.~97]{MR18:631a} proves a version of this lemma by
  induction.} in the sense that no written
proof will make the truth of the proposition clearer than it already
is to the reader who has understood the proposition.  
\end{proof}

Is a truth-assignment for ${\sv F}({\sv G}_0,\dots,{\sv G}_{n-1})$ also
a truth-assignment for the ${\sv G}_j$?  It is, if all of the
variables $P_0$, \dots, $P_{n-1}$ actually \emph{appear} in ${\sv F}$;
otherwise it may not be:

\begin{example}
  Suppose ${\sv F}$ is just $P_0$, \emph{considered} as a binary formula.
  Let ${\sv G}_i$ be $P_i$ when $i\in\{0,1\}$.  Then
  ${\sv F}({\sv G}_0,{\sv G}_1)$ is $P_0$.  Now, $(0)$ is a truth-assignment for the
  formula $P_0$; but $(0)$ is not long enough to be a truth-assignment
  for ${\sv G}_1$. 
\end{example}

\begin{theorem}[Substitution]
  \label{thm:substitution}\dindexsub{substitution}{S---
  Theorem}\dindexsub{theorem}{Substitution Th---}
If 
\begin{equation*}{\sv F}(P_0,\dots,P_{n-1})\sim {\sv G}(P_0,\dots,P_{n-1}),\end{equation*} 
and $({\sv H}_0,\dots,{\sv H}_{n-1})$ is a list of $n$ formulas, then 
\begin{equation*}{\sv F}({\sv H}_0,\dots,{\sv H}_{n-1})\sim {\sv G}({\sv H}_0,\dots,{\sv H}_{n-1}).\end{equation*}
\end{theorem}

\begin{proof}
Since ${\sv F}\sim {\sv G}$, we have 
\begin{equation}\label{eqn:sub}
  \named {\sv F}(\tuple e)=\named {\sv G}(\tuple e)
\end{equation}
for all truth-assignments $\tuple e$ for ${\sv F}$ and ${\sv G}$.  Let ${\sv F}'$ be
${\sv F}({\sv H}_0,\dots,{\sv H}_{n-1})$, and let ${\sv G}'$ be ${\sv G}({\sv H}_0,\dots,{\sv H}_{n-1})$.
Suppose $\tuple f$ is a truth-assignment for the ${\sv H}_j$, and let
$\named {\sv H}_j(\tuple f\;)=e_j$.  Then
\begin{align*}
\named{{\sv F}'}(\tuple f\;)
&=\named {\sv F}(\tuple e)&&\text{[by Lemma \ref{lem:sub}]}\\
&=\named {\sv G}(\tuple e)&&\text{[by \eqref{eqn:sub}]}\\
&=\named{{\sv G}'}(\tuple f\;)&&\text{[by Lemma \ref{lem:sub}].}
\end{align*}
Therefore ${\sv F}'\sim {\sv G}'$.  This completes the proof.\footnote{This is
  also Burris's proof \cite[\S~2.3, pp.~46f.]{Burris}, although Burris's
  use of the fact given in Lemma~\ref{lem:sub} is not entirely explicit.}
\end{proof}

\begin{corollary}\label{cor:substitution}
A tautology remains a tautology when arbitrary formulas are
substituted for the variables.
\end{corollary}

\begin{example}\label{example:subst}
Since ${\sv P}\lor\lnot {\sv P}$ is a tautology, so is $({\sv P}\lto {\sv Q})\lor\lnot({\sv P}\lto
{\sv Q})$.
\end{example}

In ordinary language, the words \Eng{substitution} and \Eng{replacement} are
nearly synonyms, although there is a distinction.  From the expression
$abc$, we get $adc$ in a way that can be described in two ways:
\begin{compactenum}
  \item
by replacing $b$ \emph{with} $d$, or
\item
by substituting $d$ \emph{for} $b$.
\end{compactenum}
When doing logic, we shall make another important
distinction.  If ${\sv F}$ is a sub-formula of ${\sv G}$, then we may
\textbf{replace}\dindex{replace} ${\sv F}$ with another formula ${\sv F}'$.  Here, to replace ${\sv F}$ is
to replace a particular \emph{occurrence} of ${\sv F}$ (since possibly ${\sv F}$
appears more than once as a sub-formula of ${\sv G}$).

\begin{example}
  In ${\sv P}\lor\lnot {\sv P}$, replacing the second occurrence of ${\sv P}$ with ${\sv Q}$
  yields ${\sv P}\lor\lnot {\sv Q}$.
\end{example}

\begin{theorem}[Replacement]\label{thm:replacement}
  \dindexsub{replace}{R---ment Theorem}\dindexsub{theorem}{Replacement
    Th---}
Suppose ${\sv F}$ is a sub-formula of ${\sv G}$, and
\begin{equation*}{\sv F}\sim {\sv F}'.\end{equation*}
Let ${\sv G}'$ be the result of replacing ${\sv F}$ with ${\sv F}'$ in ${\sv G}$.  Then 
\begin{equation*}{\sv G}\sim {\sv G}'.\end{equation*}
\end{theorem}

\begin{proof}
  Say ${\sv G}$ is $n$-ary.  Let ${\sv H}(P_0,\dots,P_n)$ be the
  result of replacing ${\sv F}$ with $P_n$ in ${\sv G}$.  Then ${\sv G}$ itself is the
  formula
  \begin{equation*}
    {\sv H}(P_0,\dots,P_{n-1},{\sv F}),
  \end{equation*}
and ${\sv G}'$ is ${\sv H}(P_0,\dots,P_{n-1},{\sv F}')$.  The remainder of the
proof\footnote{Burris \cite[\S~2.4, pp.~48ff.]{Burris} gives an
  elaborate proof using induction; but I think the work is
  unnecessary, once one has Lemma~\ref{lem:sub}.  Church's proof
\cite[\S~15, p.~101]{MR18:631a} leaves details to the reader, but
also involves induction.  Moreover, Church's proof refers to the
principle of unique
readability, which Burris seems not to discuss.}    is
an exercise involving Lemma~\ref{lem:sub}.
\end{proof}

\begin{corollary}\label{cor:replacement}
A tautology remains a tautology when a sub-formula is replaced with
an equivalent sub-formula.
\end{corollary}

\begin{example}
Since $\models({\sv P}\lto {\sv Q})\lor\lnot({\sv P}\lto {\sv Q})$ by Example
\ref{example:subst}, and 
\begin{equation*}\lnot({\sv P}\lto {\sv Q})\sim {\sv P}\land\lnot {\sv Q},\end{equation*} 
we have $\models({\sv P}\lto {\sv Q})\lor({\sv P}\land\lnot {\sv Q})$.
\end{example}

The Substitution and Replacement Theorems work together in the
following way.  From known equivalences, Substitution lets us derive
many more.  By Replacement, we can use these equivalences to write
given formulas in different (but equivalent) form.

That, in short, is the method of simplification, to be developed in \S~\ref{simplify}.
Our first example of the procedure will be in \S~\ref{sect:adequacy}.
Meanwhile, in \S~\ref{normal}, we shall describe some formulas such
that \emph{every} formula is equivalent to one of them.  These
equivalences can be established by the procedure just described, using
the stock of equivalences presented in Lemma~\ref{lem:simp}.

\subsection*{Exercises}

\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
If ${\sv F}({\sv P})$ is ${\sv P}\lto {\sv P}\lto {\sv P}$, what is ${\sv F}({\sv F}({\sv P}))$, written with the fewest possible parentheses?
  \item
Prove Corollary \ref{cor:substitution}.
\item
Complete the proof of the Replacement Theorem (\ref{thm:replacement}).
\item
Prove Corollary \ref{cor:replacement}.
\end{enumerate}

\section{Normal forms}\label{normal}

We noted in \S~\ref{algebra} that different arithmetic \emph{terms}
may represent the same \emph{polynomial.}  Among those terms, there
may be a preferred term, which might be called a 
\textsl{normal form}%
\tindexsub{normal}{--- form of a polynomial} of the polynomial. 

\begin{example}
The normal form of $(5+x^2-2x)(1+x)-(x-1+2x^2)(x^3+6)$ might be
\begin{equation*}
11-x-13x^2+2x^3-x^4-2x^5,
\end{equation*}
since the latter term is usually easier to work with.
\end{example}

If we have the truth-table of a formula, then we can read off an
equivalent formula in so-called 
\textsl{disjunctive normal form.}%
\tindexsub{disjunctive}{--- normal form}%
\tindexsub{normal}{disjunctive --- form}  
The
general procedure is described next, then illustrated by
Example~\ref{example:dnf}. 

Suppose we have the truth-table for a formula ${\sv F}(P_0,\dots,P_{n-1})$.
Say there are $m$ rows in which the entry for ${\sv F}$ itself is $1$.  Then
$m\leq 2^n$.  If we ignore the other rows (namely, those rows in
which the
entry for ${\sv F}$ is $0$), then what remains has the form
\begin{equation*}
\begin{array}{c|c|c|c||c}
 P_0 & P_1 & \dots & P_{n-1} & {\sv F} \\ \hline \rule{0pt}{3ex}
 e_0^0&e_1^0&\dots&e_{n-1}^0&1\\  \rule{0pt}{3ex}
 e_0^1&e_1^1&\dots&e_{n-1}^1&1\\ \rule{0pt}{3ex}
 \vdots & \vdots & & \vdots & \vdots\\ \rule{0pt}{3ex}
 e_0^{m-1}&e_1^{m-1}&\dots&e_{n-1}^{m-1}&1
\end{array}
\end{equation*}
where each $e_j^i$ is in $\B$.  If $i<m$
and $j<n$, then let us define $P^i_j$ to be the
formula 
\begin{equation*}
  \begin{cases}
  \lnot P_j,&\text{if }e^i_j=0;\\
  P_j,      &\text{if }e^i_j=1.
  \end{cases}
\end{equation*}
If $i<m$, let ${\sv G}^i$ be the conjunction 
\begin{equation*}
  P_0^i\land\dotsb\land P_{n-1}^i.  
\end{equation*}
The
formulas ${\sv G}^i$ can be called the 
\textbf{normal disjunctive constituents}%
\dindexsub{normal}{--- disjunctive constituent}%
\dindexsub{disjunctive}{normal --- constituent}%
\dindexsub{constituent}{normal disjunctive ---} of ${\sv F}$.
Their disjunction,
\begin{equation*}
  {\sv G}^0\lor {\sv G}^1\lor\dotsb\lor {\sv G}^{m-1},
\end{equation*}
is called a 
\textbf{disjunctive normal form}%
\dindexsub{disjunctive}{--- normal form}%
\dindexsub{normal}{disjunctive --- form} 
for ${\sv F}$.  (The other
disjunctive normal forms for ${\sv F}$ are obtained by re-ordering the
constituents ${\sv G}^i$.)  Theorem~\ref{thm:dnf} below shows that every
formula is equivalent to its disjunctive normal forms.

Note here that we speak of conjunctions and disjunctions of arbitrarily
many formulas. The disjunction of the formulas ${\sv H}_0,\dots,{\sv H}_{r-1}$ is
\begin{equation*}
  {\sv H}_0\lor {\sv H}_1\lor\dots\lor {\sv H}_{r-1},
\end{equation*}
which can also be written as
\begin{equation}\label{eqn:disj}
  \bigvee_{i<r}{\sv H}_i.\glossary{$\bigvee_{i<r}{\sv H}_i$}
\end{equation}
If $r=1$, then this formula is just ${\sv H}_0$.  If $r=0$, then, by
convention,\footnote{The convention is reasonable:  Instead of
  \eqref{eqn:disj}, we could write $\bigvee\{{\sv H}_0,\dots,{\sv H}_{r-1}\}$;
  informally, this says that \emph{at least one} of the formulas ${\sv H}_i$
  is true.  If $r=0$, then there are no formulas ${\sv H}_i$, and in
  particular there is no such \emph{true} formula, so
  $\bigvee\{{\sv H}_0,\dots,{\sv H}_{r-1}\}$ is false.} the formula in~\eqref{eqn:disj} is understood to be $0$.  In
particular, the disjunctive normal
form of a contradiction is $0$.  The conjunction
\begin{equation*}
  \bigwedge_{i<r}{\sv H}_i\glossary{$\bigwedge_{i<r}{\sv H}_i$}
\end{equation*}
is defined analogously, and is $1$ if $r=0$.

\begin{example}\label{example:dnf}
Here is the \full{} truth-table of a particular disjunction:
\begin{equation*}
\begin{array}{c|c|c|c|c|c|c|c|c}
 \lnot&(P_0&\lto&P_1)&\lor&(P_2&\land&\lnot&P_0)\\ \hline
 0&0&1&0&0&0&0&1&0\\
 1&1&0&0&1&0&0&0&1\\
 0&0&1&1&0&0&0&1&0\\
 0&1&1&1&0&0&0&0&1\\
 0&0&1&0&1&1&1&1&0\\
 1&1&0&0&1&1&0&0&1\\
 0&0&1&1&1&1&1&1&0\\
 0&1&1&1&0&1&0&0&1
\end{array}.
\end{equation*}
Extract the rows in which the column headed $\lor$ features $1$, and take
only one each of the columns for $P_0$, $P_1$ and $P_2$:
\begin{equation*}
\begin{array}{c|c|c}
P_0&P_1&P_2\\ \hline
 1&0&0\\
 0&0&1\\
 1&0&1\\
 0&1&1
\end{array}.
\end{equation*}
The disjunctive normal form for $\lnot(P_0\lto P_1)\lor(P_2\land \lnot P_0)$ is
therefore
\begin{equation*}
  (P_0\land\lnot P_1\land\lnot P_2)\lor
  (\lnot P_0\land\lnot P_1\land P_2)\lor
  (P_0\land \lnot P_1\land P_2)\lor
  (\lnot P_0\land P_1\land P_2).
\end{equation*}
\end{example}

An $n$-ary formula
is in disjunctive normal form if the formula is precisely
\begin{equation*}
\bigvee_{i<m}\bigwedge_{j<n}P_j^i,
\end{equation*}
where each sub-formula $P^i_j$ is either $P_j$ or $\lnot P_j$, and all
of the constituents $\bigwedge_{j<n}P^i_j$ are distinct.  Note
especially that each constituent must contain the same variables.

\begin{example}
  The formula $\lnot(P_0\lto P_1)\lor(P_2\land \lnot P_0)$ is equivalent to
\begin{equation*}
  (P_0\land\lnot P_1)\lor
  (\lnot P_0\land\lnot P_1\land P_2)\lor
   (\lnot P_0\land P_1\land P_2),
\end{equation*}
but this is \emph{not} a disjunctive normal form, since one of the
constituents does not contain $P_2$.
\end{example}

\begin{theorem}\label{thm:dnf}
Every formula is equivalent to its disjunctive normal forms.
\end{theorem}

\begin{proof}
  Let us use the notation of the definition above, in which $\sv F$ has the DNF $\bigvee_{i<m}\sv G^i$.  Write ${\sv H}$ for the latter formula. Then we have to show 
  ${\sv F}\sim {\sv H}$.  For the truth-assignment
  $(e_0^i,\dots,e_{n-1}^i)$, let us write $\tuple e^i$.  For arbitrary
  truth-assignments 
  $\tuple f$ for the ${\sv G}^i$, we have
  \begin{equation*}
    \named{{\sv G}^i}(\tuple f\;)=
    \begin{cases}
      1,&\text{ if }\tuple f=\tuple e^i;\\
0,&\text{ if }\tuple f\neq\tuple e^i.
    \end{cases}
  \end{equation*}
Then
\begin{equation*}
  \named {\sv H}(\tuple f\;)=
  \begin{cases}
    1,& \text{ if }\tuple f\;\in\{\tuple e^0,\dots,\tuple
    e^{m-1}\};\\
  0,& \text{ if }\tuple f\;\notin\{\tuple e^0,\dots,\tuple e^{m-1}\}.
  \end{cases}
\end{equation*}
Hence ${\sv H}$ and ${\sv F}$ have the same truth-table.
\end{proof}

There is also a \textbf{conjunctive normal form}
\dindex{conjunctive normal form}%
\dindexsub{normal}{conjunctive --- form} 
or \textbf{CNF;} it looks like the
disjunctive form, except that the $\land$ and the $\lor$ have switched
roles.  You read it off from the truth-table again, but now you
look for $0$ (not $1$) in the column for the formula; in the
constituent arising from such a row, the literal $P^i_j$ resolves to
$P_j$ if the entry for $P_j$ in that row is $0$, and to $\lnot P_j$ if
that entry is $1$.

In particular, if a disjunctive form for an $n$-ary formula has $m$
constituents, then a conjunctive form for the same formula will have
$2^n-m$ constituents.  Whether it is easier to work with the
disjunctive or the conjunctive normal form depends on how big $m$ is. 
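Continuing the Python sketch given before Example~\ref{example:dnf}
(so that \texttt{product} is already imported, and literals are encoded
as before), the conjunctive form is read off dually:
\begin{verbatim}
def cnf(f, n):
    # One disjunctive constituent for each row of the
    # truth-table in which f is 0.  The literal (j, 1),
    # standing for P_j, is used where the row has a 0
    # for P_j; the literal (j, 0), standing for not-P_j,
    # is used where the row has a 1.
    return [tuple((j, 1 - e[j]) for j in range(n))
            for e in product((0, 1), repeat=n)
            if f(*e) == 0]
\end{verbatim}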

\begin{example}
To obtain the conjunctive normal form of the formula in
Example~\ref{example:dnf}, from its truth-table we extract
\begin{equation*}
  \begin{array}{c|c|c}
    P_0 & P_1 & P_2 \\ \hline
0&0&0\\
0&1&0\\
1&1&0\\
1&1&1
  \end{array},
\end{equation*}
from which we read off
\begin{equation*}
  (P_0\lor P_1\lor P_2)\land
  (P_0\lor\lnot P_1\lor P_2)\land
  (\lnot P_0\lor\lnot P_1\lor P_2)\land
  (\lnot P_0\lor\lnot P_1\lor\lnot P_2).
\end{equation*}
\end{example}

\begin{theorem}\label{thm:CNF}
  Every formula is equivalent to its conjunctive normal form.
\end{theorem}

If ${\sv F}$ is a tautology in the variables $P_0,\dots,P_{n-1}$,
then its disjunctive normal form will be the disjunction of the $2^n$
possible constituents
\begin{equation*}
  P_0^j\land\dots\land P_{n-1}^j.
\end{equation*}
Suppose in general that we have a method of finding disjunctive normal
forms that does not rely on truth-tables.  (In \S~\ref{simplify}
we shall describe such a method.)  Applying this method to a
formula in $n$ variables, if we arrive at a disjunction of $2^n$ distinct
constituents, then the original formula must have been a tautology.
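In terms of the \texttt{dnf} function sketched earlier, the criterion
is one line of Python.  (Of course that sketch computes the normal form
from the truth-table, so the following merely illustrates the counting
criterion, not the truth-table-free method.)
\begin{verbatim}
def is_tautology(f, n):
    # f is a tautology iff every one of the 2^n possible
    # constituents appears in its disjunctive normal form.
    return len(dnf(f, n)) == 2 ** n
\end{verbatim}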

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Find a CNF for $P_0\lto P_1\lto\cdots\lto P_n$.
\item
Find two formulas $\sv F$ with the truth-table $\begin{array}[t]{c|c|c||c}
P_0&P_1&P_2&\sv F\\\hline
0&0&0&1\\
1&0&0&1\\
0&1&0&0\\
1&1&0&0\\
0&0&1&0\\
1&0&1&1\\
0&1&1&0\\
1&1&1&0
\end{array}$.
\item
Find a DNF for $P_0\eor P_1\eor\cdots\eor P_n$.
\item
What is the DNF
for a tautology in no variables?
\item
Find the disjunctive and conjunctive normal forms for:
\begin{enumerate}
\renewcommand{\labelenumii}{(\theenumii)}
  \item
$P_0\lto P_1\lto P_2$;
\item
$(\lnot P_0\lto P_1)\land(\lnot P_1\lto P_0)\lto(\lnot
P_0\lor\lnot P_1)$;
\item
$P_0\eor P_1\liff P_2$.
\end{enumerate}
\item
Prove Theorem~\ref{thm:CNF}.
\item
Show that for any formula ${\sv F}(P_0,P_1,P_2,P_3)$, either the disjunctive
or the conjunctive normal form has no more than $8$ constituents.
\item
Here is an alternative approach to disjunctive normal forms.
Supposing $I\included\{0,\dots,n-1\}$ and $j<n$, let
\begin{equation*}
e^I_j=\begin{cases}
1,&\text{ if }j\in I,\\
0,&\text{ if }j\notin I.
\end{cases}
\end{equation*}
Then let $\tuple e^I=(e^I_0,\dots,e^I_{n-1})$.  If $\sv F$ is an $n$-ary formula, show
\begin{equation*}
\sv F\sim\bigvee_{\named{\sv F}(\tuple e^I)=1}\Biggl(\bigwedge_{j\in I}P_j\land\bigwedge_{\substack{k\notin I\\k<n}}\lnot P_k\Biggr).
\end{equation*}
\item
This exercise develops a new kind of normal form.
\begin{enumerate}
\renewcommand{\theenumii}{\Alph{enumii}}
\renewcommand{\labelenumii}{\theenumii.}
\item
Supposing $\sv F$ has the DNF $\sv G^0\lor\dotsb\lor\sv G^{m-1}$, show
\begin{equation*}
\sv F\sim\sv G^0\eor\dotsb\eor\sv G^{m-1}.
\end{equation*}
\item
For a formula $\sv G^0\eor\dotsb\eor\sv G^{m-1}$, use also the notation $\sum_{i<m}\sv G^i$.  If $I\included\{0,\dots,n-1\}$, show
\begin{equation*}
\bigwedge_{j\in I}P_j\land\bigwedge_{\substack{k\notin I\\k<n}}\lnot P_k\sim \sum_{\substack{I\included J\\
J\included\{0,\dots,n-1\}}}\bigwedge_{k\in J}P_k.
\end{equation*}
\item
Show that every satisfiable\index{satisfiable} $n$-ary formula is equivalent to a formula
\begin{equation*}
  {\sv F}_0\eor {\sv F}_1\eor\dotsb\eor {\sv F}_{m-1},
\end{equation*}
where all of the ${\sv F}_i$ are distinct, and, for each $i$ in
$\{0,1,\dots,m-1\}$, there is a subset $I$ of 
$\{0,1,\dots,n-1\}$ such that ${\sv F}_i$ is the conjunction
$\bigwedge_{j\in I}P_j$. 
\end{enumerate}
\end{enumerate}

\section{Adequacy}\label{sect:adequacy}

In \S~\ref{sect:unique}, a set of connectives is called a signature.
I said in \S~\ref{sect:p-formulas} that propositional logic was the
study of propositional formulas.  I want now to say more precisely that
\textbf{a propositional logic}\dindexsub{proposition}{a ---al
  logic}\dindexsub{logic}{a propositional ---} is (the study of) the
\emph{set} of 
propositional formulas \emph{of a particular signature}.  Then we have
been studying the propositional logic of the signature
\begin{equation*}
  \{\land,\lor,\lnot,\lto,\liff,\eor,0,1\}.
\end{equation*}
However, we have just seen that every formula with a truth-table is
equivalent to a formula with the smaller signature
$\{\land,\lor,\lnot\}$.  (If the formula is a contingency,\index{contingency} then just
take a conjunctive or disjunctive normal form.  For a contradiction,
take $P_0\land\lnot P_0$; for a tautology, $P_0\lor \lnot P_0$.)

Another way to say this is that every Boolean polynomial is
represented by a formula in $\{\land,\lor,\lnot\}$.  A technical term
for this feature of a signature is 
\textbf{adequacy.}%
\dindex{adequacy, adequate (signature)}
A signature $\lang$ is 
\textbf{adequate}%
\dindexsub{signature}{adequate ---}
 if every formula in \emph{every} signature
is equivalent to a formula in $\lang$.
The following is obvious.

\begin{lemma}\label{lem:LL'ad}
  If $\lang$ is an adequate signature, and $\lang'$ is a signature
  that includes $\lang$, then $\lang'$ is adequate.
\end{lemma}

In short, if a signature is adequate, then so is any \emph{larger}
signature.

There are proper subsets of $\{\land,\lor,\lnot\}$ that are
adequate.  The following was proved by Emil Post in
1921.\footnote{Post's method is different from ours; see his article~\cite[pp.~167~f.]{Post}.}

\begin{theorem}\label{thm:ad}
The signature $\{\lor, \lnot\}$ is adequate.  
\end{theorem}

\begin{proof}
Since $\{\land,\lnot,\lor\}$ is adequate, it is enough to show that any
formula in \emph{this} signature is equivalent to a formula in $\{\lor,
\lnot\}$.
Suppose ${\sv F}$ is in $\{\land,\lnot,\lor\}$.  Every
instance of $\land$ in ${\sv F}$ determines (as in \S~\ref{sect:unique}) a
sub-formula of ${\sv F}$ that is a 
conjunction.  Say this conjunction is ${\sv G}\land {\sv H}$, where ${\sv G}$ and ${\sv H}$ are
sub-formulas of ${\sv F}$.  We have an equivalence
\begin{equation*}
  {\sv P}\land {\sv Q}\sim\lnot(\lnot {\sv P}\lor\lnot {\sv Q})
\end{equation*}
(as can be checked by truth-tables); therefore, by the Substitution
Theorem (\ref{thm:substitution}), we have
\begin{equation*}
  {\sv G}\land {\sv H}\sim\lnot(\lnot {\sv G}\lor\lnot {\sv H}).
\end{equation*}
By the Replacement Theorem (\ref{thm:replacement}), in ${\sv F}$ we can replace ${\sv G} \land {\sv H}$ with
$\lnot(\lnot {\sv G}\lor\lnot {\sv H})$.  In this way, we can remove all instances of
$\land$ from ${\sv F}$, obtaining a formula in $\{\lor,\lnot\}$ that
is equivalent to ${\sv F}$.
\end{proof}

Similarly, we have:

\begin{theorem}\label{thm:and-not-ad}
The signature $\{\land, \lnot\}$ is adequate.    
\end{theorem}

\begin{corollary}
  The signature $\{\land,\eor,1\}$ is adequate.
\end{corollary}

\begin{proof}
The signature $\{\land, \lnot\}$ is adequate, but
  the connective $\lnot$ can be expressed in terms of $\eor$
  and $1$, since
  \begin{equation*}
    \lnot {\sv P}\sim 1\eor {\sv P}
  \end{equation*}
by \S~\ref{equivalent},
Exercise~\eqref{exer:not=1+}; so
$\{\land,\eor,1\}$ is adequate. 
\end{proof}

The proofs of the last three numbered propositions are examples of a
general method for 
proving adequacy of a signature $\lang$:  Take a signature $\lang'$ that is
known to be adequate, and show that every
connective in $\lang'$ can be expressed with the
connectives of $\lang$.  Note well the two ingredients of the
argument:
\begin{compactenum}
  \item
$\lang'$ is known to be adequate.
\item
The elements of $\lang'$ can be expressed in terms of $\lang$.
\end{compactenum}
Although students sometimes do it anyway,
it would be useless to observe in this context that the elements of
$\lang$ can be expressed in terms of $\lang'$.  Remember that this
observation is immediate whenever $\lang\included\lang'$; so by itself
it can tell us nothing about the adequacy of $\lang$.

For another example, let $\curlywedge$ be the
\textbf{Schr\"oder
  connective;}\dindexsub{connective}{Schr\"oder
  ---}\dindex{Schroder connective@Schr\"oder
  connective}\footnote{According to Burris 
  \cite[\S~2.5.2, p.~53]{Burris}, Schr\"oder showed in 1880 that the
  `standard connectives'---say, the ones we have been using so far---can be
  expressed using this connective.  Post's later result---our
  Theorem~\ref{thm:ad}---then establishes the adequacy of
  $\{\curlywedge\}$.} this is defined so that
\begin{equation*}
  {\sv P}\curlywedge {\sv Q}\sim\lnot {\sv P}\land\lnot {\sv Q}.
\end{equation*}
So $\curlywedge$ is defined in terms of $\land$ and $\lnot$.  This fact
by itself tells us \emph{nothing} about the adequacy of
$\{\curlywedge\}$; it has no relevance to the proof of the following:

\begin{theorem}
  The signature $\{\curlywedge\}$ is adequate.
\end{theorem}

\begin{proof}
  It is enough to write
$\lnot {\sv P}$ and ${\sv P}\land {\sv Q}$ using only $\curlywedge$.
We have $\lnot {\sv P}\sim {\sv P}\curlywedge {\sv P}$, and also
\begin{align*}
  {\sv P}\land {\sv Q} & \sim (\lnot {\sv P})\curlywedge (\lnot {\sv Q}) \\
           & \sim ({\sv P}\curlywedge {\sv P})\curlywedge({\sv Q}\curlywedge {\sv Q}).
\end{align*}
Hence all formulas in the adequate signature $\{\land,\lnot\}$ can be
written in terms of~$\curlywedge$.
Thus $\{\curlywedge\}$ is adequate.
\end{proof}
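For example, disjunction itself can be recovered from $\curlywedge$ by
combining a De Morgan law with the equivalences just displayed:
\begin{align*}
  {\sv P}\lor {\sv Q} & \sim \lnot(\lnot {\sv P}\land\lnot {\sv Q}) \\
           & \sim \lnot({\sv P}\curlywedge {\sv Q}) \\
           & \sim ({\sv P}\curlywedge {\sv Q})\curlywedge({\sv P}\curlywedge {\sv Q}).
\end{align*}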

Adequate $n$-ary connectives where $n>2$ can also be found (this is an exercise).

How might we show that a certain signature is \emph{not} adequate?
Note that the signature $\{\land,\lnot\}$ is adequate, even though it
contains no nullary connectives: the two constant Boolean
polynomials are represented in $\{\land,\lnot\}$ by
${\sv P}\land\lnot {\sv P}$ and $\lnot({\sv P}\land\lnot {\sv P})$ respectively. 

\begin{theorem}
  The signature $\{\land,\eor\}$ is \emph{not} adequate.
\end{theorem}

\begin{proof}
  We shall show that no formula in $\{\land,\eor\}$ represents
  $1$.  Now, if 
  \begin{equation*}
      {\sv F}(P_0,P_1,P_2,\dots,P_n)\sim 1,
  \end{equation*}
 then ${\sv F}(P_0,P_0,P_0,\dots,P_0)\sim
  1$ by the Substitution Theorem.  Hence it is enough to show that no
  \emph{singulary} formula in $\{\land,\eor\}$ represents $1$.
In $\{\land,\eor\}$, we can represent $0$ by ${\sv P}\eor {\sv P}$.  We
also have
\begin{align*}
  0\land 0&\sim 0,& 0\eor 0&\sim 0,\\
0\land {\sv P}&\sim 0,& 0\eor {\sv P}&\sim {\sv P},\\
{\sv P}\land 0&\sim 0,& {\sv P}\eor 0&\sim {\sv P},\\
{\sv P}\land {\sv P}&\sim {\sv P},& {\sv P}\eor {\sv P}&\sim 0.
\end{align*}
Hence, by the Replacement Theorem, every singulary formula in
$\{\land,\eor\}$ is equivalent either to $0$ or to a variable; in
particular, no such formula represents $1$.
\end{proof}
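The closure computation made in this proof is small enough to be
checked by machine.  In the following Python sketch (all names are
mine), a singulary Boolean function is encoded by its table of values
$(f(0),f(1))$, and the set containing just the variable is closed under
pointwise conjunction and exclusive disjunction:
\begin{verbatim}
ZERO, IDENT, ONE = (0, 0), (0, 1), (1, 1)

def close(fns, ops):
    # Close a set of singulary functions under the given
    # pointwise binary operations.
    fns = set(fns)
    while True:
        new = {tuple(op(f[e], g[e]) for e in (0, 1))
               for f in fns for g in fns for op in ops}
        if new <= fns:
            return fns
        fns |= new

closed = close({IDENT}, [lambda a, b: a & b,   # conjunction
                         lambda a, b: a ^ b])  # exclusive or
assert closed == {ZERO, IDENT} and ONE not in closed
\end{verbatim}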

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Prove Theorem \ref{thm:and-not-ad}.
\item\label{exer:not-to-ad}
Prove that $\{\lnot,\lto\}$ is adequate.
\item
Prove that $\lnot$ by itself is \emph{not} adequate.
\item
Prove the adequacy of the \textbf{Sheffer
  stroke,}\dindex{Sheffer stroke}\dindexsub{connective}{Sheffer stroke} namely
the connective $|$ such that ${\sv P}\mathrel{|}{\sv Q}\sim\lnot~({\sv P}\land {\sv Q})$.
\item
Find an adequate ternary ($3$-ary) connective.  (See
\S~\ref{sect:unique}, Exercise~\ref{exer:tern}.)
\end{enumerate}

\section{Simplification}\label{simplify}

In proving Theorem \ref{thm:ad}, we used a known equivalence, and the
Theorems of 
Substitution and Replacement, to `simplify' a formula in the sense
of eliminating instances of disjunction.  In the same way, we can
simplify any formula to disjunctive normal form.  The procedure relies
on Lemma~\ref{lem:simp}. 
Using this lemma, given any formula, we can:
\begin{compactenum}[1)]
  \item
eliminate instances of $\lto$, $\liff$, and $\eor$;
\item
eliminate multiple negations, and make sure that the only arguments of
$\lnot$ are variables;
\item
eliminate conjunctions of disjunctions;
\item
eliminate redundancies; now the formula is a disjunction of
conjunctions of variables and negated variables, so we can finally:
\item
add variables as necessary to obtain a disjunctive normal form.
\end{compactenum}

\begin{example}
Suppose ${\sv F}$ is the formula $\lnot({\sv P}\lto {\sv Q})\lor {\sv Q}$.  The reduction of ${\sv F}$ to
disjunctive normal form can proceed as follows:
\begin{align*}
{\sv F}&\sim \lnot(\lnot {\sv P}\lor {\sv Q})\lor {\sv Q}& & \text{[def'n of $\lto$]} \\
&\sim (\lnot\lnot {\sv P}\land\lnot {\sv Q})\lor {\sv Q}& & \text{[De Morgan]}\\
&\sim ({\sv P}\land\lnot {\sv Q})\lor {\sv Q}& &\text{[double negation]}\\
&\sim ({\sv P}\land\lnot {\sv Q})\lor({\sv Q}\land {\sv P})\lor({\sv Q}\land\lnot {\sv P}) & &\text{[new
variable]}\\
&\sim ({\sv P}\land\lnot {\sv Q})\lor({\sv P}\land {\sv Q})\lor(\lnot {\sv P}\land {\sv Q}) &
&\text{[commutativity]}
\end{align*}
\end{example}

There may be more than one way to proceed:

\begin{example}
Let ${\sv F}$ be $\lnot(\lnot {\sv P}\lto {\sv Q})\land({\sv Q}\lor\lnot {\sv P})$.  Then
  \begin{align*}
    {\sv F}&\sim \lnot(\lnot\lnot {\sv P}\lor {\sv Q})\land({\sv Q}\lor\lnot {\sv P}) &&
    \text{[def'n of $\lto$]}\\
&\sim
 \lnot({\sv P}\lor {\sv Q})\land({\sv Q}\lor\lnot {\sv P}) &&
    \text{[double neg.]}\\
&\sim
 (\lnot {\sv P}\land \lnot {\sv Q})\land({\sv Q}\lor\lnot {\sv P}) &&
    \text{[De Morgan]}\\
&\sim
((\lnot {\sv P}\land \lnot {\sv Q})\land {\sv Q})\lor((\lnot {\sv P}\land\lnot {\sv Q})\land\lnot {\sv P}) &&
    \text{[dist.]}\\
&\sim
(\lnot {\sv P}\land (\lnot {\sv Q}\land {\sv Q}))\lor(\lnot {\sv P}\land(\lnot {\sv P}\land\lnot {\sv Q})) &&
    \text{[assoc.; comm.]}\\
&\sim
(\lnot {\sv P}\land 0)\lor((\lnot {\sv P}\land\lnot {\sv P})\land\lnot {\sv Q}) &&
    \text{[red.; assoc.]}\\
&\sim
0\lor(\lnot {\sv P}\land\lnot {\sv Q}) &&
    \text{[red.]}\\
&\sim
\lnot {\sv P}\land\lnot {\sv Q} &&
    \text{[red.]}
  \end{align*}
Alternatively,
  \begin{align*}
    {\sv F}&\sim
 \lnot({\sv P}\lor {\sv Q})\land({\sv Q}\lor\lnot {\sv P}) &&
    \text{[def'n of $\lto$; double neg.]}\\
&\sim
 (\lnot {\sv P}\land \lnot {\sv Q})\land({\sv Q}\lor\lnot {\sv P}) &&
    \text{[De Morgan]}\\
&\sim
 \lnot {\sv P}\land (\lnot {\sv Q}\land({\sv Q}\lor\lnot {\sv P})) &&
    \text{[assoc.]}\\
&\sim
 \lnot {\sv P}\land ((\lnot {\sv Q}\land {\sv Q})\lor (\lnot {\sv Q}\land \lnot {\sv P})) &&
    \text{[dist.]}\\
&\sim
 \lnot {\sv P}\land (0\lor(\lnot {\sv Q}\land\lnot {\sv P})) &&
    \text{[red.]}\\
&\sim
 \lnot {\sv P}\land (\lnot {\sv Q}\land\lnot {\sv P}) &&
    \text{[red.]}\\
&\sim
 (\lnot\sv P\land\lnot\sv P)\land\lnot\sv Q&&\text{[comm; assoc]}\\
&\sim
 \lnot\sv P\land\lnot\sv Q&&\text{[red.]}
  \end{align*}
\end{example}

\begin{lemma}[Absorption Laws]\label{lem:abs-laws}\dindexsub{law}{Absorption
  L---s}\dindex{Absorption Laws}
\begin{equation*}
{\sv P}\land({\sv P}\lor {\sv Q}) \sim {\sv P},\qquad
{\sv P}\lor({\sv P}\land {\sv Q}) \sim {\sv P}.
\end{equation*}
\end{lemma}

If two formulas ${\sv F}$ and ${\sv G}$ are equivalent, then we can use
simplification to show
this as follows.
\begin{compactenum}
  \item
Simplify ${\sv F}$ to a disjunctive normal form ${\sv F}'$.
  \item
Simplify ${\sv G}$ to a disjunctive normal form ${\sv G}'$.
\item
Note that ${\sv F}'\sim {\sv G}'$.  (They should be the same formula, except
possibly in the order of the constituents.)
\end{compactenum}
However, it may be easier to simplify directly from one formula to the
other, or to use \emph{conjunctive} normal forms.  

\begin{example}
  The formulas $P_0\lto P_1\lto P_2$ and $P_1\lto P_0\lto P_2$ are equivalent, because
  \begin{align*}
    P_0\lto P_1\lto P_2&\sim
 \lnot P_0\lor(P_1\lto P_2)&&
    \text{[def'n of $\lto$]}\\
&\sim
 \lnot P_0\lor\lnot P_1\lor P_2&&
    \text{[def'n of $\lto$]}\\
&\sim
 \lnot P_1\lor\lnot P_0\lor P_2&&
    \text{[comm.]}\\
&\sim
 \lnot P_1\lor(P_0\lto P_2)&&
    \text{[def'n of $\lto$]}\\
&\sim
 P_1\lto P_0\lto P_2.&&
    \text{[def'n of $\lto$]}
  \end{align*}
(Associativity was used silently.)
The reduction of each formula to disjunctive normal form would be
tedious, since that normal form is
\begin{multline*}
(\lnot P_0\land\lnot P_1\land\lnot P_2)\lor
(      P_0\land\lnot P_1\land\lnot P_2)\lor 
(\lnot P_0\land      P_1\land\lnot P_2)\lor{}\\
{}(\lnot P_0\land\lnot P_1\land      P_2)\lor
(      P_0\land\lnot P_1\land      P_2)\lor
(\lnot P_0\land      P_1\land      P_2)\lor
(      P_0\land      P_1\land      P_2); 
\end{multline*}
but the conjunctive normal form is just the formula $\lnot P_0\lor\lnot
P_1\lor P_2$, found in the original simplification.
\end{example}

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Given a formula in normal form, how would you write down its
truth-table?
\item
Prove the Absorption Laws (\ref{lem:abs-laws}) using simplification.
\item
Use simplification to prove the following equivalences:
\begin{enumerate}
  \item
$\lnot({\sv P}\land {\sv Q})\lor {\sv R}\sim {\sv P}\land {\sv Q}\lto {\sv R}$;
\item
$({\sv P}\lto {\sv Q})\land({\sv R}\lto {\sv Q})\land \lnot {\sv Q}\lto\lnot({\sv P}\lor {\sv R})\sim 1$;
\item
${\sv P}\lto({\sv Q}\lto {\sv R})\sim {\sv P}\lto {\sv Q}\lto({\sv P}\lto {\sv R})$;
\item
$({\sv P}\lor {\sv R})\land({\sv Q}\lor\lnot {\sv R})\sim ({\sv P}\land \lnot {\sv R})\lor({\sv Q}\land {\sv R})$;
\item
$(\sv P_0\lor\sv P_1)\land({\sv Q}_0\lor {\sv Q}_1)\sim
\bigvee_{i<2}\bigvee_{j<2}(\sv P_i\land {\sv Q}_j)$.
\end{enumerate}
\item\label{exer:simpdnf}
For $(\lnot P_0\lto P_1)\land(\lnot P_1\lto P_0)\lto(\lnot
P_0\lor\lnot P_1)$, find the disjunctive normal form using simplification.
\item
Use simplification to verify the equivalences listed in
\S~\ref{equivalent}, Exercise~\ref{exer:and+1}.
\item
Use simplification to establish
${\sv P}\liff {\sv Q}\sim {\sv P}\eor\lnot {\sv Q}$.
\end{enumerate}


\section{Logical entailment}\label{sect:entailment}

Simplification is a way to prove that two formulas are logically
equivalent.  There are other relations between formulas that we may
want to prove.
If ${\sv F}$ is an $n$-ary formula such that $\named {\sv F}(\tuple e)=1$ for all
truth-assignments $\tuple e$, then as in \S~\ref{equivalent} we write
\begin{equation*}
  \models {\sv F}.
\end{equation*}
Suppose $({\sv F}_0,\dots, {\sv F}_m)$ is a list of $m+1$ formulas, each of them $n$-ary,
such that, for all $n$-ary truth-assignments $\tuple e$, if $\named
{\sv F}_i(\tuple e)=1$ for each $i$ in $\{0,\dots,m-1\}$, then $\named
{\sv F}_m(\tuple e)=1$.  Then we say that ${\sv F}_m$ is a 
\textbf{logical consequence}\dindexsub{logic}{---al
  consequence}\dindexsub{consequence}{logical ---} of
$\{{\sv F}_0,\dots,{\sv F}_{m-1}\}$, or $\{\sv F_0,\dots,\sv F_{m-1}\}$ \textbf{logically entails}\dindexsub{logic}{---ally entails}\dindexsub{entails}{logically ---} $\sv F_m$,
and we write
\begin{equation*}
  {\sv F}_0,\dots,{\sv F}_{m-1}\models {\sv F}_m;
\end{equation*}
if the set $\{{\sv F}_0,\dots,{\sv F}_{m-1}\}$ is denoted by $\Sigma$, then we can also write
\begin{equation*}
  \Sigma\models {\sv F}_m.
\end{equation*}

Logical entailment can in principle be established by truth-tables.  However, this method is practical only when the number of variables and the number of formulas are both small.

\begin{examples}
\begin{enumerate}
\item
$\sv P\models\sv P\lor\sv Q$ because the table 
\begin{equation*}
\begin{array}{ccc}
\sv P&\lor&\sv Q\\\hline
0&0&0\\
1&1&0\\
0&1&1\\
1&1&1
\end{array}
\end{equation*}
shows $\sv P\lor\sv Q$ is true whenever $\sv P$ is true.  (It is
irrelevant that $\sv P\lor\sv Q$ can be true when $\sv P$ is false.) 
\item
Similarly $\sv P,\sv Q\models\sv P\land\sv Q$.
\item
$\sv P\lor\sv Q\fcom\sv Q\lto\sv R\models\sv P\lor\sv R$ by
  consideration of the starred rows in the table: 
\begin{equation*}
\begin{array}{ccc|c|c|cc}
\sv P&\sv Q&\sv R&\sv P\lor\sv Q&\sv Q\lto\sv R&\sv P\lor\sv R& \\\cline{1-6}
0    &0    &0    &0             &1             &0             & \\
1    &0    &0    &1             &1             &1             &*\\
0    &1    &0    &1             &0             &0             & \\
1    &1    &0    &1             &0             &1             & \\
0    &0    &1    &0             &1             &1             & \\
1    &0    &1    &1             &1             &1             &*\\
0    &1    &1    &1             &1             &1             &*\\
1    &1    &1    &1             &1             &1             &*
\end{array}
\end{equation*}
\end{enumerate}
\end{examples}
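When the number of variables is small, checking entailment in this way
is easily mechanized.  Here is a brute-force sketch in Python (the name
\texttt{entails} is mine), with formulas represented as Boolean
functions:
\begin{verbatim}
from itertools import product

def entails(premises, conclusion, n):
    # Sigma |= F: the conclusion is true at every n-ary
    # truth-assignment at which all the premises are true.
    return all(int(conclusion(*e)) == 1
               for e in product((0, 1), repeat=n)
               if all(int(p(*e)) == 1 for p in premises))

# The third example: P or Q, Q -> R |= P or R.
assert entails([lambda p, q, r: p or q,
                lambda p, q, r: (not q) or r],
               lambda p, q, r: p or r, 3)
\end{verbatim}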

There are alternative methods for establishing logical entailment.  The following should be compared with Lemma~\ref{lem:LL'ad}.

\begin{lemma}
If $\Sigma\models\sv F$, and $\Sigma\included\Sigma'$, then $\Sigma'\models\sv F$.
\end{lemma}

\begin{proof}
If $\Sigma\models\sv F$, and $\Sigma\included\Sigma'$, and $\tuple e$ is a truth-assignment under which every formula in $\Sigma'$ is true, then every formula in $\Sigma$ is true under $\tuple e$, so $\named{\sv F}(\tuple e)=1$.  This means $\Sigma'\models\sv F$.
\end{proof}

Corresponding to Theorem~\ref{thm:substitution}, we have

\begin{theorem}[Substitution]\label{thm:conseq-sub}
  \dindexsub{substitution}{S--- Theorem}
  \dindexsub{theorem}{Substitution Th---}
  If $({\sv F}_0,\dots,{\sv F}_m)$ is a list of $n$-ary formulas such that
  \begin{equation*}
    {\sv F}_0,\dots,{\sv F}_{m-1}\models {\sv F}_m,
  \end{equation*}
and $({\sv G}_0,\dots,{\sv G}_{n-1})$ is a list of $n$ formulas, then
\begin{equation*}
\sv H_0,\dots,\sv H_{m-1}\models\sv H_m,
\end{equation*}
where $\sv H_i$ is
${\sv F}_i({\sv G}_0,\dots,{\sv G}_{n-1})$ for each $i$ in $\{0,\dots,m\}$.
\end{theorem}

\begin{proof}
Say $\tuple e$ is a
  truth-assignment for the ${\sv G}_j$ such that $\named {\sv H}_i(\tuple e)=1$
  when $i<m$.  Let $f_j=\named {\sv G}_j(\tuple e)$ when $j<n$.  Then
  $\named {\sv F}_i(\tuple f)=1$ when $i<m$, by the associativity of
  substitution (Lemma~\ref{lem:sub}).  Hence also $\named
  {\sv H}_m(\tuple e)=\named {\sv F}_m(\tuple f)=1$ (since ${\sv F}_m$ is a logical
  consequence of $\{{\sv F}_0,\dots,{\sv F}_{m-1}\}$).  Therefore
  ${\sv H}_0,\dots,{\sv H}_{m-1}\models {\sv H}_m$.
\end{proof}

The following is immediate from the definitions.

\begin{theorem}
If $\sv F\sim\sv G$, then $\sv F\models\sv G$.
\end{theorem}

\begin{theorem}[Transitivity]
If $\Sigma$ and $\Pi$ are finite sets of formulas, and $\Sigma\models\sv F$ for every $\sv F$ in $\Pi$, and $\Pi\models\sv G$, then $\Sigma\models\sv G$.
\end{theorem}

\begin{proof}
Under the given assumptions, suppose also that every formula in
$\Sigma$ is true under $\tuple e$.  Then every formula in $\Pi$ is
true under $\tuple e$, so $\named{\sv G}(\tuple e)=1$.  Thus
$\Sigma\models\sv G$. 
\end{proof}

\begin{example}
Since $\sv P\lor\sv Q\fcom\sv Q\lto\sv R\models\sv P\lor\sv R$ as in
the last example, and $\sv Q\lto\sv R\sim\lnot\sv Q\lor\sv R$, we have  
\begin{equation*}
\sv P\lor\sv Q\fcom\lnot\sv Q\lor\sv R\models\sv P\lor\sv R
\end{equation*}
by the last two theorems, hence
$\sv F\lor\sv G\fcom\lnot\sv G\lor\sv H\models\sv F\lor\sv H$ by substitution.
\end{example}

A number of rules for establishing logical entailments correspond to some standard forms of argument in mathematical proofs.

\begin{lemma}\label{lem:proof-forms}
\mbox{}
\begin{compactenum}
  \item
\textbf{Contradiction:}\dindex{contradiction}
If $\Sigma\cup\{\lnot\sv F\}\models\sv G$ and
$\Sigma\cup\{\lnot\sv F\}\models\lnot\sv G$, then
  \begin{equation*}
\Sigma\models\sv F.
  \end{equation*}
  \item
\textbf{Contraposition:}\dindex{contraposition}
If $\Sigma\cup\{\lnot\sv F\}\models\lnot\sv G$, then
\begin{equation*}
\Sigma\cup\{\sv G\}\models\sv F.
\end{equation*}
\item
\textbf{Deduction:}\dindex{deduction}
If $\Sigma\cup\{\sv F\}\models\sv G$, then
\begin{equation*}
\Sigma\models\sv F\lto\sv G.
\end{equation*}
\end{compactenum}
\end{lemma}

\begin{lemma}\label{lem:new-rules}
\mbox{}
  \begin{compactenum}
\item
\textbf{Detachment:}%
\footnote{These rules also have the Latin names 
\textbf{\emph{Modus Ponens}}%
\dindexsub{modus@\emph{modus}}{M--- Ponens@\emph{M--- Ponens}}%
\dindexsub{ponens@\emph{ponens}}{Modus P---@\emph{Modus P---}}
(method of affirming)
and
\textbf{\emph{Modus Tollens}}%
\dindexsub{modus@\emph{modus}}{M--- Tollens@\emph{M--- Tollens}}% 
\dindexsub{tollens@\emph{tollens}}{Modus T---@\emph{Modus T---}}
(method of denying).
}%
\dindex{detachment}
\begin{align*}
&{\sv F}\fcom {\sv F}\lto {\sv G}\models {\sv G},&&
{\sv F}\lto {\sv G}\fcom \lnot {\sv G}\models \lnot {\sv F}.
\end{align*}
\item
\textbf{Simplification:}\dindex{simplification}
\begin{equation*}
  {\sv P}\land {\sv Q}\models {\sv Q}.
  \end{equation*}
  \item
\textbf{Cases:}\dindex{cases} 
\begin{equation*}
{\sv P}\lto {\sv Q}_0\lor\dots\lor {\sv Q}_n\fcom {\sv Q}_0\lto {\sv R}\fcom \dots\fcom {\sv Q}_n\lto
  {\sv R}\models {\sv P}\lto {\sv R}.
  \end{equation*}
  \item
\textbf{Addition:}\dindex{addition}
\begin{align*}
{\sv P}\models {\sv P}\lor {\sv Q},&&{\sv P}\models {\sv Q}\lor {\sv P}.
\end{align*}
  \item
  \textbf{Hypothetical Syllogism:}\dindex{hypothetical
  syllogism}\dindexsub{syllogism}{hypothetical ---}\footnote{A 
  \textbf{syllogism}%
\tindex{syllogism}
 is a classical form of
  argument; Aristotle's definition is quoted in
  Appendix~\ref{Aristotle}.}
  \begin{equation}\label{eqn:hyp-syl}
{\sv P}\lto {\sv Q}\fcom {\sv Q}\lto {\sv R}\models {\sv P}\lto  {\sv R}.
\end{equation} 
  \item
  \textbf{Disjunctive Syllogism:}%
\dindexsub{disjunctive}{--- syllogism}%
\dindexsub{syllogism}{disjunctive ---}
  \begin{align*}
&{\sv P}\lor {\sv Q}\fcom \lnot {\sv P}\models {\sv Q},&
&{\sv P}\lor {\sv Q}\fcom \lnot {\sv Q}\models {\sv P}.
\end{align*}
  \item
  \textbf{Constructive Dilemma:}\dindex{constructive
  dilemma}\dindexsub{dilemma}{constructive ---}
  \begin{equation*}
{\sv P}_0\lto {\sv Q}_0\fcom {\sv P}_1\lto {\sv Q}_1\fcom {\sv P}_0\lor
  {\sv P}_1\models {\sv Q}_0\lor {\sv Q}_1.
  \end{equation*} 
\end{compactenum}
\end{lemma}

\begin{proof}
To prove the first part of Detachment, it is enough to show
  $P_0\fcom P_0\lto P_1\models P_1$, by the preceding Substitution Theorem.
  The truth-table 
  \begin{equation*}
    \begin{array}{c||c|c|c||c}
P_0 & P_0 & \lto & P_1 & P_1\\ \hline
0&0&1&0&0\\
1&1&0&0&0\\
0&0&1&1&1\\
1&1&1&1&1
    \end{array}
  \end{equation*}
shows that $(1,1)$ is the only truth-assignment where both $P_0$ and
$P_0\lto P_1$ are true; under this assignment, $P_1$ is true.  The
remaining parts are proved similarly (see the exercises).
\end{proof}


\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item\label{exer:and-models}
Show that ${\sv F}_0\fcom\dots\fcom {\sv F}_{m-1}\models {\sv G}$ if and only if
$\bigwedge_{k<m}{\sv F}_k\models {\sv G}$.
\item
Show that ${\sv P}\lto {\sv Q}\fcom {\sv R}\lto {\sv Q}\fcom \lnot {\sv Q}
\models
\lnot({\sv P}\lor {\sv R})$.
\item
Prove Lemma~\ref{lem:proof-forms}.
\item
Prove Lemma~\ref{lem:new-rules}.
\item
Prove the following:
\begin{enumerate}
\renewcommand{\labelenumii}{(\theenumii)}
\item
${\sv P}\liff {\sv Q}\fcom {\sv Q}\liff {\sv R}\models {\sv P}\liff {\sv R}$;
\item
${\sv P}\eor {\sv Q}\fcom {\sv Q}\eor {\sv R}\models {\sv P}\liff {\sv R}$.
\end{enumerate}
\end{enumerate}

\section{Formal proofs}\label{sect:formal}

For a given propositional logic, a
\textbf{proof-system}\dindexsub{proof}{---{}-system}\dindexsub{system}{proof-{}---}
consists of: 
\begin{compactenum}[1)]
\item
certain distinguished formulas, called
\textbf{axioms;}\dindex{axiom}
\item
\textbf{rules of inference},\dindex{rule of
  inference}\dindexsub{inference}{rule of ---} which are clearly 
  described ways of
obtaining new formulas from finitely many given formulas.
\end{compactenum}

One can think of an axiom $\sv F$ as the rule of inference
that allows $\sv F$ to be obtained from \emph{no} given
formulas.  

Suppose $\psys N$ is a proof-system, and $\Sigma$ is a set $\{{\sv F}_0,\dots,{\sv F}_{m-1}\}$ of formulas.
In $\psys N$, a \textbf{deduction}\dindex{deduction} or
\textbf{formal proof}\dindexsub{formal}{---
  proof}\dindexsub{proof}{formal ---} of a formula ${\sv F}_m$ from
$\Sigma$ is a finite sequence
\begin{equation*}
{\sv G}_0,\dots, {\sv G}_\ell,
\end{equation*}
where ${\sv G}_\ell$ is ${\sv F}_m$ and, for each $k$ in $\{0,\dots,\ell\}$, the
formula ${\sv G}_k$ is:
\begin{compactenum}[1)]
\item
an axiom of $\psys N$, or
\item
one of the formulas ${\sv F}_i$, where $i<m$, or
\item
a formula obtainable from (some of) the formulas in
$\{{\sv G}_0,\dots,{\sv G}_{k-1}\}$ by one of the rules of inference of $\psys N$.
\end{compactenum}
If there is such a deduction, then we may write one of
\begin{align*}
&{\sv F}_0,\dots,{\sv F}_{m-1}\proves[N] {\sv F}_m,&&
\Sigma\proves[N]\sv F_m,
\end{align*}
and we say that ${\sv F}_m$ is \textbf{derivable}\dindex{derivable} or \textbf{formally
  provable}\dindexsub{formal}{---ly
  provable}\dindexsub{provable}{formally ---}
  in $\psys N$ from $\Sigma$, or that
$\Sigma$ \textbf{formally entails}\dindexsub{entails}{formally ---}\dindexsub{formal}{---ly entails} $\sv F_m$ in $\psys N$.
In this case,
$\Sigma$ is a set
of \textbf{hypotheses}\dindex{hypothesis} from which $\sv F_m$ can be derived.  In case $m=0$, we
  write 
  \begin{equation*}
  \proves[N] {\sv F}_0
  \end{equation*} 
  and say that 
${\sv F}_0$ is a \textbf{validity}\dindex{validity} of $\psys N$ or a \textbf{theorem}\dindex{theorem} of $\psys
  N$.  Here $\proves$ is the \textbf{syntactic turnstile.}\dindexsub{syntactic}{--- turnstile}\dindexsub{turnstile}{syntactic ---}
  We may drop the subscript $\psys N$ on $\proves$ if the identity of $\psys N$ is clear.

Many proof-systems are possible.  Some are more useful than others.
As a minimum requirement, we should like a proof-system $\psys N$ to
be
\begin{compactenum}[1)]
\item
  \textbf{sound:}\dindex{sound}
  \begin{equation*}
\Sigma\proves[N] {\sv G}\implies
\Sigma\models {\sv G};
\end{equation*}
  \item
  \textbf{complete:}\dindex{complete}
\begin{equation*}
\Sigma\models {\sv G}\implies\Sigma\proves[N] {\sv G}.
\end{equation*}
\end{compactenum}
The remainder of this section establishes two such systems.

\subsection*{The system of detachment}

Let $\psys D$ be the proof-system in which
\begin{compactenum}[1)]
  \item
the axioms are just the tautologies;
\item
the rules of inference are two:
\begin{compactenum}
\item
if the formula $\sv F$ is given, and $\sv F\sim\sv G$, then $\sv G$ may be obtained;
\item
\textbf{Detachment:}\dindex{detachment}\footnote{Or
 \textbf{\emph{Modus Ponens.}}\dindexsub{modus@\emph{modus}}{M--- Ponens@\emph{M--- Ponens}}
\dindexsub{ponens@\emph{ponens}}{Modus P---@\emph{Modus P---}}}
if the formulas ${\sv F}$ and 
${\sv F}\lto {\sv G}$ are given, then the formula ${\sv G}$ may be obtained. 
\end{compactenum}
\end{compactenum}

\begin{example}\label{example:simplification}
  ${\sv F}\land {\sv G}\proves[D] {\sv G}$, because the following is a deduction in $\psys D$ of ${\sv G}$
  from ${\sv F}\land {\sv G}$:
  \begin{center}
    \begin{tabular}{r c l}
(0) & ${\sv F}\land {\sv G}$ & [hyp.]\\
(1) & $1$ & [taut.]\\
(2) & $\lnot {\sv F}\lor 1$ & [red.]\\
(3) & $\lnot {\sv F}\lor\lnot {\sv G}\lor {\sv G}$ & [red.]\\
(4) & $\lnot({\sv F}\land {\sv G})\lor {\sv G}$ & [De Morgan]\\
(5) & $({\sv F}\land {\sv G})\lto {\sv G}$ & [def'n of $\lto$]\\
(6) & ${\sv G}$ & [Detachment, lines 0 \& 5]
    \end{tabular}
  \end{center}
Strictly, the deduction itself is just the list 
\begin{equation*}
  {\sv F}\land {\sv G}\fcom  
 1\fcom  
 \lnot {\sv F}\lor 1\fcom  
 \lnot {\sv F}\lor \lnot {\sv G}\lor {\sv G}\fcom 
 \lnot({\sv F}\land {\sv G})\lor {\sv G}\fcom  
 ({\sv F}\land {\sv G})\lto {\sv G}\fcom 
 {\sv G}
\end{equation*}
of formulas.  In fact, there is a shorter deduction of ${\sv G}$ from
${\sv F}\land {\sv G}$, namely
\begin{equation*}
  {\sv F}\land {\sv G}\fcom {\sv F}\land {\sv G}\lto {\sv G}\fcom {\sv G}.
\end{equation*}
However, \emph{recognizing} this as a deduction requires, in part, recognizing
that ${\sv F}\land {\sv G}\lto {\sv G}$ is a tautology.
\end{example}

\begin{theorem}\label{thm:S-comp}
The proof-system $\psys D$ is sound and complete.
\end{theorem}

\begin{proof}
  We shall prove the following circle of implications:
  \begin{equation*}
    \xymatrix{
({\sv F}_0,\dots,{\sv F}_{m-1}\models {\sv F}_m) \ar@{=>}[r] & ({}\models {\sv F}_0\lto {\sv F}_1\lto \dots
\lto {\sv F}_m) \ar@{=>}[d]\\
({\sv F}_0,\dots,{\sv F}_{m-1}\proves[D] {\sv F}_m) \ar@{=>}[u] & ({}\proves[D] {\sv F}_0\lto {\sv F}_1\lto \dots
\lto {\sv F}_m) \ar@{=>}[l]
}
  \end{equation*}
Suppose ${\sv F}_0,\dots,{\sv F}_{m-1}\models {\sv F}_m$.  Then for every
truth-assignment $\tuple e$ for the ${\sv F}_i$, either $\named {\sv F}_m(\tuple e)=1$, or
$\named {\sv F}_i(\tuple e)=0$ for some $i$ in $\{0,\dots,m-1\}$.  If
$\named {\sv F}_i(\tuple e)=0$ and $i<m$, then ${\sv F}_i\lto {\sv F}_{i+1}\lto\dots\lto
{\sv F}_m$ is true at $\tuple e$, and hence so is ${\sv F}_0\lto\dots\lto {\sv F}_m$.
For the same reason, if $\named {\sv F}_m(\tuple e)=1$, then ${\sv F}_0\lto\dots\lto
{\sv F}_m$ is true at $\tuple e$.  Hence $\models {\sv F}_0\lto\dots \lto {\sv F}_m$.

Suppose $\models {\sv F}_0\lto\dots\lto {\sv F}_m$.  Then the formula
${\sv F}_0\lto\dots\lto {\sv F}_m$ is a tautology, hence an axiom of
$\psys D$, so it constitutes, by itself, a deduction of itself.  Hence
$\proves[D] {\sv F}_0\lto\dots\lto {\sv F}_m$.

Suppose $\proves[D]
{\sv F}_0\lto\dots\lto {\sv F}_m$.  Let ${\sv G}_0,\dots,{\sv G}_\ell$ be a deduction of
${\sv F}_0\lto\dots\lto {\sv F}_m$.  Then we have the following deduction
of ${\sv F}_m$ from $\{{\sv F}_0,\dots,{\sv F}_{m-1}\}$.  
\begin{center}
  \begin{tabular}{r c l}
($0$)& ${\sv G}_0$ &\\
\dots&\dots&\\
($\ell-1$) & ${\sv G}_{\ell-1}$ \\
($\ell$) & ${\sv F}_0\lto\dots\lto {\sv F}_m$ &\\
($\ell+1$) & ${\sv F}_0$ & [hyp.]\\
($\ell+2$) & ${\sv F}_1\lto\dots\lto {\sv F}_m$ & [Detachment]\\
($\ell+3$) & ${\sv F}_1$ & [hyp.] \\
($\ell+4$) & ${\sv F}_2\lto\dots\lto {\sv F}_m$ & [Detachment]\\
\dots&\dots&\\
($\ell+2m-2$) & ${\sv F}_{m-1}\lto {\sv F}_m$ & [Detachment]\\
($\ell+2m-1$) & ${\sv F}_{m-1}$ & [hyp.]\\
($\ell+2m$) & ${\sv F}_m$ & [Detachment]
  \end{tabular}
\end{center}
Thus ${\sv F}_0,\dots,{\sv F}_{m-1}\proves[D]
{\sv F}_m$. 

Suppose finally  ${\sv F}_0,\dots,{\sv F}_{m-1}\proves[D]
{\sv F}_m$.  We use the method of infinite descent.\index{method!--- of
  infinite descent}\index{infinite!--- descent}\index{descent!infinite
  ---}  Let
${\sv G}_0,\dots,{\sv G}_{\ell-1},{\sv F}_m$ be a deduction of ${\sv F}_m$ from
$\{{\sv F}_0,\dots,{\sv F}_{m-1}\}$.  Let $\tuple e$ be a
truth-assignment such that
$\named {\sv F}_i(\tuple e)=1$ whenever $i<m$.   Suppose if possible that
$\named {\sv F}_m(\tuple 
e)=0$.  Then ${\sv F}_m$ is not in $\{{\sv F}_0,\dots,{\sv F}_{m-1}\}$, nor is ${\sv F}_m$ a
tautology.  Hence, by the definition of a deduction, either ${\sv F}_m\sim
{\sv G}_i$ for some $i$ in $\{0,\dots,\ell-1\}$, or there are $i$ and $j$ in
$\{0,\dots,\ell-1\}$ such that ${\sv G}_j$ is ${\sv G}_i\lto {\sv F}_m$.  In the first
case, ${\sv G}_i$ is false at $\tuple e$; in the second case, either ${\sv G}_i$
or ${\sv G}_j$ is false at $\tuple e$.
In either case,
$\named {\sv G}_k(\tuple e)=0$ for some $k$ in $\{0,\dots,\ell-1\}$.  But
${\sv G}_0,\dots, {\sv G}_k$ is still a deduction from $\{{\sv F}_0,\dots, {\sv F}_{m-1}\}$,
strictly shorter than the original one, but with the same property
(namely that its last formula is false at $\tuple e$).  We cannot take
shorter deductions indefinitely.  Hence $\named {\sv F}_m(\tuple e)=1$.
Therefore ${\sv F}_0,\dots,{\sv F}_{m-1}\models
{\sv F}_m$.  
\end{proof}

The system $\psys D$ can be simplified, at the cost of requiring longer deductions:

\begin{corollary}\label{cor:D}
That proof-system is sound and complete which has only $1$ as an axiom, and which has, as rules of inference,
\begin{compactenum}[(1)]
\item
Detachment,
\item
From $\sv F$, obtain $\sv G$, if $\sv F\sim\sv G$ or $\sv G\sim\sv F$ directly by Lemma~\ref{lem:simp} and Substitution (\ref{thm:substitution}).
\end{compactenum}
\end{corollary}

\subsection*{\L ukasiewicz's proof system}\label{sect:L}

Here we develop the proof-system $\psys{L}$ (named for its inventor \L
ukasiewicz).\index{Lukasiewicz@\L ukasiewicz}  It is of
interest for the simplicity of its definition.  It
involves only formulas in the 
signature $\{\lto,\lnot\}$.  (We know from
\S~\ref{sect:adequacy}, Exercise~\ref{exer:not-to-ad} that
this signature is adequate.)  The only rule of inference of $\psys L$
is Detachment\index{detachment} (as in the definition of $\psys D$ above).  The axioms of $\psys L$ are of three
kinds:\footnote{Frege had an earlier proof-system in this signature
  that used three additional kinds of axioms.}
\begin{compactenum}[1)]

\item\label{item:aff-of-cons}
\textbf{Affirmation of the
  Consequent:}\dindex{affirmation of the
  consequent}\dindexsub{consequent}{affirmation of the ---}
  \begin{equation*}
\proves[L] {\sv F}\lto {\sv G}\lto {\sv F};
\end{equation*}
\item\label{item:self-dist}
\textbf{Self-Distributivity of
  Implication:}\dindexsub{self}{---{}-distributivity of
  implication}\dindexsub{distributivity}{self-{}--- of
  implication}\dindexsub{implication}{self-distributivity of ---}
  \begin{equation*}
\proves[L] ({\sv F}\lto {\sv G}\lto {\sv H}) \lto ({\sv F}\lto {\sv G})\lto {\sv F}\lto {\sv H};
\end{equation*}
\item\label{item:contrap}
  \textbf{Contraposition:}\dindex{contraposition}
  \begin{equation*}
\proves[L](\lnot {\sv F}\lto \lnot {\sv G})\lto {\sv G}\lto {\sv F}.
\end{equation*}
\end{compactenum}

\begin{theorem}\label{thm:L-sound}
System $\psys L$ is sound.
\end{theorem}

To prove
completeness, we shall need the following.

\begin{lemma}\label{lem:FtoF}
$\proves[L]{\sv F}\lto {\sv F}$.
\end{lemma}

\begin{proof}
The formal proof is
\begin{align*}
&{\sv F}\lto {\sv F}\lto {\sv F},\\
&{\sv F}\lto({\sv F}\lto {\sv F})\lto {\sv F},\\
&({\sv F}\lto({\sv F}\lto {\sv F})\lto {\sv F})\lto({\sv F}\lto {\sv F}\lto {\sv F})\lto {\sv F}\lto {\sv F},\\
&({\sv F}\lto {\sv F}\lto {\sv F})\lto {\sv F}\lto {\sv F},\\
&{\sv F}\lto {\sv F},
\end{align*} 
where the first three entries are axioms (\ref{item:aff-of-cons}),
(\ref{item:aff-of-cons}), and (\ref{item:self-dist}) respectively, and the
last two follow by Detachment.
\end{proof}
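The five entries of this formal proof can be checked mechanically.  In
the following Python sketch (the representation and the names are
mine), a formula is a nested tuple, and \texttt{detach} refuses to fire
unless its second argument really is an implication whose antecedent is
its first argument:
\begin{verbatim}
def ax1(f, g):     # Affirmation of the Consequent
    return ('imp', f, ('imp', g, f))

def ax2(f, g, h):  # Self-Distributivity of Implication
    return ('imp', ('imp', f, ('imp', g, h)),
            ('imp', ('imp', f, g), ('imp', f, h)))

def detach(premise, implication):
    # From F and F -> G, obtain G.
    assert implication[0] == 'imp' and implication[1] == premise
    return implication[2]

F = ('var', 0)
s1 = ax1(F, F)                 # F -> (F -> F)
s2 = ax1(F, ('imp', F, F))     # F -> ((F -> F) -> F)
s3 = ax2(F, ('imp', F, F), F)  # the third entry
s4 = detach(s2, s3)            # (F -> (F -> F)) -> (F -> F)
s5 = detach(s1, s4)
assert s5 == ('imp', F, F)     # F -> F
\end{verbatim}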

\begin{lemma}\label{lem:conv-ded}
If ${\sv F}_0,\dots,{\sv F}_{n-1}\proves[L]{\sv G}\lto {\sv H}$, then
${\sv F}_0,\dots,{\sv F}_{n-1},{\sv G}\proves[L]{\sv H}$.  
\end{lemma}

The converse of Lemma \ref{lem:conv-ded} is the following; the proof
is by cases (and the method of infinite descent).

\begin{theorem}[Deduction]\label{thm:deduction}
  \dindexsub{deduction}{D--- Theorem}\dindexsub{theorem}{Deduction Th---}
If ${\sv F}_0,\dots,{\sv F}_{n-1},{\sv G}\proves[L]{\sv H}$, then
\begin{equation*}{\sv F}_0,\dots,{\sv F}_{n-1}\proves[L]{\sv G}\lto {\sv H}.\end{equation*}
\end{theorem}

\begin{proof}
There are three possibilities for ${\sv H}$:

If ${\sv H}$ is an axiom of $\psys L$, or is one of the formulas ${\sv
  F}_i$, then ${\sv F}_0,\dots,{\sv F}_{n-1}\proves[L]{\sv H}$; but
also $\proves[L]{\sv H}\lto {\sv G}\lto {\sv H}$; hence ${\sv
  F}_0,\dots,{\sv F}_{n-1}\proves[L]{\sv G}\lto {\sv H}$ by
Detachment. 

If ${\sv H}$ is ${\sv G}$, then $\proves[L]{\sv G}\lto {\sv H}$ by Lemma
\ref{lem:FtoF}. 

Finally, suppose ${\sv K}_0,\dots,{\sv K}_m$ is the formal proof in
$\psys L$ of ${\sv H}$ from ${\sv F}_0,\dots,{\sv F}_{n-1}$ and ${\sv
  G}$, and suppose the last step in the proof is by Detachment.  (If
it is not, then we have already treated this possibility.)  Then ${\sv
  K}_i$ is ${\sv F}$, and ${\sv K}_j$ is ${\sv F}\lto {\sv H}$, for
some formula ${\sv F}$, and for some $i$ and $j$ that are less than
$m$.  If ${\sv G}\lto {\sv K}_i$ and ${\sv G}\lto {\sv K}_j$ can be
deduced in $\psys L$ from $\{{\sv F}_0,\dots,{\sv F}_{n-1}\}$, then,
by Detachment and the Self-Distributivity Axiom, so can ${\sv G}\lto
{\sv H}$.  Also, both ${\sv K}_i$ and ${\sv K}_j$ have shorter
deductions than ${\sv H}$ in $\psys L$.  Hence, if ${\sv G}\lto {\sv
  H}$ cannot be deduced, then neither can ${\sv G}\lto {\sv K}$ for
some ${\sv K}$ having a strictly shorter deduction than ${\sv H}$;
repeating the argument, we should descend through ever shorter
deductions without end, which is absurd.
\end{proof}

\begin{lemma}\label{lem:several}
The following are validities of $\psys L$:
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi)}
\item\label{item:contrad}
$\lnot {\sv G}\lto {\sv G}\lto {\sv F}$;
\item\label{item:double-neg}
$\lnot\lnot {\sv F}\lto {\sv F}$;
\item\label{item:other-way}
${\sv F}\lto\lnot\lnot {\sv F}$;
\item\label{item:other-contrap}
$({\sv F}\lto {\sv G})\lto \lnot {\sv G}\lto\lnot {\sv F}$;
\item\label{item:imp}
${\sv F}\lto\lnot {\sv G}\lto\lnot({\sv F}\lto {\sv G})$.
\item\label{item:two-cases}
$({\sv F}\lto {\sv G})\lto(\lnot {\sv F}\lto {\sv G})\lto {\sv G}$.
\end{enumerate}
\end{lemma}

\begin{proof}
\ref{item:contrad}.  The following is a formal proof in $\psys L$
  from $\lnot {\sv G}$: 
\begin{equation*}\lnot {\sv G}\fcom \lnot {\sv G}\lto (\lnot {\sv F}\lto \lnot {\sv G})\fcom \lnot {\sv F}\lto \lnot {\sv G}\fcom (\lnot {\sv F}\lto\lnot
{\sv G})\lto {\sv G}\lto {\sv F}\fcom {\sv G}\lto {\sv F}.\end{equation*}
So $\lnot {\sv G}\proves[L]{\sv G}\lto {\sv F}$.
By the Deduction Theorem, the claim follows.

\ref{item:double-neg}. By part (\ref{item:contrad}) (and Lemma
\ref{lem:conv-ded}) we have 
$\lnot\lnot {\sv F}\proves[L]\lnot {\sv F}\lto\lnot\lnot\lnot {\sv
  F}$.  Use contraposition to 
get $\lnot\lnot {\sv F}\proves[L]{\sv F}$, then use the Deduction Theorem to get
the claim.
\end{proof}

We know how to evaluate a formula at a given truth-assignment.  The
following shows that we can prove in $\psys L$ the correctness of our
computation.

\begin{theorem}\label{thm:eval-corr}
Let ${\sv F}$ be an $n$-ary formula in the signature $\{\lto,\lnot\}$.  Let
$\tuple e$ be a truth-assignment for ${\sv F}$.  Define
\begin{equation*}P_i'=
\begin{cases}
P_i, &\text{ if }e_i=1;\\
\lnot P_i, &\text{ if }e_i=0;
\end{cases}
\quad\text{ and }\quad
{\sv F}'=
\begin{cases}
{\sv F},&\text{ if }\named {\sv F}(\tuple e)=1;\\
\lnot {\sv F}, &\text{ if }\named {\sv F}(\tuple e)=0.
\end{cases}
\end{equation*}
Then $P_0'\fcom \dots\fcom P_{n-1}'\proves[L] {\sv F}'$.
\end{theorem}

\begin{proof}
If ${\sv F}$ is $P_i$, then $P_i'$ is ${\sv F}'$, so
$P_0'\fcom \dots\fcom P_{n-1}'\proves[L] {\sv F}'$. 

Now we can suppose ${\sv F}$ is not just a variable, and use infinite
descent.\index{infinite!--- descent}\index{descent!infinite ---}  So,
assume ${\sv F}'$ is \emph{not} deducible in $\psys L$ from
$P_0'\fcom \dots\fcom P_{n-1}'$.  There are two cases:

Say ${\sv F}$ is $\lnot {\sv G}$ for some formula ${\sv G}$.  Then
\begin{equation*}{\sv F}'=\begin{cases}
{\sv G}',& \text{ if }\named {\sv F}(\tuple e)=1;\\
\lnot\lnot {\sv G}', &\text{ if }\named {\sv F}(\tuple e)=0.
\end{cases}
\end{equation*}
Hence ${\sv G}'$ is also not deducible; but ${\sv G}$ is shorter than ${\sv F}$.

Say ${\sv F}$ is ${\sv G}\lto {\sv H}$ for some formulas ${\sv G}$ and ${\sv H}$.  Then
\begin{align*}
P_0',\dots,P_{n-1}'&\proves[L] {\sv G}'&
P_0',\dots,P_{n-1}'&\proves[L] {\sv H}'.
\end{align*}  
There are three sub-cases to
consider, according as
\begin{compactenum}[1)]
  \item
${\sv G}'$ is $\lnot {\sv G}$, or
\item
 ${\sv H}'$ is ${\sv H}$, or
\item
 ${\sv G}'$ is ${\sv G}$ and ${\sv H}'$ is $\lnot {\sv H}$.\qedhere
\end{compactenum}
\end{proof}

\begin{corollary}
The proof-system $\psys L$ is complete.
\end{corollary}

\begin{proof}
Suppose ${\sv F}_0,\dots,{\sv F}_{m-1}\models {\sv F}_m$, the formulas being $n$-ary.
Let ${\sv G}$ be the tautology ${\sv F}_0\lto\dots \lto {\sv F}_m$.  Then for all $n$-ary
truth-assignments $\tuple e$, by Theorem~\ref{thm:eval-corr} (in which
${\sv G}'$ is ${\sv G}$, since ${\sv G}$ is a tautology), we have
\begin{equation*}
P_0',\dots,P_{n-1}'\proves[L]{\sv G}.
\end{equation*}
If $n=0$, we are done.  If $n>0$, then by the Deduction Theorem we
have
\begin{align*}
P_0',\dots,P_{n-2}'&\proves[L]P_{n-1}\lto {\sv G},&
P_0',\dots,P_{n-2}'&\proves[L]\lnot P_{n-1}\lto {\sv G},
\end{align*}
so $P_0',\dots,P_{n-2}'\proves[L]{\sv G}$ by Detachment and
Lemma~\ref{lem:several}~\eqref{item:two-cases}.  Continuing, we find
$\proves[L]{\sv G}$, whence ${\sv F}_0,\dots,{\sv F}_{m-1}\proves[L]{\sv F}_m$
by repeated use of Lemma~\ref{lem:conv-ded}.
\end{proof}

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Prove Corollary~\ref{cor:D}.
\item
Prove Theorem~\ref{thm:L-sound}.
\item
Prove Lemma \ref{lem:conv-ded}.
\item
Prove parts  
(\ref{item:other-way}), (\ref{item:other-contrap}),
(\ref{item:imp}) and (\ref{item:two-cases}) of Lemma
\ref{lem:several}.
\item
Supply the missing details in the proof of Theorem
\ref{thm:eval-corr}. 
\end{enumerate}


\section{Compactness}\label{sect:compactness}

So far, we have dealt with only finitely many formulas at once.  But
suppose $\Ey$ is a possibly infinite set of formulas.  If $\psys N$ is a proof-system, then the expression
\begin{equation*}
\Ey\proves[N]\sv F
\end{equation*}
has the same meaning as before.  So does the expression
\begin{equation}\label{eqn:Ey}
\Ey\models {\sv F},
\end{equation}
except that there may be no $n$ such that each formula in $\Ey\cup\{\sv F\}$ is $n$-ary.

A \textbf{truth-assignment,}\dindexsub{truth}{---{}-assignment} simply, is a function from $\N$ to $\B$.  Then $\Ey$ is \textbf{satisfied}\dindex{satisfied} by a truth-assignment $k\mapsto e_k$ if $\named{\sv G}(e_0,\dots,e_{n-1})=1$ for every $n$-ary formula $\sv G$ in $\Ey$, for every $n$ in $\N$; and $\sv F$ is \textbf{true}\dindex{true} in this assignment if $\named{\sv F}(e_0,\dots,e_{n-1})=1$ (assuming $\sv F$ is $n$-ary).  Then~\eqref{eqn:Ey} holds, by definition, if $\sv F$ is true in every truth-assignment that satisfies $\Ey$.
  Hence the following are
equivalent:
\begin{compactenum}[1)]
  \item
$\Ey$ does not logically entail $\sv F$;
\item
$\Ey\cup\{\lnot {\sv F}\}$ is
\textbf{satisfiable}\dindex{satisfiable} (that is, satisfied by some truth-assignment; note
that this definition is compatible with the one in \S \ref{equivalent}).
\end{compactenum}

\begin{theorem}[Compactness]\label{thm:compactness}%
\dindex{Compactness Theorem}%
\dindexsub{theorem}{Compactness Th---}
If every finite subset of a set of formulas is satisfiable, then the
whole set is satisfiable.
\end{theorem}

\begin{proof}
Suppose $\Ey$ is an infinite set of formulas such that every finite
subset of $\Ey$ is satisfiable.  For any $n$ in $\N$, let $\Ey_n$
consist of the $n$-ary formulas in $\Ey$.  Then $\Ey_n$ is
finite, so it is satisfiable by assumption.  In particular, $\Ey_n$
is satisfied 
by a certain truth-assignment 
\begin{equation*}(e_0^n,e_1^n,\dots,e_{n-1}^n).\end{equation*}
For each $n$, let such an assignment be chosen.  So for each pair
$(i,n)$ of natural numbers, if $i<n$, then we have
chosen a certain element $e_i^n$ of $\B$.

In the following way, we can recursively define an infinite
truth-assignment $(e_0,e_1,e_2,\dots)$ satisfying $\Ey$.  Suppose
$(e_0,\dots,e_{k-1})$ has been chosen so that there are
\emph{infinitely many} values of $n$ such that $k\leq n$ and
\begin{equation*}(e_0^n,\dots,e_{k-1}^n)=(e_0,\dots,e_{k-1}).\end{equation*}
(This is a trivial assumption if
$k=0$.)  Then choose $e_k$ so that
\begin{equation*}(e_0^n,\dots,e_{k}^n)=(e_0,\dots,e_{k})\end{equation*}
for infinitely many values of $n$.  (Why does this $e_k$ exist?)

We now have that $(e_0,\dots,e_{n-1})$ satisfies $\Ey_n$ for each $n$
in $\N$.  Therefore the whole assignment $(e_0,e_1,\dots)$
satisfies $\Ey$. 
\end{proof}

\begin{corollary}
If $\psys N$ is a sound, complete proof-system, then 
\begin{equation*}
\Ey\models {\sv F}\iff\Ey\proves[N]{\sv F}
\end{equation*} 
for all formulas ${\sv F}$ and sets $\Ey$ of formulas.
\end{corollary}

\begin{proof}
If $\Ey\proves[N] {\sv F}$, then ${\sv G}_0,\dots,{\sv G}_{m-1}\proves[N] {\sv F}$ 
for some formulas
${\sv G}_i$ in $\Ey$, since proofs are finite.  Hence
${\sv G}_0,\dots,{\sv G}_{m-1}\models {\sv F}$,
so $\Ey\models {\sv F}$.

If ${\sv F}$ is not derivable in $\psys N$ from $\Ey$, then it is not
derivable from any 
finite subset of $\Ey$.  This means ${\sv F}$ is not a consequence of any
finite subset of $\Ey$, which means that every finite subset of
\begin{equation*}
  \Ey\cup\{\lnot {\sv F}\}
\end{equation*}
is satisfiable.  Hence the whole set is satisfiable by the Compactness
Theorem, so ${\sv F}$ is not a consequence of $\Ey$.
\end{proof}

\subsection*{Exercise}

Identify the parts of the proof of Theorem \ref{thm:compactness} that
do not seem fully justified, and justify them if you can.



%\input{chapter-sets.tex}

\chapter{Sets and Relations}\label{ch:sets}
\setcounter{section}{-1}
\section{Boolean operations on sets}\label{sect:Boole}

As observed in \S~\ref{sect:p-formulas}, propositional logic is a
model\index{model} of the use of conjunctions in ordinary language.  A basic
\emph{application} of propositional logic is to \emph{sets.}  In fact, the sets that will be discussed here need only be classes; I call them sets, because this is the usual terminology.

As in \S\S~\ref{sect:sets} and~\ref{sect:quantifiers}, suppose $\universe$ is some large set---a \textbf{universal
  set}\dindexsub{universal}{--- set}\dindexsub{set}{universal ---}, which
will include all of the other sets that we shall work with.
Again, by the Axiom of Separation,~\ref{ax:separation}, if
$P$ is a predicate and $\universe$ is a set, then we can form a set
\begin{equation}\label{eqn:set}
  \{x\in\universe\colon Px\}.
\end{equation}
We have not yet said much about what $P$ might be.  Now we do.

If $A\included\universe$ and
$c\in\universe$, then we can form the proposition
\begin{equation*}
  c\in A,
\end{equation*}
which is either true or false.  We can analyze this proposition into
two parts:
\begin{center}
  \begin{tabular}{|r|l|}\hline
$c$ & $\in A$\\\hline
subject & predicate\\\hline
  \end{tabular}
\end{center}
With the predicate $\in A$ and an
\textbf{individual variable}\dindex{individual
  variable}\dindexsub{variable}{individual ---}, $x$, we can make the 
\textsl{formula}
\begin{equation*}
  x\in A.
\end{equation*}
This is not a \emph{propositional} formula, since $\in$ is not a
symbol of propositional logic.  Let us call the formula a
\textbf{set-theoretic formula}%
\dindexsub{formula}{set-theoretic ---}%
\dindexsub{set}{---{}-theoretic formula}  or an
\textbf{$\in$-formula.}%
\dindexsub{formula}{epsilon-{}---@$\in$-{}---}
In particular, it is an $\in$-formula with $A$ as a
\textbf{parameter.}%
\index{parameter}  
We may replace this parameter
with other sets, but for now, our only individual variable
will be $x$.  We shall allow more variables in \S~\ref{sect:cartesian}.

Meanwhile, we can create new $\in$-formulas in $x$ from formulas $x\in
A$, just as we create new propositional formulas from the
propositional variables $P_k$.  So $0$ and $1$, along with $x\in A$,
are $\in$-formulas in $x$, and if $\phi(x)$ and $\psi(x)$ are
arbitrary $\in$-formulas in $x$, then so are $\lnot\phi(x)$ and
$(\phi(x)*\psi(x))$, where $*$ is one of $\land$, $\lor$, $\eor$,
$\lto$, and $\liff$. 
Then each $\in$-formula in $x$ can be written as
\begin{equation*}
\sv F(x\in A_0,\dots,x\in A_{n-1})
\end{equation*}
for some $n$ in $\N$, where $\sv F$ is an $n$-ary propositional formula, and each parameter $A_k$ is a set.  We shall always choose these parameters from among the subsets of $\universe$.

Suppose $\phi(x)$ is an $\in$-formula, and $c\in\universe$.  Then we can obtain the \textbf{$\in$-sentence}\index{sentence} $\phi(c)$, which is the result of replacing each $x$ in $\phi(x)$ with $c$.  This sentence is true or false.  Indeed, the sentence $c\in A$ is true if and only if $c$ is in $A$; and $\lnot\phi(c)$ is true if and only if $\phi(c)$ is false; and $(\phi(c)\land\psi(c))$ is true if and only if both $\phi(c)$ and $\psi(c)$ are true; and so forth.  Alternatively, if $\phi(x)$ is $\sv F(x\in A_0,\dots,x\in A_{n-1})$, then $\phi(c)$ is true if and only if
\begin{equation*}
\named{\sv F}(\tuple e)=1,
\end{equation*}
where $\tuple e$ is defined by
\begin{equation}\label{eqn:e}
e_k=\begin{cases}
1,&\text{ if }c\in A_k,\\
0,&\text{ if }c\notin A_k.
\end{cases}
\end{equation}

Now each $\in$-formula $\phi(x)$ can be understood as a predicate applied to $x$.  By the Axiom of Separation (\ref{ax:separation}) then, the formula \textbf{defines}\index{defines} a subset of $\universe$, namely
\begin{equation*}
\{x\in\universe\colon \phi(x)\},
\end{equation*}
comprising those $c$ in $\universe$ such that $\phi(c)$ is true.

In particular, the set $\{x\in\universe\colon x\in A\}$ is just $A$ itself.  We usually write the negation $\lnot\ x\in A$ as
\begin{equation*}
x\notin A.
\end{equation*}
Then by \S~\ref{sect:quantifiers}, this formula defines the \textbf{complement}\dindex{complement} of $A$
in~$\universe$: 
\begin{equation*}
  \{x\in\universe\colon x\notin A\}=A\comp.
\end{equation*}
Suppose also $B\included\universe$.  
Using both of the formulas $x\in A$ and $x\in B$, we obtain the
following standard combinations:
\begin{equation*}
\{x\in \universe\colon x\in A\land x\in B\}=A\cap B,\glossary{$A\cap B$}
\end{equation*}
 the \textbf{intersection}\dindex{intersection} of $A$ and~$B$, which contains everything that is
    in \emph{both} $A$ and~$B$;
\begin{equation*}
\{x\in \universe\colon x\in A\lor x\in B\}=A\cup B, 
\end{equation*}
the \textbf{union}\dindex{union} of
    $A$ and~$B$, which contains everything that is 
    in (at least) \emph{one} of $A$ and~$B$ (the union was defined first in
    \S~\ref{sect:sets});  
\begin{equation*}
\{x\in \universe\colon x\in A\eor x\in B\}=A\symdiff
    B,\glossary{$A\symdiff B$} 
\end{equation*}
the
    \textbf{symmetric difference}\dindexsub{symmetric}{---
    difference}\dindexsub{difference}{symmetric ---} of
    $A$ and~$B$, which contains everything that is 
    in \emph{exactly one} of $A$ and~$B$; 
\begin{equation*}
\{x\in \universe\colon x\in A\land x\notin B\}=A\setminus
    B,\glossary{$A\setminus B$}
\end{equation*}
     the
    \textbf{difference}\dindex{difference} of $A$ and~$B$, which contains everything that is
    in $A$, but \emph{not} in~$B$.
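For example, if $\universe=\{0,1,2,3,4\}$, while $A=\{0,1,2\}$ and
$B=\{2,3\}$, then
\begin{align*}
  A\cap B&=\{2\},&
  A\cup B&=\{0,1,2,3\},\\
  A\symdiff B&=\{0,1,3\},&
  A\setminus B&=\{0,1\}.
\end{align*}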

Pictures of these combinations are in Figure~\ref{figure:sets}.
\begin{figure}[t!]
\begin{center}
  \setlength{\unitlength}{0.3pt}
  \begin{picture}(180,240)
\put(0,190){$A$}
\put(10,80){$A$}
\put(0,0){\line(1,0){180}}
\put(0,0){\line(0,1){180}}
\put(0,180){\line(1,0){180}}
\put(180,0){\line(0,1){180}}
\put(90,90){\circle{100}}
\put(40,90){\line(1,0){100}}
\put(45,110){\line(1,0){90}}
\put(45,70){\line(1,0){90}}
\put(60,130){\line(1,0){60}}
\put(60,50){\line(1,0){60}}
\end{picture}\qquad\qquad
  \begin{picture}(240,240)
\put(0,190){$A\cap B$}
\put(45,80){$A$}
\put(170,80){$B$}
\put(0,0){\line(1,0){240}}
\put(0,0){\line(0,1){180}}
\put(0,180){\line(1,0){240}}
\put(240,0){\line(0,1){180}}
\put(90,90){\circle{100}}
\put(150,90){\circle{100}}
\put(100,90){\line(1,0){40}}
\put(105,110){\line(1,0){30}}
\put(105,70){\line(1,0){30}}
\end{picture}\qquad\qquad
  \begin{picture}(240,240)
\put(0,190){$A\cup B$}
\put(10,80){$A$}
\put(205,80){$B$}
\put(0,0){\line(1,0){240}}
\put(0,0){\line(0,1){180}}
\put(0,180){\line(1,0){240}}
\put(240,0){\line(0,1){180}}
\put(90,90){\circle{100}}
\put(150,90){\circle{100}}
\put(40,90){\line(1,0){160}}
\put(45,110){\line(1,0){150}}
\put(45,70){\line(1,0){150}}
\put(60,130){\line(1,0){120}}
\put(60,50){\line(1,0){120}}
\end{picture}\\\mbox{}\\
  \begin{picture}(180,240)
\put(0,190){$A\comp$}
\put(45,80){$A$}
\put(0,0){\line(1,0){180}}
\put(0,0){\line(0,1){180}}
\put(0,180){\line(1,0){180}}
\put(180,0){\line(0,1){180}}
\put(90,90){\circle{100}}
\put(0,90){\line(1,0){40}}
\put(140,90){\line(1,0){40}}
\put(0,110){\line(1,0){45}}
\put(135,110){\line(1,0){45}}
\put(0,70){\line(1,0){45}}
\put(135,70){\line(1,0){45}}
\put(0,130){\line(1,0){60}}
\put(120,130){\line(1,0){60}}
\put(0,50){\line(1,0){60}}
\put(120,50){\line(1,0){60}}
\put(0,150){\line(1,0){180}}
\put(0,30){\line(1,0){180}}
\put(0,170){\line(1,0){180}}
\put(0,10){\line(1,0){180}}
\end{picture}\qquad\qquad
  \begin{picture}(240,240)
\put(0,190){$A\symdiff B$}
\put(10,80){$A$}
\put(205,80){$B$}
\put(0,0){\line(1,0){240}}
\put(0,0){\line(0,1){180}}
\put(0,180){\line(1,0){240}}
\put(240,0){\line(0,1){180}}
\put(90,90){\circle{100}}
\put(150,90){\circle{100}}
\put(40,90){\line(1,0){60}}
\put(140,90){\line(1,0){60}}
\put(45,110){\line(1,0){60}}
\put(135,110){\line(1,0){60}}
\put(45,70){\line(1,0){60}}
\put(135,70){\line(1,0){60}}
\put(60,130){\line(1,0){120}}
\put(60,50){\line(1,0){120}}
\end{picture}\qquad\qquad
  \begin{picture}(240,240)
\put(0,190){$A \setminus B$}
\put(10,80){$A$}
\put(170,80){$B$}
\put(0,0){\line(1,0){240}}
\put(0,0){\line(0,1){180}}
\put(0,180){\line(1,0){240}}
\put(240,0){\line(0,1){180}}
\put(90,90){\circle{100}}
\put(150,90){\circle{100}}
\put(40,90){\line(1,0){60}}
%\put(140,90){\line(1,0){60}}
\put(45,110){\line(1,0){60}}
%\put(135,110){\line(1,0){60}}
\put(45,70){\line(1,0){60}}
%\put(135,70){\line(1,0){60}}
\put(60,130){\line(1,0){60}}
\put(60,50){\line(1,0){60}}
\end{picture}
\end{center}
\caption{Venn diagrams of combinations of sets}
\label{figure:sets}
\end{figure}
The symbols ${}\comp$, $\cap$, $\cup$, $\symdiff$, and~$\setminus$,
along with $\emptyset$, stand for \textbf{Boolean
  operations}\dindexsub{Boolean}{---
  operation}\dindexsub{operation}{Boolean ---}.  If $\phi(x)$ is $\sv F(x\in A_0,\dots,x\in A_{n-1})$, then $\{x\in\universe\colon \phi(x)\}$ is a \textbf{Boolean combination}\dindexsub{Boolean}{--- combination}\dindexsub{combination}{Boolean ---} of the sets $A_k$.  A set is a Boolean combination of itself; beyond this,
there are two trivial Boolean combinations:
\begin{align*}
    \{x\in \universe\colon 0\} 
&=\emptyset,&
  \{x\in \universe\colon 1\} 
&=\universe.
\end{align*}
We now have a sort of correspondence between propositional logic and
set-theory: 
\begin{center}
  \begin{tabular}{rcl}
$\land$            & $\leftrightsquigarrow$ & $\cap$\\ 
$\lor$ & $\leftrightsquigarrow$ & $\cup$\\ 
$\eor$      & $\leftrightsquigarrow$ & $\symdiff$\\ 
$\lnot$            & $\leftrightsquigarrow$ & ${}\comp$\\
$0$ & $\leftrightsquigarrow$ & $\emptyset$\\ 
$1$ & $\leftrightsquigarrow$ & $\universe$
  \end{tabular}
\end{center}
The set $\universe$ depends on the situation.

We can determine membership in Boolean combinations of sets by means
of truth-tables:

\begin{example}\label{example:to}
  From the truth-table
  \begin{equation*}
    \begin{array}{c|c|c}
      \sv P & \lto & {\sv Q}\\\hline
0&1&0\\
1&0&0\\
0&1&1\\
1&1&1
    \end{array},
  \end{equation*}
by considering the lines where the formula $\sv P\lto {\sv Q}$ takes the value
$1$, 
we can conclude that the set $\{x\in\universe\colon x\in {A}\lto x\in {B}\}$
consists of those $c$ in $\universe$ such that one of the following
holds:
\begin{compactenum}[1)]
  \item
$c\notin {A}\amp c\notin {B}$, or
\item
$c\notin {A}\amp c\in {B}$, or
\item
$c\in {A}\amp c\in {B}$.
\end{compactenum}
Alternatively, from the line of the truth-table where $\sv P\lto {\sv Q}$ takes
the value $0$, we
conclude that  the set $\{x\in\universe\colon x\in A\lto x\in {B}\}$ consists
of those $c$ such that either $c\notin A$ or $c\in {B}$.
\end{example}

The foregoing example should recall the notions of disjunctive and
conjunctive normal forms%
\index{disjunctive!--- normal form}%
\index{normal!disjunctive --- form}%
\index{conjunctive normal form}%
\index{normal!conjunctive --- form}
  in \S~\ref{normal}. 

The Axiom of Extension, \ref{axiom:extension}, \index{axiom!A--- of
  Extension}\index{extension!Axiom of E---} is that sets
are determined by their members.  That is,
two subsets $A$ and~$B$ of $\universe$ are equal if
\begin{equation}\label{eqn:extension}
  c\in A\iff c\in B
\end{equation}
for all $c$ in $\universe$.  Strictly, we need this to conclude $A=\{x\in\universe:x\in A\}$.
The \emph{converse}\index{converse} of the Extension Axiom is obviously true:
If two sets are
equal, then in particular, they have the same members.  

\begin{theorem}\label{thm:ext-sets}
Two subsets $A$ and $B$ of $\universe$ are equal if and only if
\begin{equation}\label{ax:ext}
  \{x\in \universe\colon x\in A\liff x\in B\}=\universe.
\end{equation}
\end{theorem}

\begin{proof}
If~\eqref{ax:ext} holds,
then, for all $c$ in $\universe$, the
sentence $c\in A$ is true if and only if the sentence $c\in B$
is true---that is,~\eqref{eqn:extension} holds, so $A=B$
by the Axiom of Extension.
Conversely, if $A=B$, then the two members of~\eqref{ax:ext} have the same elements, so the equation is true by
the Axiom.  
\end{proof}

Another consequence of the Axiom of Extension is that equivalent propositional formulas give rise to equal sets in the following sense.

\begin{theorem}\label{thm:equal-sets}
  Suppose ${\sv F}_0$ and ${\sv F}_1$ are $n$-ary propositional formulas such that
  \begin{equation*}
{\sv F}_0\sim {\sv F}_1.
  \end{equation*}    
When $e\in\B$, let $\phi_e(x)$ be the
  $\in$-formula ${\sv F}_e(x\in A_0\fcom \dots\fcom x\in A_{n-1})$.
Then 
\begin{equation}\label{eqn:F01}
  \{x\in\universe\colon \phi_0(x)\}=\{x\in\universe\colon \phi_1(x)\}.
\end{equation}
\end{theorem}

\begin{proof}
If $c\in \universe$, let the $n$-ary truth-assignment $\tuple e$ be as
  defined by~\eqref{eqn:e} above.  Then
  \begin{equation*}
    c\in\{x\in\universe\colon \phi_0(x)\}\iff \named {\sv F}_0(\tuple e)=1
\iff \named {\sv F}_1(\tuple e)=1\iff c\in\{x\in\universe\colon \phi_1(x)\},
  \end{equation*}
  so~\eqref{eqn:F01} holds.
\end{proof}

Hence for example we have 
\begin{equation*}
A\comp=\universe\setminus A.
\end{equation*}
Equation~\eqref{eqn:F01} is an
\textbf{identity,}\dindex{identity} 
more precisely a
\textbf{set-theoretic identity,}%
\dindexsub{set}{---{}-theoretic identity}
because it holds for all choices
of $A_0$, \dots, $A_{n-1}$.  

\begin{example*}[\ref{example:to} continued]
  Because $\sv P\lto {\sv Q}\sim\lnot \sv P\lor {\sv Q}$, we have
\begin{equation*}  
\{x\in\universe\colon x\in A\lto x\in B\}=\{x\in\universe\colon x\notin A\lor
    x\in B\}.
    \end{equation*}
\end{example*}

Again, we have 
$\{x\in\universe\colon x\in A\land x\in B\}=A\cap B$
by definition; therefore, it seems obvious that
\begin{equation}\label{eqn:xfp}
  \{x\in \universe\colon \phi(x)\land\psi(x)\} 
=\{x\in\universe\colon \phi(x)\}\cap\{x\in\universe\colon \psi(x)\}
\end{equation}
for all $\in$-formulas $\phi(x)$ and $\psi(x)$.
However,~\eqref{eqn:xfp} is not immediate, since the formulas $x\in A$
and $x\in B$ are only special cases of $\in$-formulas.  We can
prove~\eqref{eqn:xfp} by letting $\{x\in\universe\colon \phi(x)\}=A$
and $\{x\in\universe\colon \psi(x)\}=B$.  Then 
\begin{align*}
c\in\{x\in \universe\colon \phi(x)\land\psi(x)\}
&\iff \phi(c)\land\psi(c)\text{ is true}\\
&\iff \phi(c)\text{ is true and }\psi(c)\text{ is true}\\
&\iff c\in A\text{ is true and } c\in B\text{ is true}\\
&\iff c\in A\land c\in B\text{ is true}\\
&\iff c\in\{x\in\universe\colon x\in A\land x\in B\}\\
&\iff c\in A\cap B\\
&\iff c\in \{x\in\universe\colon \phi(x)\}\cap\{x\in\universe\colon \psi(x)\},
\end{align*}
so~\eqref{eqn:xfp} follows.

We can also obtain~\eqref{eqn:xfp} from
\begin{equation*}
\{x\in\universe\colon x\in A\land x\in B\}=\{x\in\universe\colon x\in A\}\cap\{x\in\universe\colon x\in B\}
\end{equation*}
by \emph{replacing} the formula $x\in A$ with $\phi(x)$, and $x\in B$ with $\psi(x)$.
That such an action preserves equality is a consequence of the following, which should be compared with
Theorem~\ref{thm:replacement}.

  \begin{theorem}[Replacement]\label{thm:set-replacement}%
    \dindexsub{replace}{R---ment Theorem}
    \dindexsub{theorem}{Replacement Th---}
    Suppose ${\sv F}$ is a sub-formula of
    the $n$-ary formula ${\sv G}$, so that ${\sv G}$ itself is
    ${\sv H}(P_0,\dots,P_{n-1},{\sv F})$ for some formula ${\sv H}$.  Let
    \begin{equation*}
      B=\{x\in\universe\colon {\sv F}(x\in A_0\fcom \dots\fcom x\in A_{n-1})\}.
    \end{equation*}
Then the set $\{x\in\universe\colon {\sv G}(x\in A_0\fcom \dots\fcom x\in
  A_{n-1})\}$ is equal to
\begin{equation*}
      \{x\in\universe\colon {\sv H}(x\in A_0\fcom \dots\fcom x\in A_{n-1}\fcom x\in B)\}.
\end{equation*}
  \end{theorem}

  \begin{theorem}\label{thm:f-to-set}
For all $\in$-formulas $\phi(x)$ and $\psi(x)$,
    \begin{align}\label{eqn:pp-cap}
  \{x\in \universe\colon \phi(x)\land\psi(x)\} 
&=\{x\in\universe\colon \phi(x)\}\cap\{x\in\universe\colon \psi(x)\},\\\notag
  \{x\in \universe\colon \phi(x)\lor\psi(x)\} 
&=\{x\in\universe\colon \phi(x)\}\cup\{x\in\universe\colon \psi(x)\},\\\notag
  \{x\in \universe\colon \phi(x)\eor\psi(x)\} 
&=\{x\in\universe\colon \phi(x)\}\symdiff\{x\in\universe\colon \psi(x)\},\\\notag
  \{x\in \universe\colon \lnot\phi(x)\} 
&=\{x\in\universe\colon \phi(x)\}\comp.
\end{align}
  \end{theorem}

  \begin{proof}
We have already proved~\eqref{eqn:pp-cap}; but to obtain it from Theorem~\ref{thm:set-replacement}, we can argue as follows.
    Let $A=\{x\in\universe\colon \phi(x)\}$ and
    $B=\{x\in\universe\colon \psi(x)\}$, and let ${\sv H}$ be the binary formula
    $P_0\land P_1$.  Then
    \begin{align*}
&\phantom{{}={}}\{x\in \universe\colon \phi(x)\land\psi(x)\}&& \\
&=\{x\in\universe\colon {\sv H}(\phi(x),\psi(x))\}&&\text{[by def'n of ${\sv H}$]}\\
&=\{x\in\universe\colon {\sv H}(x\in A\fcom x\in B)\} &&\text{[by Replacement]}\\
&=\{x\in\universe\colon x\in A\land x\in B\}&&\text{[by def'n of ${\sv H}$]}\\
&=A\cap B&&\text{[by def'n of $\cap$]}\\
&=\{x\in\universe\colon \phi(x)\}\cap\{x\in\universe\colon \psi(x)\} &&\text{[by
    def'n of $A$ and $B$]}.
    \end{align*}
The other identities are established likewise.
  \end{proof}

\begin{example*}[\ref{example:to} continued~again]
We now have
  \begin{align*}
\{x\in \universe\colon x\in A\lto x\in B\}
&=\{x\in\universe\colon x\notin A\lor x\in B\}\\
&=\{x\in\universe\colon x\notin A\}\cup\{x\in\universe\colon x\in B\}\\
&=A\comp\cup B,
  \end{align*}
and similarly, $\{x\in \universe\colon x\in A\lto x\in B\}=
    (A\comp\cap B\comp)\cup(A\comp\cap B)\cup(A\cap B)$.
Hence the equation
\begin{equation*}
A\comp\cup B=(A\comp\cap B\comp)\cup(A\comp\cap B)\cup(A\cap B)
\end{equation*}
is an identity.
\end{example*}

\begin{example}
  From the truth-table
  \begin{equation*}
    \begin{array}{c|c|c|c|c}
      \sv P & \land & ({\sv Q} & \lor & {\sv R})\\ \hline
0&0&0&0&0\\
1&0&0&0&0\\
0&0&1&1&0\\
1&1&1&1&0\\
0&0&0&1&1\\
1&1&0&1&1\\
0&0&1&1&1\\
1&1&1&1&1
    \end{array}
  \end{equation*}
we can infer that the set
$\{x\in\universe\colon x\in A\land(x\in B\lor x\in C)\}$ is precisely
\begin{equation*}
  (A\cap B\cap C\comp)\cup(A\cap B\comp\cap C)\cup(A\cap B\cap C);
\end{equation*}
alternatively, the set is
$A\cap\{x\in\universe\colon x\in B\lor x\in C\}$, which is
$A\cap (B\cup C)$.
\end{example}

As a consequence of Lemmas~\ref{lem:simp} and~\ref{lem:abs-laws}, we have:

\begin{lemma}\label{lem:set-identities}
The following are set-theoretic identities.
  \begin{compactenum}
\item
\textbf{Definition:}
\begin{align}
A\symdiff B&=(A\cup B)\setminus (A\cap B)\\
&=(A\setminus B)\cup(B\setminus A),\\ \label{eqn:sm1}
  A\setminus B&=A\cap B\comp;
\end{align}
 \item
\textbf{Double complementation:}
\begin{gather}
     A\comp{}\comp  = A;
\end{gather}
\item
\textbf{De Morgan's Laws:} \dindex{De Morgan's Laws}
\dindexsub{law}{De Morgan's L---s}
\begin{equation}
  \label{eqn:DM}
\begin{split}
  (A\cup B)\comp & =  A\comp\cap B\comp, \\
  (A\cap B)\comp & = A\comp\cup B\comp;
\end{split}
\end{equation}
\item
\textbf{Commutativity:} \dindexsub{commutativity}{Law of C---}
\dindexsub{law}{L--- of Commutativity}
 \begin{align}
     A\cap B & = B\cap A, &  A\cup B & = B\cup A;
 \end{align}
\item
\textbf{Associativity:} \dindexsub{associativity}{Law of A---}
\dindexsub{law}{L--- of Associativity}
\begin{align}
     (A\cap B)\cap C & = A\cap (B \cap C),   &  (A \cup B)\cup C & =
   A\cup (B\cup C);
 \end{align}
\item
Mutual \textbf{Distributivity} of $\cap$ and $\cup$:
\dindexsub{distributivity}{Law of D---} \dindexsub{law}{L--- of
  Distributivity} 
\begin{equation}
  \begin{split}
       A\cap(B\cup C)&= (A\cap B)\cup(A\cap C),\\A\cup (B\cap C)&=(A\cup
   B)\cap(A\cup C);
  \end{split}
\end{equation}
\item
\textbf{Redundancy:}
\begin{align}
&& \emptyset\comp&=\universe,& \universe\comp&=\emptyset;&&\\
  \label{eqn:cap-redund} 
  A\cap A & = A, & A\cap A\comp & = \emptyset, & A\cap \universe & =
  A, & A\cap \emptyset & = \emptyset,\\ \label{eqn:cup-redund}
  A\cup  A & = A, & A\cup A\comp & = \universe, & A\cup  \emptyset & =
  A, & A\cup  \universe & = \universe;
\end{align}
\item
\textbf{New set:}
\begin{gather}
    A= (A\cap B)\cup (A\cap B\comp);
\end{gather}
\item
\textbf{Absorption:} \dindex{Absorption Laws}
\dindexsub{law}{Absorption L---s}
\begin{gather}
    \begin{split}
A\cap(A\cup B)& = A,\\
A\cup(A\cap B)& = A.
  \end{split}
\end{gather}
  \end{compactenum}
\end{lemma}

We can now prove other set-theoretic identities by a process of
simplification parallel to the one we use for logical equivalences:

\begin{theorem}\label{thm:minus}
  The equations
  \begin{align}\label{eqn:sm2}
    A\setminus(B\cap C)&=(A\setminus B)\cup(A\setminus C),\\
    \label{eqn:sm3} 
    A\setminus(B\cup C)&=(A\setminus B)\cap(A\setminus C)
  \end{align}
are identities of sets.
\end{theorem}

\begin{proof}
For \eqref{eqn:sm2}, we have the chain of identities
\begin{align*}
  A\setminus(B\cap C)&= A\cap(B\cap C)\comp&& \text{[def'n of $\setminus$]}\\
&=A\cap(B\comp\cup C\comp)&&\text{[De Morgan]}\\
&=(A\cap B\comp)\cup (A\cap C\comp)&&\text{[distributivity]}\\
&=(A\setminus B)\cup(A\setminus C)&&\text{[def'n of $\setminus$]}.
\end{align*}
Equation \eqref{eqn:sm3} is an exercise.
\end{proof}

An alternative method for proving set-theoretic identities uses the
original statement of the Axiom of Extension in \S~\ref{sect:sets}.
To prove~\eqref{eqn:sm1} for example, it is enough to prove
$A\setminus B\included A\cap B\comp$ and $A\cap B\comp\included
A\setminus B$.  To prove the former, suppose $c\in A\setminus B$.
Then $c\in A$, but $c\notin B$.  Hence also $c\in B\comp$.  Hence
$c\in A\cap B\comp$.  Therefore $A\setminus B\included A\cap B\comp$.
The other inclusion can be proved similarly.

\subsection*{Exercises}

\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item\label{exer:unequal-sets}
Prove the converse\index{converse} of Theorem~\ref{thm:equal-sets} in
the following
sense:  Show that, if ${\sv F}$ and ${\sv G}$ are not equivalent, then there is a
set $\universe$ with
subsets $A_k$ such that $\{x\in\universe\colon {\sv F}(x\in A_0\fcom\dots\fcom
x\in A_{n-1})\}\neq \{x\in\universe\colon {\sv G}(x\in A_0\fcom\dots\fcom
x\in A_{n-1})\}$.  (\emph{Suggestion:}  Let $\universe$ be a set of
truth-assignments, and let $A_k$ comprise those $\tuple e$ such that
$e_k=1$.) 
\item
Prove Theorem~\ref{thm:set-replacement}.
\item
Complete the proof of Theorem~\ref{thm:f-to-set}.
\item
Complete the proof of Lemma~\ref{lem:set-identities}.
\item
Complete the proof of Theorem~\ref{thm:minus}.
    \item
Prove that $(A\setminus B)\cup(B\setminus A)=A\symdiff B$. 
\item
Prove that
$(A\cap B)\cup(A\cup B)\comp=\{x\in\universe\colon x\in A\liff x\in B\}$.
\item
Prove the following set-theoretic identities:
\begin{enumerate}
\renewcommand{\labelenumii}{(\theenumii)}
 \item
  $(A\setminus B)\comp=A\comp\cup B$
  \item
  $B\comp\setminus A\comp=A\setminus B$
  \item
  $A\setminus(B\setminus C)\comp=(A\cap B)\setminus C$
\end{enumerate}
\end{enumerate}

\section{Inclusions and implications}

We consider inclusion in place of equality.
Corresponding to Theorem~\ref{thm:ext-sets}, we have

\begin{theorem}\label{thm:set-inclusion}
For two subsets $A$ and $B$ of $\universe$, we have $A\included B$ if and only if
\begin{equation*}
  \{x\in \universe\colon x\in A\lto x\in B\}=\universe.
\end{equation*}
\end{theorem}

Corresponding to
Theorem~\ref{thm:equal-sets}, we have:

\begin{theorem}\label{thm:included-sets}
  Suppose ${\sv F}_0$ and ${\sv F}_1$ are $n$-ary propositional formulas such that
  \begin{equation*}
{\sv F}_0\models {\sv F}_1.
  \end{equation*}    
When $e\in\B$, let $\phi_e(x)$ be the
  $\in$-formula ${\sv F}_e(x\in A_0\fcom \dots\fcom x\in A_{n-1})$.
Then 
\begin{equation*}
  \{x\in\universe\colon \phi_0(x)\}\included\{x\in\universe\colon \phi_1(x)\}.
\end{equation*}
\end{theorem}

Some of the rules of inference in Lemma~\ref{lem:new-rules} now translate
into 
\textbf{tautological} inclusions%
\dindexsub{tautological}{--- inclusion}%
\dindexsub{inclusion}{tautological ---} 
(inclusions that are
true for all sets):

\begin{lemma}\label{lem:inclusions}
  The following inclusions are tautological:
\begin{gather}
  A\cap B\included B;\\
A\included A\cup B;\\
  (A \cup B)\cap A\comp\included B.
\end{gather}
\end{lemma}

\begin{proof}
The first two inclusions are translations (justified by
Theorem~\ref{thm:included-sets}) of the logical consequences ${\sv P}\land
{\sv Q}\models {\sv Q}$ and ${\sv P}\models {\sv P}\lor {\sv Q}$; the last inclusion is a
translation of the rule of Disjunctive Syllogism, in view of
\S~\ref{sect:entailment}, Exercise~\ref{exer:and-models}. 
\end{proof}

There is no common symbol for the Boolean operation corresponding to
the connective $\lto$; so rules of inference like Hypothetical
Syllogism and Constructive Dilemma, which involve $\lto$, do not
translate into inclusions like those in the lemma.
However, Theorem~\ref{thm:set-inclusion} shows a connexion between $\lto$ itself and inclusion.  Moreover, logical entailment corresponds to implication in the following sense.

\begin{theorem}\label{thm:set-imp}
  Suppose ${\sv F}_0$, \dots, ${\sv F}_m$ are $n$-ary propositional formulas such that
  \begin{equation*}
{\sv F}_0,\dots,\sv F_{m-1}\models {\sv F}_m.
  \end{equation*}    
When $e\leq m$, let $\phi_e(x)$ be the
  $\in$-formula ${\sv F}_e(x\in A_0\fcom \dots\fcom x\in A_{n-1})$.
Then 
\begin{multline*}
\{x\in\universe\colon\phi_0(x)\}=\universe\amp\dots\amp
\{x\in\universe\colon\phi_{m-1}(x)\}=\universe\\
{}\implies\{x\in\universe\colon\phi_m(x)\}=\universe.
\end{multline*}
\end{theorem}

Now Hypothetical Syllogism and Constructive Dilemma can be 
expressed set-theoretically as implications:
\textbf{tautological implications.}
\dindexsub{tautological}{--- implication}%
\dindexsub{implication}{tautological ---}

\begin{lemma}\label{lem:implications}
The following implications are tautological:
\begin{align}\label{inclusion-is-transitive}
A\included B\amp B\included C&\implies A\included C;\\\label{id:inclusions}
  A\included B\amp C\included D&\implies A\cup C\included B\cup
  D\amp A\cap C\included B\cap D.
\end{align}
\end{lemma}
\begin{proof}
Here~\eqref{inclusion-is-transitive} is a direct translation of~\eqref{eqn:hyp-syl} by means of the theorems above.
Alternatively, suppose $A\included B$ and $B\included C$ and $d\in A$.  Then $d\in
  B$, so $d\in C$.  Thus $A\included C$.

For~\eqref{id:inclusions},
  suppose $A\included B$ and $C\included D$.  Say $d\in A\cup C$.
  Then $d\in A$ or $d\in C$.  If $d\in A$, then $d\in B$, so $d\in
  B\cup D$.  The same conclusion follows similarly if $d\in C$.
  Therefore $A\cup C\included B\cup D$.  The remaining inclusion is an
  exercise. 
\end{proof}

By~\eqref{inclusion-is-transitive}, we can reasonably abbreviate the
proposition $A\included B\amp B\included C$ by
\begin{equation*}
  A\included B\included C.
\end{equation*}
By~\eqref{eqn:cap-redund} and~\eqref{eqn:cup-redund}
above,~\eqref{id:inclusions} has the special cases: 
\begin{align}\label{eqn:const-dil}
A\included B\amp A\included C&\implies A\included B\cap C,\\
\label{eqn:const-dil2}
  A\included B\amp C\included B&\implies A\cup C\included B.
\end{align}

Their converses\index{converse} are a part of the following:
%Some additional tautological implications are thus:

\begin{lemma}\label{lem:implications2}
  The following are true for all sets.
  \begin{enumerate}
\item
$A\included B\cap C\implies A\included B$.
  \item
$A\cup B\included C\implies A\included C$.
\item
$A\cap B=\emptyset\amp A\included B\implies A=\emptyset$.
\item
$A\comp\included A\iff A\comp=\emptyset\iff A=\universe$.
\item
$A\setminus B=\emptyset\iff A\included B$.
\end{enumerate}
\end{lemma}

\begin{proof}
  Suppose $A\included B\cap C$.  Since $B\cap C\included B$ by
  Lemma~\ref{lem:inclusions}, we get $A\included B$ by
  Lemma~\ref{lem:implications}.  The remaining implications are
  exercises. 
\end{proof}

We are now equipped to prove some non-obvious claims:

\begin{example}\label{example:non-obvious}
  Suppose $A\comp\cup(B\symdiff C)\included A\cap B\comp\cap C$.  Then
  \begin{equation}\label{eqn:ex:non-obv}
    A\cap(B\cup C)=(A\cup B)\cap C.
  \end{equation}
Indeed, to see this, note first
\begin{align*}
  A\comp &\included A\comp\cup(B\symdiff C)&&\text{[by
        Lemma~\ref{lem:inclusions}]}\\
        &\included A\cap B\comp\cap C&&\text{[by assumption]}\\
&\included A. &&\text{[by
        Lemma~\ref{lem:inclusions}]}
\end{align*}
Then $A\comp\included A$ by
Lemma~\ref{lem:implications}, and therefore
\begin{equation*}
  A=\universe
\end{equation*}
by Lemma~\ref{lem:implications2}.  By the same lemmas,
and Lemma~\ref{lem:set-identities}, our assumption
now gives us
\begin{equation*}
  (B\setminus C)\cup(C\setminus B)=B\symdiff C\included B\comp\cap
  C=B\setminus C;
\end{equation*}
therefore
$C\setminus B\included B\setminus C$, that is,
\begin{equation*}
  C\cap B\comp\included B\cap C\comp.
\end{equation*}
Say $a\in C\cap B\comp$.  Then $a\in B\comp$.
But also, $a\in B\cap C\comp$, so $a\in B$.  Thus $a\in B\comp\cap
B=\emptyset$, which is absurd.  So $C\cap B\comp$ must be empty, which
means 
\begin{equation*}
  B\included C.
\end{equation*}
Finally then,
\begin{equation*}
A\cap(B\cup C)=B\cup C=C=(A\cup B)\cap C
\end{equation*}
since $A=\universe=A\cup B$.
\end{example}

Where did this example come from?  And, where did the proof come from?
First, note that variations of the proof are possible:  For example,
part of the proof is showing
\begin{equation*}
  C\cap B\comp\included B\cap C\comp\implies C\cap B\comp=\emptyset.
\end{equation*}
But if $C\cap B\comp\included B\cap C\comp$, then 
\begin{equation*}
  C\cap
B\comp\included (B\cap C\comp)\cap(C\cap B\comp)=B\cap(C\comp\cap
C)\cap B\comp=\emptyset.
\end{equation*}
Thus there is no need to look at individual elements of $C\cap
B\comp$, as in the proof above.

Whatever minor adjustments we make, the proof in
Example~\ref{example:non-obvious} does not seem to follow a general
pattern.  Each step is justified, and the conclusion is as desired; so
the proof is correct.  But this observation does not tell us how to
\emph{find} the proof.

There \emph{is} an alternative proof that follows a general pattern;
this proof also suggests how the proposition being proved was
discovered.  The key is the set-theoretic analogue of the disjunctive
normal forms%
\index{disjunctive!--- normal form}%
\index{normal!disjunctive --- form} 
of \S~\ref{normal}:

\begin{example*}[\ref{example:non-obvious} continued]
  We can analyze the given Boolean combinations of $A$, $B$, and $C$
  as follows.  First note that
  \begin{align*}
A\comp
&=(A\comp\cap B\comp)\cup(A\comp\cap B)\\
&=(A\comp\cap B\comp\cap C\comp)\cup (A\comp \cap B\comp\cap C)\cup 
(A\comp\cap B\cap C\comp)\cup (A\comp\cap B\cap C),
  \end{align*}
while
\begin{align*}
B\symdiff C
&=(B\cap C\comp)\cup(B\comp\cap C)\\
&=(A\comp\cap B\cap C\comp)\cup(A \cap B\cap C\comp)\cup(A\comp\cap
B\comp\cap C)\cup(A\cap B\comp\cap C).
  \end{align*}
Therefore
  \begin{multline*}
    A\comp\cup(B\symdiff C)
=(A\comp\cap B\comp\cap C\comp)\cup(A\comp \cap B\comp\cap C)\cup
(A\comp\cap B\cap C\comp)\cup{}\\
{}\cup(A\comp\cap B\cap C)\cup
(A \cap B\cap C\comp)\cup(A\cap B\comp\cap C).
  \end{multline*}
The six constituents of this union are disjoint, and the whole set
$A\comp\cup(B\symdiff C)$ is
assumed to be a subset of its last constituent, $A\cap B\comp\cap C$;
therefore the first five constituents are empty.  We aim to prove
Equation~\eqref{eqn:ex:non-obv}. 
Analyzing the two members of this equation,  we have
\begin{align*}
  A\cap(B\cup C)
&=(A\cap B)\cup(A\cap C)\\
&=(A\cap B\cap C\comp)\cup(A\cap B\cap C)\cup(A\cap B\comp\cap C),\\
(A\cup B)\cap C
&=(A\cap C)\cup(B\cap C)\\
&=(A\cap B\comp\cap C)\cup(A\cap B\cap C)\cup(A\comp\cap B\cap C).
\end{align*}
Under the assumption, two constituents in each case are empty, and
each member of Equation~\eqref{eqn:ex:non-obv} is
$A\cap B\cap C$.
\end{example*}

Thus the alternative proof takes more writing, although it follows a
general procedure that involves writing every set in question as a
union of intersections of the sets $A$, $B$, and $C$ and their
complements. 

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Prove Theorem~\ref{thm:set-inclusion}.
\item
Prove Theorem \ref{thm:included-sets}.
\item
Prove Theorem~\ref{thm:set-imp}.
\item
Complete the proof of Lemma~\ref{lem:implications}.
\item
Complete the proof of Lemma~\ref{lem:implications2}.
\item
Prove the following tautological inclusions:
\begin{enumerate}
  \item
  $A\cap(A\setminus B)\comp\included B$
  \item
  $A\setminus C\included(A\setminus B)\cup(B\setminus C)$
  \item
  $(A\setminus B)\comp\cap(B\setminus C)\comp\included(A\setminus C)\comp$
  \item
  $A\setminus C\included(A\setminus(B\setminus C)\comp)\cup(A\setminus B)$
  \item
  $A\included A\setminus(B\cap B\comp)$
  \item
  $(A\comp\setminus A)\comp\included A$
  \item
  $A\comp\included A\comp\setminus A$
  \item
  $(A\cup B)\setminus C\included(A\setminus C)\cup(B\setminus C)$
  \item
  $B\comp\included(A\setminus B)\cup(A\comp\setminus B)$
  \item
  $A\setminus B\included B\comp$
  \item
  $B\setminus A\included B$
\end{enumerate}
\item
Prove the following implications: 
\begin{enumerate}
  \item
  $\universe\included B\implies\universe=B$
  \item
  $A\included B\amp A\included(B\setminus C)\comp\implies A\included C$
  \item
  $A\comp\included B\cap B\comp\implies A=\universe$
  \item
  $A\included B\amp A\included B\comp\implies A=\emptyset$
  \item
  $A\comp=\universe\implies A\included B$
\item
$A\included B\implies A\cap C\included B\cap C$
\end{enumerate}
\item
Prove the following equivalences: 
\begin{enumerate}
  \item
  $A\included B\iff A\comp\cup B=\universe$
  \item
  $A\not\included B\iff A\cap B\comp\neq\emptyset$
  \item
  $A\included B\iff B\comp\included A\comp$
  \item
  $A\included(B\setminus C)\comp\iff A\cap B\included C$
\end{enumerate}
\item
Simplify
the following
 to the form
$A\comp\cup(B\symdiff C)$:
 \begin{multline*}
(A\comp\cap B\comp\cap C\comp)
\cup(A\comp \cap B\comp\cap C)
\cup(A\comp\cap B\cap C\comp)\cup{}\\
{}\cup(A\comp\cap B\cap C)
\cup(A \cap B\cap C\comp)
\cup(A\cap B\comp\cap C).
 \end{multline*}
\item
Compose an example like \ref{example:non-obvious}.
\end{enumerate}


\section{Cartesian products, and relations}\label{sect:cartesian}

Suppose\label{sect:relations}
$\phi(x)$ is an $\in$-formula as in \S~\ref{sect:Boole}.  Again, this formula
\textbf{defines,}\dindex{defines} in $\universe$, the set
$\{x\in \universe\colon \phi(x)\}$.  This set
can be called the \textbf{interpretation}\dindex{interpretation} of $\phi(x)$ in $\universe$.
The
interpretation of $\phi(x)$ may change if $\universe$ changes.
For example,
the interpretation of $x\notin A$ in $\universe$ is
$\universe\setminus A$, which depends on $\universe$.  However,
as long as $\universe$ includes $A$, the 
interpretation of $x\in A$ in $\universe$ does not change: it is just
$A$. 

We now allow variables besides $x$, and we ask, for example, whether
the \textbf{binary}
$\in$-formula
\begin{equation*}
  x\in A\land y\in B
\end{equation*}
defines a set.  It \emph{does} define a set, which is
denoted by  
\begin{equation*}
  A\times B\glossary{$A\times B$}
\end{equation*}
and called the \textbf{Cartesian product}\dindex{Cartesian
  product}\dindexsub{product}{Cartesian ---} of $A$ and $B$.  This set
  $A\times B$ can be depicted as in Figure~\ref{fig:AxB}.  If $a\in A$
and $b\in B$, then there will be an element of $A\times B$, denoted by
\begin{equation*}
  (a,b)\glossary{$(a,b)$}
\end{equation*}
and called an \textbf{ordered pair}\dindexsub{order}{---ed
  pair}\dindexsub{pair}{ordered ---}.  Such objects will have the
property that 
\begin{equation}\label{eqn:pairs}
  (a,b)=(a',b')\iff a=a'\amp b=b';
\end{equation}
consequently,
\begin{equation*}
  (a,b)\in A\times B\iff a\in A\amp b\in B.
\end{equation*}
But what \emph{is} an ordered pair?

\begin{figure}[t!]
    \setlength{\unitlength}{0.3pt}
\begin{center}
  \begin{picture}(440,440)(-40,-40)
\put(0,0){\line(1,0){400}}
\put(0,0){\line(0,1){400}}
\put(0,400){\line(1,0){400}}
\put(400,0){\line(0,1){400}}
\put(205,-40){$A$}
\put(-40,210){$B$}
\put(160,210){$A\times B$}
\put(100,170){\line(1,0){250}}
\put(100,290){\line(1,0){250}}
\put(100,170){\line(0,1){120}}
\put(350,170){\line(0,1){120}}
%\thicklines
\linethickness{2pt}
\put(100,0){\line(1,0){250}}
\put(0,170){\line(0,1){120}}
\end{picture}
\end{center}
\caption{Cartesian product}
\label{fig:AxB}
\end{figure}

So far (in this chapter), all of our sets have been Boolean
combinations of given sets.  
But recall that the Adjunction Axiom~(\ref{ax:adjunction}) and its consequence, the Pairing Theorem~(\ref{thm:pairing}), give
alternative ways of producing new
sets.
If
$a\neq b$, then the set $\{a,b\}$ is an
\textbf{(unordered) pair.}\dindex{unordered
  pair}\dindexsub{pair}{unordered ---}
  
\begin{lemma}\label{lem:pairs}
  $\{\{a\},\{a,b\}\}=\{\{c\},\{c,d\}\}\iff a=c\amp b=d$.
\end{lemma}

Now we can define ordered pairs so as to have the desired
Property~\eqref{eqn:pairs}: by definition,
\begin{equation*}
  (a,b)=\{\{a\},\{a,b\}\}.
\end{equation*}
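For instance, $(0,1)$ is then $\{\{0\},\{0,1\}\}$, while $(1,0)$ is
$\{\{1\},\{0,1\}\}$; these two sets are distinct, as
Property~\eqref{eqn:pairs} requires.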
Note well that we make this definition solely so that ordered pairs
will have Property~\eqref{eqn:pairs}.  It is true but
unimportant\footnote{Some discussion of this point is in
  \cite[\S~6]{MR0453532}.} 
that $\{a\}\in(a,b)$---except that, in the usual treatment of
set-theory, one still needs the precise 
definition of $(a,b)$ to justify \emph{axiomatically} the existence of
the set $A\times B$.  I shall discuss this point later.  Meanwhile,
we can write
\begin{equation*}
A\times B=
  \{(x,y)\in\universe\times\universe\colon x\in A\land y\in B\}.
\end{equation*}

Suppose now ${\sv F}$ is a $2n$-ary propositional formula.  Then we
have the binary $\in$-formula
\begin{equation}\label{eqn:binary-F}
  {\sv F}(x\in A_0\fcom \dots \fcom x\in A_{n-1}\fcom y\in A_0\fcom \dots
  \fcom y\in A_{n-1}).
\end{equation}
Call this $\phi(x,y)$.  Its interpretation in $\universe$ is a subset
of $\universe\times\universe$, namely
\begin{equation}\label{eqn:binary-set}
  \{(x,y)\in\universe\times\universe\colon \phi(x,y)\},
\end{equation}
which consists precisely of those $(c,d)$ in
$\universe\times\universe$ such that
\begin{equation*}
  \named{\sv F}(\tuple e,\tuple f)=1,
\end{equation*}
where $\tuple e$ and $\tuple f$ are the $n$-ary truth assignments such
that
\begin{align*}
  e_k=1&\iff c\in A_k,\\
f_k=1&\iff d\in A_k
\end{align*}
for each $k$ in $\{0,\dots,n-1\}$.
As special cases, we have
\begin{gather*}
    \{(x,y)\in\universe\times\universe\colon x\in A\}=A\times\universe;\\
    \{(x,y)\in\universe\times\universe\colon y\in B\}=\universe\times B.
\end{gather*}
These sets are \emph{also} the interpretations in
$\universe\times\universe$ of 
$(x,y)\in A\times\universe$ and $(x,y)\in \universe\times B$
respectively.  Hence, for example, the formulas $x\in A$ and
$(x,y)\in A\times\universe$ are interchangeable or, as we may say,
\textbf{equivalent}\dindex{equivalent} as binary formulas.
  In~\eqref{eqn:binary-set}, we can now replace
$\phi(x,y)$ with the formula
\begin{multline}\label{eqn:binary-F2}
  {\sv F}((x,y)\in A_0\times\universe\fcom\dots\fcom (x,y)\in
  A_{n-1}\times\universe,\\ (x,y)\in\universe\times
  A_0\fcom\dots\fcom (x,y)\in\universe\times A_{n-1}),
\end{multline}
without changing the set.

Since we have a new operation on sets, we may wonder how it interacts
with the ones that we already have.  Let use first establish the
notational convention that $\times$ has priority over $\cap$, $\cup$,
$\symdiff$, and $\setminus$, but not over ${}\comp$, so that, for
example, 
\begin{gather*}
  A\times B\cap C=(A\times B)\cap C;\\
A\times B\comp=A\times(B\comp).
\end{gather*}
Then we have:

\begin{theorem}\label{thm:set-x}
  The following are set-theoretic identities:
  \begin{align*}
    A\times(B\cap C)&=A\times B\cap A\times C, &
    (A\cap B)\times C&=A\times C\cap B\times C,\\
    A\times(B\cup C)&=A\times B\cup A\times C,&
    (A\cup B)\times C&=A\times C\cup B\times C,\\
\universe\times A\comp&=(\universe\times A)\comp, &
A\comp\times\universe&=(A\times\universe)\comp.
    \end{align*}
\end{theorem}

\begin{proof}
  We prove the first identity in two ways; the rest are exercises.

Suppose $(a,b)\in A\times(B\cap C)$.  Then $a\in A$, and $b\in B\cap
C$.  Hence also $b\in B$ and $b\in C$.  Therefore $(a,b)\in A\times B$
and $(a,b)\in A\times C$.  Consequently $(a,b)\in (A\times B)\cap
(A\times C)$.  Thus $A\times(B\cap C)\included(A\times B)\cap(A\times
C)$.  The reverse inclusion is an exercise.

Alternatively, by (a slight variant of) Theorem~\ref{thm:equal-sets}, we have
\begin{align*}
  A\times(B\cap C)
&=\{(x,y)\in\universe\times\universe\colon  x\in A\land y\in B\cap C\}\\
&=\{(x,y)\in\universe\times\universe\colon x\in A\land y\in B\land y\in C\}\\
&=\{(x,y)\in\universe\times\universe\colon (x\in A\land y\in B)\land(x\in
  A\land y\in C)\}\\ 
&=\{(x,y)\in\universe\times\universe\colon (x,y)\in A\times B\land(x,y)\in
  A\times C\},
\end{align*}
which is
$(A\times B)\cap(A\times C)$ by definition of intersection.  To save
writing, we might just note that $A\times(B\cap C)$ is the
interpretation of the following equivalent formulas:
\begin{gather*}
  x\in A\land y\in B\cap C,\qquad
x\in A\land y\in B\land y\in C,\\
(x\in A\land y\in B)\land(x\in A\land y\in C),\qquad
(x,y)\in A\times B\land(x,y)\in A\times C
\end{gather*}
---while the last formula defines $(A\times
   B)\cap(A\times C)$. 
\end{proof}

The identity for $A\times B\comp$ is not so neat: see
Exercise~\ref{exer:times-comp}.  Part of the last theorem can be
generalized: 

\begin{theorem}
  The equation
  \begin{equation*}
    A\times B\cap C\times D=(A\cap C)\times(B\cap D)
  \end{equation*}
is an identity.
\end{theorem}

\begin{proof}
  $A\times B\cap C\times D$ is the interpretation of
  \begin{equation*}
    x\in A\land y\in B\land x\in C\land y\in D, 
  \end{equation*}
which is equivalent to 
\begin{equation*}
  x\in A\land x\in C\land y\in B\land y\in D, 
\end{equation*}
which is the interpretation of $(A\cap C)\times(B\cap D)$.
\end{proof}

For $A\times B\cup C\times D$ and $(A\times B)\comp$, see
Exercise~\ref{exer:times-cup}. 

We have observed that~\eqref{eqn:binary-F}
and~\eqref{eqn:binary-F2} are equivalent.  This suggests a further
generalization:  If $(R_0,\dots,R_{n-1})$ is a list of $n$ subsets of
$\universe\times\universe$, and ${\sv G}$ is an $n$-ary propositional
formula, then we have a binary $\in$-formula
\begin{equation*}
  {\sv G}((x,y)\in R_0\fcom\dots\fcom (x,y)\in R_{n-1}).
\end{equation*}

A subset of $\universe\times\universe$ is a \textbf{binary
  relation}\dindexsub{binary}{--- relation}\dindexsub{relation}{binary
  ---} on $\universe$.  If $R\included\universe\times\universe$, and
  $(a,b)\in R$, then we may also write
\begin{equation*}
  a\mathrel R b.\glossary{$a\mathrel Rb$}
\end{equation*}
Then $R=\{(x,y)\in\universe\times\universe\colon x\mathrel Ry\}$.

\begin{example}
  The less-than relation on $\Z$ (named in \S~\ref{algebra}) is the
  set 
  \begin{equation*}
      \{(x,y)\in\Z\times\Z:x<y\},
  \end{equation*} 
which can be depicted as in Figure~\ref{fig:<}. 
\end{example}

\begin{figure}[t!]
  \begin{center}
%  \setlength{\unitlength}{0.3pt}
    \begin{picture}(100,100)(-50,-50)
      \put(-50,0){\vector(1,0){100}}
      \put(0,-50){\vector(0,1){100}}
      \multiput(-40,-40)(20,0){5}{\circle 4}
      \multiput(-20,-20)(20,0){4}{\circle 4}
      \multiput(0,0)(20,0){3}{\circle 4}
      \multiput(20,20)(20,0){2}{\circle 4}
      \put(40,40){\circle 4}
      \multiput(-40,40)(20,0)4{\circle* 4}
      \multiput(-40,20)(20,0)3{\circle* 4}
      \multiput(-40,0)(20,0)2{\circle* 4}
      \multiput(-40,-20)(20,0)1{\circle* 4}
    \end{picture}
  \end{center}
\caption{The less-than relation on $\Z$}\label{fig:<}
\end{figure}

There are two generalizations:
\begin{compactenum}
  \item
If $R\included A\times B$, then $R$ is a relation 
\textbf{from $A$ to $B$};%
\dindexsub{relation}{--- from A to B@--- from $A$ to $B$} 
then $A$ can be called the 
\textbf{domain}%
\dindex{domain} of $R$, and $B$
can be called the 
\textbf{co-domain}%
\dindex{co-domain}
of $R$.
\item
There are $n$-ary relations on $\universe$ for every $n$ in $\N$.
\end{compactenum}
The first of these will be taken up in the next section.
On the latter point, note that we can form an $n$-ary
$\in$-formula 
\begin{equation*}
  x_0\in A_0\land\dots\land x_{n-1}\in A_{n-1};
\end{equation*}
its interpretation in $\universe$ can be denoted by
\begin{equation*}
  A_0\times\dots\times A_{n-1}.
\end{equation*}
This is a subset of
$\underbrace{\universe\times\dots\times\universe}_n$, which we can
also denote by
\begin{equation*}
  \universe^n.
\end{equation*}\glossary{$\universe^n$}
The elements of $\universe^n$ are just the 
 \textbf{(ordered) $n$-tuples,}%
\dindexsub{order}{---ed ntuple@---ed $n$-tuple}%
\dindexsub{tuple}{ordered n---@ordered $n$-{}---}
written as one of
 \begin{align*}
   &(c_0,\dots,c_{n-1}),&
&\tuple c\glossary{$(c_0,\dots,c_{n-1})$}\glossary{$\tuple c$} 
 \end{align*}
where each $c_k$ is in $\universe$.  Such an $n$-tuple is just what we
have called a \textbf{list}\dindex{list} of $n$ elements of $\universe$.  In particular, an
$n$-ary truth-assignment is an element of $\B^n$.

Instead of $A\times A$, we can write $A^2$.  We can let $A^1$ be $A$
itself.  We can define $A^3$ to be $A^2\times A$; define $A^4$ to be
$A^3\times A$; and so on.
By our precise definition then,
\begin{equation*}
  (a_0,\dots,a_{n})=
((a_0,\dots,a_{n-1}),a_n)=
  \{\{(a_0,\dots,a_{n-1})\},\{(a_0,\dots,a_{n-1}),a_{n}\}\}, 
\end{equation*}
but this is not important; we could also use the definition
\begin{equation*}
  (a_0,\dots,a_{n-1})=\{\{a_0\},\{a_0,a_1\},\dots,\{a_0,a_1,\dots,a_{n-1}\}\} 
\end{equation*}
for example.  (See also \S~\ref{sect:equipollence}.)  In any case, we
should understand 
\begin{equation*}
  (a_0,\dots,a_{n-1})=
  \begin{cases}
    a_0,&\text{ if }n=1;\\
\emptyset,&\text{ if }n=0;
  \end{cases}
\end{equation*}
that is, $(a)$ is just $a$, and $(\;)$ is $\emptyset$.
Then $A^1=A$ as we said; also, $A^0=\{\emptyset\}$, which is $1$ in
the von-Neumann \index{von Neumann!--- natural number} definition of
the natural numbers in
\S~\ref{sect:sets}.  Finally, if $\tuple a$ is the $n$-tuple
$(a_0,\dots,a_{n-1})$, and $\tuple b$ is the $m$-tuple
$(b_0,\dots,b_{m-1})$, then we treat the ordered pair $(\tuple
a,\tuple b)$ as the ordered $(n+m)$-tuple
$(a_0,\dots,a_{n-1},b_0,\dots,b_{m-1})$.  Then we have
\begin{equation*}
  A^m\times A^n=A^{m+n}
\end{equation*}
for all $m$ and $n$ in $\vnn$.  (We do not have a meaning for $A^n$
if $n$ is a negative integer.)

An \textbf{$n$-ary relation}\dindexsub{relation}{nary ---@$n$-ary ---}
on $\universe$
is a subset of $\universe^n$.  In particular, a singulary relation on
$\universe$ is just a subset of $\universe$.  A
nullary relation on $\universe$ is a
subset of $\universe^0$; which is $\{\emptyset\}$; so a nullary
relation is either $\emptyset$ or $\{\emptyset\}$.  In the von-Neumann
definition, these sets are $0$ and $1$ respectively; so a nullary
relation is just a truth-value.

An \textbf{$n$-ary predicate}\dindex{predicate} is a name for an
$n$-ary relation.  An $n$-ary relation is then a possible
\textbf{interpretation} of an $n$-ary predicate.

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Prove Lemma \ref{lem:pairs}.
  \item
Complete the proof of Theorem~\ref{thm:set-x}.
\item\label{exer:times-comp}
Prove the identity $A\times B\comp=A\times\universe\setminus \universe\times
B$. 
\item
Prove the identities:
\begin{enumerate}
  \item
$(A\symdiff B)\times C=A\times C\symdiff B\times C$;
\item
$(A\setminus B)\times C=A\times C\setminus B\times C$.
\end{enumerate}
\item\label{exer:times-cup}
Prove the identities:
\begin{enumerate}
  \item$A\times B\cup C\times D=((A\cup C)\times(B\cup
D)\setminus 
A\comp\times D\comp)\setminus C\comp\times B\comp$;
\item
$(A\times B)\comp=A\comp\times\universe\cup\universe\times B\comp$.
\end{enumerate}
\end{enumerate}

\section{Functions}\label{sect:functions}

A relation $R$ from a set $A$ to
a set $B$ is a \textbf{function}\dindex{function} from $A$ to $B$ if
it has two properties:
\begin{compactenum}
  \item
For every $a$ in $A$ there is some $b$ in $B$ such that $(a,b)\in R$.
\item
If $R$ contains both $(a,b)$ and $(a,c)$, then $b=c$.
\end{compactenum}
One might abbreviate these properties as follows:
\begin{compactenum}
  \item
$\Frall xA\Exsts yB x\mathrel R y$.
\item
$\Frall xA\Frall yB\Frall zB(x\mathrel R y\amp x\mathrel R z\implies y=z)$.
\end{compactenum}
Alternatively, $R$ is a function if it has the property:
\begin{compactitem}
  \item
For every $a$ in $A$, there is a
\emph{unique} $b$ in $B$ such that $a\mathrel Rb$.  
\end{compactitem}
\emph{Unique}
existence---existence of exactly one---is sometimes abbreviated by the
quantifier
\begin{equation*}
  \existsunique.
\end{equation*}\glossary{$\exists!\qsep x$}
Then the last property can be abbreviated:
\begin{compactitem}
  \item
$\Frall xA(\existsunique\qsep y\in B)\qsep x\mathrel Ry$.
\end{compactitem}
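For example, if $A=\{0,1\}$ and $B=\{2,3\}$, then the relation
$\{(0,2),(1,2)\}$ is a function from $A$ to $B$; but
$\{(0,2),(0,3),(1,2)\}$ is not, since $0$ is related to two elements
of $B$; and $\{(0,2)\}$ is not, since $1$ is related to no element
of $B$.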

Often a function is denoted by a letter like $f$; then, instead of
writing $(a,b)\in f$, or $a\mathrel f b$, one writes
\begin{equation*}
  f(a)=b.
\end{equation*}\glossary{$f(a)=b$}
Suppose $f$ is a function from $A$ to $B$.  This can be indicated by one of
\begin{align*}
  f&\colon A\To B,&A&\overset{f}{\To}B.
\end{align*}\glossary{$f:A\to B$}\glossary{$A\overset{f}{\to}B$}
In accordance with the definitions in the previous
section, $A$ is then the 
\textbf{domain}%
\dindex{domain} 
of $f$, and $B$ is the
\textbf{co-domain}%
\dindex{co-domain}
of $f$.
Also, $f$ is a function \textbf{on}\dindex{on} $A$, and $f$ is 
a function \textbf{from}\dindex{from} $A$ \textbf{to}\dindex{to} $B$.
Functions are sometimes 
called \textbf{maps;}\dindex{map} in the present case, $f$ can be said
to \textbf{map} $A$ into $B$.

Considered as a string of symbols, $f(x)$ is a
\textbf{term.}\dindex{term}  Then the function $f$ might be given by the notation
\begin{equation*}
  x\longmapsto f(x),\glossary{$x\mapsto f(x)$}
\end{equation*}
and we might say that $f$ \textbf{takes}\dindex{takes} or \textbf{sends}\dindex{sends} $x$ to
$f(x)$.  As we shall see presently, the term $f(x)$ might be replaced
with another term that does not contain a specific name for $f$
itself.

Note that, considered as a set, a function uniquely determines its domain, but not its co-domain.  If $f\colon A\to B$, then
\begin{align*}
A&=\{x\colon\Exists yf(x)=y\},&
\{y\colon\Exists xf(x)&=y\}\included B.
\end{align*}
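For example, the set $\{(0,1)\}$ is a function from $\{0\}$ to
$\{1\}$, but equally a function from $\{0\}$ to $\{1,2\}$; the set
alone does not determine the intended co-domain.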

An \textbf{$n$-ary operation}\dindexsub{nary@$n$-ary}{---
  operation}\dindexsub{operation}{nary@$n$-ary} on a set $A$ is a
function from $A^n$ to $A$.  Then there is at least one singulary
  operation on $A$, namely the \textbf{identity}\dindex{identity} on $A$: this is the
  function
  \begin{equation*}
    x\longmapsto x
  \end{equation*}
on $A$, which can be denoted by
\begin{equation*}
  \id_A.
\end{equation*}\glossary{$\mathrm{id}_A$}
More generally, if $k<n$, then there is an $n$-ary operation
\begin{equation*}
  (x_0,\dots,x_{n-1})\longmapsto x_k
\end{equation*}
on $A$.  (This operation is $\id_A$
if $n=1$ and $k=0$.)  But there are all sorts of
operations besides these:

\begin{examples}
\mbox{}
\begin{asparaenum}
\item
  In \S\ref{sect:sets}, the successor of a number $n$ in $\N$ is denoted by
  $\scr n$ or $n+1$.  This means there is a function
  $x\mapsto \scr x$ from $\N$ to itself; this is a singulary operation
  on $\N$.  
\item
The operations $+$ and $\cdot$ named in \S~\ref{algebra} are binary
operations on $\Z$ and can be denoted by $(x,y)\mapsto x+y$ and
$(x,y)\mapsto xy$ respectively.
\item
Hence any arithmetic term $t$ in an $n$-tuple $(x_0,\dots,x_{n-1})$ of
variables 
determines the $n$-ary operation $\tuple x\mapsto t$ on $\Z$.
\item
The fundamental theorem of calculus%
\index{calculus!infinitesimal ---}%
\index{infinite!---simal calculus}
is that if $f$ is
a \textsl{continuous}%
\tindex{continuous function}
function on $\R$, and $a\in\R$, then the function
$x\mapsto \int_a^xf$ is a \textsl{primitive}%
\tindex{primitive} for $f$ (that is, a
function whose derivative is $f$).
\end{asparaenum}
\end{examples}

Several refinements of the notion of a function are useful.  Suppose
again that $f\colon A\to B$.  Then $f$ is:
\begin{compactenum}[1)]
  \item
\textbf{surjective}%
\dindex{surjective function} 
or 
\textbf{onto,}%
\dindexsub{on}{---to} if every element of
$B$ is $f(a)$
for \emph{at least} one $a$ in $A$;
\item
\textbf{injective}%
\dindex{injective function, injection} 
or 
\textbf{one-to-one,}%
\dindexsub{one-to-one}{--- function}
if every element of $B$ is 
$f(a)$ for \emph{at most} one $a$ in $A$;
\item
\textbf{bijective,}%
\dindex{bijective function, bijection} 
if it is one-to-one and onto
(injective and surjective).
\end{compactenum}
A surjective function is a 
\textbf{surjection;}%
\dindex{surjection} 
an injective function is an \textbf{injection;} 
a bijective function is a \textbf{bijection.}
An injection is also called an 
\textbf{embedding;}\dindex{embedding} 
a bijection is also
called a 
\textbf{one-to-one correspondence.}%
\dindexsub{one-to-one}{--- correspondence}%
\dindexsub{correspondence}{one-to-one ---}
More symbolically, $f$ is:
\begin{compactenum}[1)]
\item
  surjective, if $\Frall yB\Exsts xAf(x)=y$;
\item
injective, if $\Frall xA\Frall yA(f(x)=f(y)\implies x=y)$.
\end{compactenum}

\begin{examples}
\mbox{}
  \begin{compactenum}
    \item
$\id_A$ is a bijection.
\item
The squaring function $x\mapsto x^2$ is injective on $\N$, but not on
$\Z$; as
a function from $\C$ to $\C$, it is surjective, but not as a function
from $\R$ to $\R$.
\item
The tangent-function $x\mapsto \tan x$, from the set of real numbers
that are not odd multiples of $\pi/2$ to $\R$, is surjective, but not
injective.
\item
The cubing function $x\mapsto x^3$ from $\R$ to $\R$ is
bijective.
  \end{compactenum}
\end{examples}

Again suppose $f\colon A\to B$.  The \textbf{range} of $f$ is the set
\begin{equation*}
  \{y\in B\colon\Exsts xAf(x)=y\};
\end{equation*}
this is a subset of the co-domain of $f$, and can be denoted by
\begin{equation*}
  \{f(x)\colon x\in A\},
\end{equation*}\glossary{$\{f(x)\colon x\in A\}$}
or more simply by $f(A)$.\glossary{$f(A)$}
However, since the latter notation suggests---usually wrongly---that $A$ is actually
an \emph{element} of the domain of $f$, I prefer to use the notation
\begin{equation*}
  f\setimb A.\glossary{$f[A]$}
\end{equation*}
A function is surjective if and only if its range is equal to its
co-domain. 

\begin{examples}
\mbox{}
\begin{compactenum}
  \item
  The co-domain of $x\mapsto \sin x$ is usually considered to be $\R$,
  although the range of the function is the interval $[-1,1]$.
\item
The function $x\mapsto 1+x^2$, as a function on $\R$, has range
$[1,\infty)$.
\end{compactenum}
\end{examples}

Suppose also $g\colon B\to C$.  The \textbf{composition} of $f$ and $g$ is
\begin{equation*}
  \{(x,z)\in A\times C\colon g(f(x))=z\};
\end{equation*}
this can be denoted by
\begin{equation*}
  g\circ f,
\end{equation*}\glossary{$g\circ f$}
which can be read as \Eng{$g$ composed with $f$}.  Showing
that $g\circ f$ is a function is Exercise~\ref{exer:comp} below; it is
Exercise~\ref{exer:comp-inj-surj} to show that the composition of
injective functions is injective, and the composition of surjective
functions is surjective.
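For example, if $f$ is the operation $x\mapsto x+1$ on $\Z$, and $g$
is the operation $x\mapsto x^2$ on $\Z$, then $g\circ f$ is
$x\mapsto(x+1)^2$, while $f\circ g$ is $x\mapsto x^2+1$; in
particular, $g\circ f$ and $f\circ g$ need not be equal.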

Many of the foregoing ideas are connected by the following:

\begin{theorem}\label{thm:bij}
Suppose $A\neq\emptyset$ and $f\colon A \to B$.
  \begin{compactenum}
    \item
The function $f$ is injective if and only if
$g\circ f=\id_A$ for some function~$g$ from $B$ to $A$.
\item
The function  $f$ is surjective if and only if
  $f\circ g=\id_B$ for some function~$g$ from $B$ to $A$.
\item
The function $f$ is bijective if and only if
$g\circ f=\id_A$ and $f\circ g=\id_B$ for some function $g$ from
  $B$ to $A$.
\end{compactenum}
\end{theorem}

\begin{proof}
\begin{asparaenum}
\item
  Suppose $f$ is injective.  Then for every $b$ in $f\setimb A$,
     there is exactly one $a$ in $A$ such that $f(a)=b$.  This means
     that the set $\{(f(x),x)\colon x\in A\}$ (which is the range of the function $x\mapsto(f(x),x)$ from $A$ to $B\times A$) is a function from $f\setimb A$
     to $A$.  Since $A\neq\emptyset$, there is some $c$ in $A$; then
     $y\mapsto c$ is a function from $B\setminus f\setimb A$ to $A$.
     The union of these two functions, as sets, is a function $g$
     from $B$ to $A$, and $g(f(a))=a$ for all $a$ in $A$, so $g\circ
     f=\id_A$. 

Suppose conversely that $g\circ f=\id_A$.  If $f(a)=f(a')$, then
$g(f(a))=g(f(a'))$, that is, $\id_A(a)=\id_A(a')$, which means $a=a'$.
Thus $f$ is injective.
\item
Suppose $f$ is surjective.  Then for every $b$ in $B$, there is
     \emph{at least} one $a$ in $A$ such that $f(a)=b$.  Now we have
     to do something sneaky:  We pick \emph{one} such $a$, and define
     $g(b)=a$.  We do this for all $b$ in $B$, and this gives us $g$
     as desired.  (That such picking can be done once for all is
     perhaps not obvious, but it is a consequence of the set-theoretic
     \textsl{Axiom of Choice}.)\tindexsub{axiom}{A--- of
     Choice}\tindexsub{choice}{Axiom of C---}

The converse,\index{converse} and the remaining part, are left as an exercise.\qedhere
\end{asparaenum}
\end{proof}

\begin{theorem}
  Suppose $f\colon A\to B$ is bijective.  Then there is \emph{exactly
  one} function $g$ from $B$ to $A$ such that $g\circ f=\id_A$ and
  $f\circ g=\id_B$.
\end{theorem}

\begin{proof}
  By the last theorem, there is at least one such function.  Suppose
  $g_0$ and $g_1$ are such functions, and $b\in B$.  Then $b=f(a)$ for
  some $a$ in $A$, since $f$ is surjective.  Hence
  \begin{equation*}
    g_0(b)=g_0(f(a))=g_0\circ f(a)=\id_A(a)=g_1\circ
    f(a)=g_1(f(a))=g_1(b). 
  \end{equation*}
Thus $g_0=g_1$.
\end{proof}

The unique function $g$ in the theorem is the \textbf{inverse}\dindex{inverse} of $f$
and can be denoted by
\begin{equation*}
  f\inv.
\end{equation*}\glossary{$f^{-1}$}
A bijection can also be called an \textbf{invertible}\dindex{invertible} function.

In general, if $f\colon A\to B$ and $C\included A$, then $f\cap(C\times B)$
is a function from $C$ to
$B$; this can be denoted by
\begin{equation*}
  f\restriction C;
\end{equation*}\glossary{$f\restriction C$}
it is the \textbf{restriction}\dindex{restriction} of $f$ to $C$, and its range is $f\setimb
C$.  This range is also called the \textbf{image}\dindex{image} of $C$ under $f$.
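For example, if $f$ is the squaring function $x\mapsto x^2$ on $\Z$,
then $f$ is not injective, but the restriction $f\restriction\N$ is;
and the image of $\{-1,0,1\}$ under $f$ is $f\setimb{\{-1,0,1\}}$,
which is $\{0,1\}$.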


\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}

\item\label{exer:comp}
Show that the composition of two functions is a function.
\item\label{exer:comp-inj-surj}
Show that the composition of injective functions is injective; of
surjective functions, surjective.
\item
Complete the proof of Theorem~\ref{thm:bij}.
  \item
Suppose $f$ and $g$ are functions from $A$ to $B$.  For each of the
relations
\begin{align*}
&f\cup g,&&f\cap g,
\end{align*}
\begin{itemize}
  \item
prove or disprove that it is always a function; and
\item
prove or disprove that it is always \emph{not} a function.
\end{itemize}
\item
Let $f\colon A\to B$ and $g\colon B\to C$.
\begin{enumerate}
  \item
Supposing $g$ and $f$ are invertible, write $(g\circ f)\inv$ as a
composition of inverses (rather than an inverse of compositions).
\item
If $g\circ f$ is injective, does it follow that $f$ is
injective?---that $g$ is injective?
\item
Same question, with \Eng{surjective} for \Eng{injective}.
\item
Same question, with \Eng{bijective} for \Eng{surjective}.
\end{enumerate}
\end{enumerate}



\section{More functions}\label{sect:deeper}

\subsection*{Induced functions}

If $f\colon A\to B$ and $C\included A$, then we have defined $f\setimb C$ as
a subset of $B$.  This suggests that we have a function $X\mapsto
f\setimb X$; but what are its domain and co-domain?

I noted at the beginning of the chapter that the sets we would discuss
need only be classes.  This is no longer the case.  In particular, if
$\class C$ is a class, we define the \textsl{power class} of $\class
C$ to be the class of all sub\emph{sets} of $\class C$; there is not
necessarily a class of sub\emph{classes} of $\class C$.  The
power class of a set $A$ is denoted by 
\begin{equation*}
\pow A.\glossary{$\mathcal P(A)$}
\end{equation*}
If $f\colon A\to B$, then the function $X\mapsto f\setimb X$ has 
domain $\pow A$ and co-domain $\pow B$.

The power class of a set is called its
\textbf{power set}\dindexsub{power}{--- set}\dindexsub{set}{power ---}
because of the following.

\begin{axiom}[Power Set]%
\dindexsub{power}{P--- Set Axiom}%
\dindexsub{axiom}{Power Set A---}\label{ax:pow}
The power class of a set is a set.
\end{axiom}

We shall not actually use this axiom until \S~\ref{sect:equipollence}.

\begin{examples}\mbox{}
  \begin{compactenum}
    \item
$\pow{\emptyset}=\{\emptyset\}$, that is, $\pow 0=1$ in the definition
      of von Neumann;\index{von Neumann}\index{number!von-Neumann
      natural ---}
\item
$\pow{\{\emptyset\}}=\{\emptyset,\{\emptyset\}\}$, that is, $\pow 1=2$.
\item
$\emptyset\in\pow A$ and $A\in\pow A$ for all sets $A$.
  \end{compactenum}
\end{examples}
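
Continuing the first two examples (a computation added here only for
illustration): since $2=\{0,1\}$, we have
\begin{equation*}
  \pow 2=\{0,1,\{1\},2\},
\end{equation*}
a set with four elements.  Note that $\pow 2$ is \emph{not} the
von-Neumann number $4$, since $\{1\}$ is not a von-Neumann natural
number.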

\begin{lemma}
  Suppose $f\colon A\to B$.  Then
  \begin{equation*}
    X\included Y\implies f\setimb X\included f\setimb Y
  \end{equation*}
for all subsets $X$ and $Y$ of $A$.
\end{lemma}

\begin{proof}
  Suppose $x\in f\setimb X$.  Then $x=f(u)$ for some $u$ in $X$.  But
  $X\included Y$, so $u\in Y$, and hence $f(u)\in f\setimb Y$, that is,
  $x\in f\setimb Y$.
\end{proof}

\begin{theorem}
  Suppose $f\colon A\to B$.  Then
  \begin{align}\label{eqn:cup=}
    f\setimb{X\cup Y}&=f\setimb X\cup f\setimb Y,\\ \label{eqn:cap-inc}
f\setimb{X\cap Y}&\included f\setimb X\cap f\setimb Y
  \end{align}
for all subsets $X$ and $Y$ of $A$.
\end{theorem}

\begin{proof}
  We have that $f\setimb X$ and $f\setimb Y$ are subsets of
  $f\setimb{X\cup Y}$ by the last lemma.  Hence
  \begin{equation*}
f\setimb X\cup f\setimb Y\included     f\setimb{X\cup Y}
  \end{equation*}
by \eqref{eqn:const-dil2}.  For the reverse inclusion, suppose $x\in
f\setimb{X\cup Y}$.  Then $x=f(u)$ for some $u$ in $X\cup Y$.  Either
$u\in X$ or $u\in Y$, hence, either $x\in f\setimb X$ or $x\in f\setimb
Y$.  In either case, $x\in f\setimb X\cup f\setimb Y$.  This proves
\eqref{eqn:cup=}. 

For \eqref{eqn:cap-inc}, note that $f\setimb{X\cap Y}$ is a subset
of both $f\setimb X$ and $f\setimb Y$, by the last lemma; we are now done, by
\eqref{eqn:const-dil}. 
\end{proof}

  The inclusion \eqref{eqn:cap-inc} can be strict.  To see this, one need only consider a non-injective function on a set of size $2$:

\begin{example}
If $f$ is $\{(0,0),(1,0)\}$ and $X=\{0\}$ 
  and $Y=\{1\}$, then $X\cap Y=\emptyset$, but $f\setimb X\cap f\setimb
  Y=\{0\}$. 
\end{example}

\begin{theorem}\label{thm:f-inj}
  Suppose $f\colon A\to B$.  
  \begin{enumerate}
    \item
  The following are equivalent:
  \begin{compactenum}
    \item
$f$ is injective.
\item
$f\setimb{X\cap Y}= f\setimb X\cap f\setimb Y$ for all
  subsets $X$ and $Y$ of $A$.
  \end{compactenum}
\item
If $f$ is injective, then 
\begin{align*}
  f\setimb{X\comp}&\included (f\setimb X)\comp,\\
f\setimb{X\setminus Y}&\included f\setimb X\setminus f\setimb Y
\end{align*}
for all subsets $X$ and $Y$ of $A$.
\item
  The following are equivalent:
  \begin{compactenum}
    \item
$f$ is bijective.
\item
$f\setimb{X\comp}=(f\setimb X)\comp$ for all subsets $X$ of $A$.
  \end{compactenum}
  \end{enumerate}
\end{theorem}

If $f\colon A\to B$, and $C\included B$, then $A$ has the subset
\begin{equation*}
  \{x\in A\colon f(x)\in C\},
\end{equation*}
which can be denoted by
\begin{equation*}
  f\inv[C];\glossary{$f^{-1}[C]$}
\end{equation*}
this is the \textbf{pre-image}\index{pre-image} of $C$ under $f$.
Thus we have a function
\begin{equation*}
  Y\longmapsto f\inv[Y]
\end{equation*}
with domain $\pow B$ and co-domain $\pow A$.  Note well that this
function exists, whether $f$ is invertible or not.
The function $Y\mapsto f\inv[Y]$ behaves more nicely than
$X\mapsto f\setimb X$ with respect to the Boolean operations:

\begin{theorem}\label{thm:inv-homom}
  Suppose $f\colon A\to B$.  Then
  \begin{align}
    f\inv[X\cup Y]&=f\inv[X]\cup f\inv[Y],\\ \label{eqn:inv-cap}
    f\inv[X\cap Y]&=f\inv[X]\cap f\inv[Y],\\ \label{eqn:inv-comp}
f\inv[X\comp]&=(f\inv[X])\comp,\\
    f\inv[X\setminus Y]&=f\inv[X]\setminus f\inv[Y]
  \end{align}
for all subsets $X$ and $Y$ of $B$.
\end{theorem}

\begin{proof}
  Exercise.  Note that, by adequacy of the signature
  $\{\land,\lnot\}$, the other equations follow from
  \eqref{eqn:inv-cap} and \eqref{eqn:inv-comp}.
\end{proof}
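
As a model for one part of the exercise (the remaining parts are
similar), here is a verification of \eqref{eqn:inv-cap}.  For all $x$
in $A$,
\begin{align*}
  x\in f\inv[X\cap Y]&\liff f(x)\in X\cap Y\\
  &\liff f(x)\in X\amp f(x)\in Y\\
  &\liff x\in f\inv[X]\amp x\in f\inv[Y]\\
  &\liff x\in f\inv[X]\cap f\inv[Y].
\end{align*}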

\subsection*{Operations on relations}


It is possible to give a neat account of functions by
first defining the composition of \emph{relations}.  Suppose
$R\included A\times B$ and $S\included B\times C$.  Then the
\textbf{composition}\dindex{composition} of $R$ and $S$ is the set
\begin{equation*}
\{(x,z)\in A\times C:\Exsts yB (x\mathrel R y\amp y\mathrel
  Sz)\},
\end{equation*}
which can be denoted by
\begin{equation*}
    S\circ R.
\end{equation*}\glossary{$S\circ R$}
Note well the order in which $R$ and $S$ are written, which seems
unnatural, but agrees
with the notation for the composition of functions. At the expense of
introducing a new symbol, I propose to write\footnote{Tarski\index{Tarski} \cite[\S~28, p.~92]{Tarski-Intro} and Suppes\index{Suppes}
\cite[\S~3.1, Definition 7, p.~63]{MR0349389} are among those who use this notation.}
\begin{equation*}
  R/S
\end{equation*}\glossary{$R/S$}
for $S\circ R$.
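
For instance (with relations chosen only for illustration), suppose
$A=B=C=\{0,1\}$, while $R=\{(0,0),(0,1)\}$ and $S=\{(1,1)\}$.  Then
\begin{equation*}
  R/S=S\circ R=\{(0,1)\},
\end{equation*}
since $0\mathrel R1$ and $1\mathrel S1$, while no other pair $(x,z)$
of $A\times C$ admits a $y$ with $x\mathrel Ry$ and $y\mathrel Sz$.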

\begin{figure}[t!]
\begin{center}
  \begin{picture}(100,120)(-20,-20)
    \put(0,0){\line(0,1){100}}
    \put(0,0){\line(1,0){80}}
    \put(80,0){\line(0,1){100}}
    \put(0,100){\line(1,0){80}}
    \thicklines
    \put(20,20){\line(2,1){40}}
    \put(20,80){\line(1,-1){40}}
    \put(20,80){\line(1,0){20}}
    \put(-20,45){$B$}
    \put(35,-20){$A$}
    \put(40,65){$R$}
  \end{picture}
\qquad
  \begin{picture}(120,120)(-20,-20)
    \put(0,0){\line(1,0){100}}
    \put(0,0){\line(0,1){80}}
    \put(100,0){\line(0,1){80}}
    \put(0,80){\line(1,0){100}}
    \thicklines
    \put(20,20){\line(1,2){20}}
    \put(80,20){\line(-1,1){40}}
    \put(80,20){\line(0,1){20}}
    \put(45,-20){$B$}
    \put(-20,35){$A$}
    \put(65,40){$\conv R$}
  \end{picture}
\end{center}
\caption{Converse of a relation}
\label{fig:converse}
\end{figure}

The relation $R$ from $A$ to $B$
has a 
\textbf{converse,}
\dindex{converse} namely, the relation 
\begin{equation*}
 \{(y,x)\in B\times A:x\mathrel Ry\}
\end{equation*}
 from $B$ to $A$;
it can be denoted by
\begin{equation*}
   \conv R.\glossary{$\breve R$}
\end{equation*}
(See Figure~\ref{fig:converse}.)
This is sometimes denoted by $R\inv$,\glossary{$R\inv$} but such
notation can be misleading.


Finally, the binary relation of \textbf{equality}\dindex{equality} on $A$ is just the
set 
\begin{equation*}
  \{(x,y)\in A\times A:x=y\}.  
\end{equation*}
We can also call this the
\textbf{diagonal} on
$A$, and give it the symbol 
\begin{equation*}
  \Delta_A.\glossary{$\Delta_A$}
\end{equation*}
(The delta stands for \Eng{diagonal;} see Figure~\ref{fig:diagonal}.)

\begin{figure}[t!]
\begin{center}
  \begin{picture}(120,120)(-20,-20)
    \put(0,0){\line(0,1){100}}
    \put(0,0){\line(1,0){100}}
    \put(100,0){\line(0,1){100}}
    \put(0,100){\line(1,0){100}}
    \thicklines
    \put(0,0){\line(1,1){100}}
    \put(-20,45){$A$}
    \put(45,-20){$A$}
    \put(35,60){$\Delta_A$}
  \end{picture}
\end{center}
\caption{Diagonal on a set}
\label{fig:diagonal}
\end{figure}


We can now make the following definitions:  $R$ is
\begin{compactenum}[1)]
  \item
\textbf{full,}%
\dindexsub{full}{--- relation}%
\dindexsub{relation}{full ---}
if $\Delta_A\included R/\conv R$;
\item
\textbf{functional,}%
\dindexsub{function}{---al relation}%
\dindexsub{relation}{functional ---} 
if $\conv R/R\included\Delta_B$.
\end{compactenum}

\begin{theorem}\label{thm:fff}
Let $R\included A\times B$.  Then
$R$ is a function from $A$ to $B$ if and only if $R$ is full and
    functional (as a relation from $A$ to $B$).
\end{theorem}

\begin{proof}
  Exercise.
\end{proof}
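
As a quick check of these definitions (an illustration, not the
proof), let $A=B=\{0,1\}$, and let $R$ be $\{(0,1),(1,1)\}$, that is,
the constant function $x\mapsto1$.  Then
\begin{equation*}
  R/\conv R=A\times A,\qquad\conv R/R=\{(1,1)\},
\end{equation*}
so that $\Delta_A\included R/\conv R$ and $\conv R/R\included\Delta_B$:
the relation $R$ is full and functional, as the theorem requires.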

We have alternative characterizations for notions in
\S~\ref{sect:functions}:

\begin{theorem}\label{thm:fsib}
Suppose $f\colon A\to B$.
  \begin{compactenum}
    \item
$f$ is surjective if and only if $\Delta_B\included\conv f/f$.
\item
$f$ is injective if and only if $f/\conv f\included\Delta_A$.
\item
$f$ is bijective if and only if $\conv f/f=\Delta_B$ and $f/\conv
  f=\Delta_A$. 
  \end{compactenum}
\end{theorem}

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}

  \item
Prove Theorem~\ref{thm:f-inj}.
\item
Prove Theorem~\ref{thm:inv-homom}.
  \item
Prove Theorem~\ref{thm:fff}.
\item
Prove Theorem~\ref{thm:fsib}.
\end{enumerate}


\section{First-order logic}\label{sect:1st}

First-order logic provides a formal way to talk about particular
operations and relations.  It allows for a precise definition of the
\emph{context,} mentioned in \S~\ref{sect:language}, in which a
mathematical proposition is true or false.
First-order logic is a large subject; this section will be only a
cursory treatment.
However, we have already mentioned the ingredients of
first-order logic, in an informal way at least.  A 
\textsl{signature}%
\tindex{signature}
for a
\textsl{first-order logic}%
\tindexsub{first}{---{}-order logic}%
\tindexsub{logic}{first-order ---}
consists of
\textsl{constants,}%
\footnote{Constants are also called \textsl{constant-symbols.}%
\tindexsub{constant}{---{}-symbol}%
\tindexsub{symbol}{constant-{}---}}%
\tindex{constant}%
\tindexsub{symbol}{constant}
\textsl{function-symbols,}%
\tindexsub{function}{---{}-symbol}%
\tindexsub{symbol}{function-{}---}
and 
\textsl{predicates.}%
\footnote{Predicates are also called \textsl{relation-symbols.}%
\tindexsub{relation}{---{}-symbol}%
\tindexsub{symbol}{relation-{}---}}%
\tindex{predicate}%
\tindexsub{symbol}{predicate}
A 
\textsl{structure}%
\dindex{structure}
in a signature $\lang$ is a non-empty set $A$
along with a function that takes: 
\begin{compactenum}[1)]
  \item
each constant of $\lang$ to an element of $A$;
\item
each function-symbol of $\lang$ to an operation on $A$;
\item
each predicate of $\lang$ to a relation on $A$.
\end{compactenum}
Thus the elements of $\lang$ \textsl{symbolize} elements of $A$ and
operations and relations on $A$.
More elements and operations are symbolized by \textsl{terms}\tindex{term},
which are strings made of constants, function-symbols, and 
\textsl{variables}\tindex{variable}.  More {relations} are symbolized by
\textsl{formulas}\tindex{formula}.  The simplest formulas are the
\textsl{atomic}\footnote{From the Greek \Gk{>'atomos} \Eng{uncuttable,
    not compound,} from \Gk{t'omos} \Eng{a slice.}}
formulas,%
\tindex{atomic formula}%
\tindexsub{formula}{atomic ---} 
which
consist of terms
joined by the sign of equality%
\index{equality!sign of ---}%
\index{sign of equality} 
or by a predicate.  Atomic formulas can be preceded by
quantifiers\index{quantifier} (with variables) or combined by means of
Boolean connectives; formulas in
general are obtained in this way.  New constants symbolizing
particular elements of $A$
can be used as \textsl{parameters}\tindex{parameter} in terms and formulas.

\begin{example}
The set  $\Z$ of integers can be understood as a structure in the signature
  $\{0,1,-,+,\cdot,<\}$ (see \S~\ref{algebra}~\eqref{eqn:Z<}); a term
  in this signature (with parameters from $\Z$ as desired) is an
  \textsl{arithmetic term}\index{arithmetic!---
  term}\index{term!arithmetic ---} as defined in
  \S~\ref{algebra}.
  Diophantine equations%
\index{Diophantine equation}%
\index{equation!Diophantine ---} and arithmetic inequalities%
\index{arithmetic!--- inequality}%
\index{inequality!arithmetic ---} are the {atomic}
  formulas in this signature.
\end{example}

The terminology of first-order logic is a
means to give a precise but general account of some ideas that one
encounters in high-school mathematics.

\subsection*{Structures}

By formal definition,
a \textbf{structure}%
%\dindex{structure} 
is an ordered pair $(A,\interpretation)$---which
can also be
referred to as $\str A$---where:
\begin{compactenum}[1)]
  \item
$A$ is a non-empty set, which is called the \textbf{universe}\dindex{universe} of the
    structure;
\item
$\interpretation$ is a function, written also as
  \begin{equation*}
    s\longmapsto s^{\str A},
  \end{equation*}
whose domain $\lang$ is called the \textbf{signature}\dindex{signature} of the structure;
\item
$s^{\str A}$ is either an element of $A$ or an $n$-ary operation or
  relation on $A$ for some positive integer $n$, for each $s$ in $\lang$.
\end{compactenum}
Here $\str A$ may be called a structure \textbf{of} $\lang$, or an \textbf{$\lang$-structure.}%
\dindex{L-structure@$\lang$-structure}%
\dindexsub{structure}{L-structure@$\lang$-structure}
If $\lang=\{s_0,s_1,\dots\}$, then $\str A$ can be written as
\begin{equation*}
(A,s_0{}^{\str A}, s_{1}{}^{\str A},\dots),
\end{equation*}
or just as $(A,s_0, s_{1},\dots)$ unless ambiguity would result
(that is, unless another structure of interest has the
same universe and signature as $\str A$).  Moreover, if the intended
signature is clear, then $\str A$ may be written simply as $A$; that
is, the universe may stand for the structure.  The function
$\interpretation$ is almost never referred to, except in general
accounts like this one. 

\begin{examples}\label{examples:structures}
The following are structures:
\begin{compactenum}[1)]
    \item
%  $(\vnn,{}',0)$;
$(\N,{}\scr{},0)$, or more briefly $\N$ (see \S~\ref{sect:sets});
\item
the {power-set} structure\index{power!---{}-set structure}
\index{structure!power-set ---}on a non-empty set $\Omega$, namely
\begin{equation*}
  (\pow{\Omega},\emptyset,\Omega,\cap,\cup,{}\comp,\included); 
\end{equation*}
\item
the 
\textbf{truth-structure}%
\footnote{This is not a standard term.}%
\dindexsub{truth}{---{}-structure}%
\dindexsub{structure}{truth-{}---} 
\begin{equation*}
  (\B,0,1,\land,\lor,\lnot{},\models), 
\end{equation*}
where
  $\models$ is the binary relation $\{(0,0), (0,1), (1,1)\}$ on $\B$.
  \end{compactenum}
\end{examples}

The last two examples are the same if the elements of $\B$ are
  von-Neumann natural numbers%
\index{von Neumann!--- natural number}
  and $\Omega$ is the von-Neumann natural number $1$.  
{Propositional logic}%
\index{proposition!---al logic}%
\index{logic!propositional ---} 
studies the truth-structure.  The area of mathematics and logic called
  \textbf{model-theory}%
\dindexsub{model}{---{}-theory}%
\dindexsub{theory}{model-{}---}
  studies \emph{all} structures.  


When $\interpretation$ is as above in the structure
$(A,\interpretation)$, and $s$ is an element of $\lang$, then:
\begin{compactenum}[1)]
  \item
$s^{\str A}$ is called the 
\textbf{interpretation}%
\dindex{interpretation} in $\str A$ of $s$;
\item
$s$ is called a \textbf{symbol}\dindex{symbol} for $s^{\str A}$.
\end{compactenum}
So $s$ is one of the following, according to its interpretation:
\begin{compactenum}[1)]
  \item
a \textbf{constant;}\dindex{constant}
\item
an 
\textbf{$n$-ary function-symbol}%
\dindexsub{function}{---{}-symbol}%
\dindexsub{symbol}{function-{}---}%
\dindexsub{nary@$n$-ary}{--- function-symbol} 
for some positive $n$ in $\vnn$;
\item
an \textbf{$n$-ary predicate}%
\dindex{predicate}%
\dindexsub{nary@$n$-ary}{--- predicate}%
\dindexsub{symbol}{predicate}
(or
\textbf{relation-symbol}%
\dindexsub{relation}{---{}-symbol}%
\dindexsub{symbol}{relation-{}---}%
\dindexsub{nary@$n$-ary}{--- relation-symbol}) 
for some positive $n$ in $\vnn$.  
\end{compactenum}

Since nullary operations on $A$ can be considered as elements of $A$,
a constant can be considered as a nullary function-symbol.

Here are some observations about the definition of \Eng{structure}:
\begin{asparaenum}
  \item
I am following the old convention\footnote{Used for example by Chang and Keisler~\cite{MR0409165}.  Recent writers (as Marker~\cite{MR1924282} and
Rothmaler~\cite{MR1800596}) use `calligraphic' letters, not Fraktur:
\begin{center}
  \begin{tabular}{r | c | c | c | c | c | c | c}
For a structure with universe: & $A$ & $B$ & $C$ & \dots & $M$ & $N$ &
\dots \\ \hline
\rule{0mm}{4mm}I write: & $\str A$ & $\str B$ & $\str C$ &
\dots & $\str M$ & $\str N$ & \dots\\ \hline 
\rule{0mm}{4mm}others may write: & $\mathcal A$ & $\mathcal B$ &
$\mathcal C$ & \dots & $\mathcal M$ & $\mathcal N$ & \dots
  \end{tabular}
\end{center}
Another option, used by Hodges~\cite{MR94e:03002}, is to use an ordinary letter
like $A$ for a structure, and then $\operatorname{dom}(A)$ for its
universe.  (Here \Eng{dom} stands for \textsl{domain.}%
\tindex{domain})} of denoting the
universe of a structure by a Roman 
letter, and the structure itself by the corresponding Fraktur or
Gothic letter.
One might not bother to make a typographical
distinction between a structure and its universe.  Indeed, as
suggested in the examples, the
distinction is not easy to make with standard structures like $\B$ or $\Z$
(which are commonly denoted by letters in a so-called blackboard-bold
font). 
\item
Similarly, it is not always easy or convenient to distinguish in
writing between a symbol and its interpretation.
\item
In a structure $(A,\interpretation)$, the
\textbf{interpretation-function}\dindexsub{interpretation}{---{}-function}\dindexsub{function}{interpretation-{}---}
$\interpretation$ could be considered to carry, within itself, the
universe $A$.  In any case, $A$
and $\interpretation$ work together to provide interpretations of the
symbols in $\lang$ \emph{as} elements of, or operations or relations on,
a certain set, namely $A$ itself.  That's all a structure is:
something that provides a mathematical 
interpretation for certain symbols.  What makes model-theory
interesting is that the same symbols
can have different interpretations.  Here begins the distinction between
\textbf{syntax}%
\dindex{syntax}\label{syntax} (formal symbolism) and
\textbf{semantics}%
\dindexsub{semantic}{---s}
(mathematical 
meaning).\footnote{The distinction was alluded to in
  \S~\ref{sect:quantifiers}.  In propositional logic, formal
  entailment ($\proves$) can be understood as a syntactic notion,
  while logical entailment ($\models$) is semantic.}  
\end{asparaenum}

\subsection*{Terms and formulas}

The \textbf{terms}\dindex{term} of a first-order signature $\lang$ are
conveniently written in 
Polish notation\index{Polish!--- notation}\index{notation!Polish ---} 
(see \S~\ref{sect:unique}).
First, we introduce a list
\begin{equation*}
  x_0,x_1, x_2,\dots
\end{equation*}
of \textbf{variables}\dindex{variable} (that is,
\textbf{individual variables}\dindex{individual
  variable}\dindexsub{variable}{individual ---}: 
variables standing for \emph{individual} elements of a universe).
Then, by definition,
\begin{compactenum}[1)]
\item
  all variables are terms of $\lang$;
\item
all constants of $\lang$ are terms of $\lang$;
\item
if $f$ is an $n$-ary function-symbol in $\lang$, and
$(t_0,\dots,t_{n-1})$ is a list of $n$ terms of $\lang$, then
\begin{equation*}
  ft_0\dotsb t_{n-1}
\end{equation*}
is a term of $\lang$; if $f$ is binary, then $ft_0t_1$ may also be
written as
\begin{equation*}
  (t_0\mathbin{f}t_1).
\end{equation*}
Finally, singulary function-symbols are sometimes written as
superscripts on their arguments, as in $\scr n$ in
\S~\ref{sect:sets}~\eqref{eqn:scrn} and $A\comp$ in
\S~\ref{sect:quantifiers}~\eqref{eqn:Ac}. 
\end{compactenum}
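
For instance (an illustrative parsing, using the signature of $\Z$
from the example above), the string
\begin{equation*}
  {\cdot}x_0{+}x_1x_2
\end{equation*}
is a term in Polish notation: the binary function-symbol $\cdot$
takes the two terms $x_0$ and ${+}x_1x_2$ as its arguments.  With the
bracketing convention just given, this term is written as
$(x_0\cdot(x_1+x_2))$.
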
The 
\textbf{atomic formulas}%
\dindex{atomic formula}%
\dindexsub{formula}{atomic ---} 
are defined similarly:
\begin{compactenum}
  \item
If $t_0$ and $t_1$ are terms of $\lang$, then the equation
\begin{equation*}
      t_0=t_1
  \end{equation*}
is an atomic formula of $\lang$;
\item
If $R$ is an $n$-ary predicate of $\lang$, and $(t_0,\dots,t_{n-1})$
is a list of $n$ terms of $\lang$, then the string
\begin{equation*}
  Rt_0\dotsb t_{n-1}
\end{equation*}
is an atomic formula of $\lang$; if $R$ is binary, then $Rt_0t_1$ may
also be written as
\begin{equation*}
  t_0\mathrel Rt_1.
\end{equation*}
\end{compactenum}
Finally, \textbf{formulas}\dindex{formula} in general can be defined:
\begin{compactenum}
  \item
Atomic formulas of $\lang$ are formulas of $\lang$.
\item
If $\phi$ is a formula of $\lang$, then so is $\lnot\phi$.
\item
If $\phi$ and $\psi$ are formulas of $\lang$, then $(\phi\land\psi)$ is a
formula of $\lang$.
\item
If $\phi$ is a formula of $\lang$, and $x$ is an individual variable,
then $\Exists x\phi$ is a formula of $\lang$.
\end{compactenum}
These are the 
\textbf{first-order formulas}%
\dindexsub{first}{---{}-order formula}%
\dindexsub{order}{first-{}--- formula}%
\dindexsub{formula}{first-order formula} 
in the signature $\lang$;
they constitute the 
\textbf{first-order logic}%
\dindexsub{first}{---{}-order logic}%
\dindexsub{order}{first-{}--- logic}%
\dindexsub{logic}{first-order ---} 
in that signature.
We can use other connectives in addition to, or instead of, $\lnot$
and $\land$.  One will generally want to use an adequate signature for
propositional logic, like $\{\lnot,\land\}$ (Theorem~\ref{thm:and-not-ad}) or $\{\lnot,\lto\}$ (by \S~\ref{sect:adequacy}, Exercise~\ref{exer:not-to-ad}).  Once the criterion of
adequacy is met, then using fewer symbols makes the ensuing
definitions and proofs easier to write down.  

We can also use the quantifier
$\forall$; but
formulas using $\forall$ can be rewritten with $\exists$ alone by means
of~\eqref{eqn:notE} and~\eqref{eqn:notA} in \S~\ref{sect:quantifiers}.

It is standard to write a formula $\lnot(t_0=t_1)$ as $t_0\neq t_1$.

In the definition of formula, if the last condition is removed, then what is defined is the \textbf{quantifier-free formulas}\dindexsub{formula}{quantifier-free ---}\dindexsub{quantifier}{---{}-free formula} of $\lang$.

\subsection*{Interpretations of terms}

A term $t$ can be called \textbf{$n$-ary} if the set of its variables
is a subset of $\{x_k:k<n\}$; then $t$ is interpreted in an
$\lang$-structure $\str A$ as an $n$-ary operation $t^{\str A}$ on
$A$.  The possibility that $n=0$ is allowed; in that case, $t$ is
\textbf{nullary}\dindexsub{nullary}{--- term}\dindexsub{term}{nullary
  ---} or \textbf{constant,}\dindexsub{constant}{---
  term}\dindexsub{term}{constant ---} and its 
interpretation in $\str A$
is just an element of $A$.  The precise definition is what one should
expect:
\begin{compactenum}
\item
  If $k<n$, then the variable $x_k$ is an $n$-ary term; as such, it is
  interpreted in $\str A$ as the $n$-ary operation $\tuple x\mapsto
  x_k$ on $A$. (Here necessarily $n>0$.)
\item
Every constant $c$ is an $n$-ary term, interpreted in $\str A$ as the
constant 
$n$-ary operation $\tuple x\mapsto c^{\str A}$ on $A$. (If $n=0$, then this operation can be understood as the element $c^{\str A}$ of $A$.)
\item
If $(t_0,\dots,t_{k-1})$ is a list of $n$-ary terms, and $f$ is a
$k$-ary function-symbol, then the term $ft_0\dotsb t_{k-1}$ is $n$-ary
and, as such, is interpreted in $\str A$ as the $n$-ary operation
\begin{equation*}
  \tuple x\longmapsto f^{\str A}(t_0{}^{\str A}(\tuple
  x),\dots,t_{k-1}{}^{\str A}(\tuple x))
\end{equation*}
on $A$. (If $n=0$, the interpretation is just the element $f^{\str A}(t_0{}^{\str A},\dots,t_{k-1}{}^{\str A})$ of $A$.) 
\end{compactenum}

\begin{example}
  In $\Z$, the ternary terms $(x_0\cdot(x_1+x_2))$ and $((x_0\cdot
  x_1)+(x_0\cdot x_2))$ have the same interpretation, namely the
  ternary operation 
  \begin{equation*}
  (x,y,z)\mapsto x(y+z)
  \end{equation*}
  on $\Z$.  We could also
  write this operation more precisely as $(x,y,z)\mapsto
  x\cdot^{\Z}(y+^{\Z}z)$.  (See
  \S~\ref{algebra} \eqref{eqn:identity}.)
\end{example}

\subsection*{Interpretations of formulas}

Interpretations of formulas take longer to define precisely, but the
idea is that $\lnot$, $\land$, and $\exists$ symbolize
complementation, intersection, and \textsl{projection}
respectively.   An \emph{atomic} formula $\phi$ can be called
\textbf{$n$-ary} if the set of its variables 
is a subset of $\{x_i\colon i<n\}$.  Then $\phi$ is interpreted in a
structure $\str A$ as an $n$-ary relation $\phi^{\str A}$ on
$A$.  This relation $\phi^{\str A}$ is the
\textbf{solution set}%
\dindexsub{solution}{--- set}%
\dindexsub{set}{solution-{}---}
in $\str A$ of the formula $\phi$. In particular:
\begin{align}\label{eqn:=int}
(t_0=t_1)^{\str A}&=\{\tuple x\in A^n\colon t_0{}^{\str A}(\tuple x)=
    t_1{}^{\str A}(\tuple x)\},\\\label{eqn:Rint}
(Rt_0\dotsb t_{k-1})^{\str A}&=\{\tuple x\in A^n\colon (t_0{}^{\str
  A}(\tuple x),\dots,t_{k-1}{}^{\str A}(\tuple x))\in R^{\str A}\}. 
\end{align}

\begin{example}\label{ex:25}
  The interpretation of the equation
  \begin{equation*}
  ((x_0\cdot x_0)+(x_1\cdot x_1))=25
\end{equation*}
(usually written as $x_0{}^2+x_1{}^2=25$) in $\R$ is
  a circle of radius $5$ and center $(0,0)$; see Fig.~\ref{fig:circ-5}.  
\begin{figure}[t]
\psset{unit=2.4mm}
\mbox{}\hfill
\begin{pspicture}(-6,-6)(6,6)
\psaxes[labels=none]{->}(0,0)(-6,-6)(6,6)
\pscircle[linewidth=0.8mm](0,0)5
\end{pspicture}
\hfill
\begin{pspicture}(-6,-6)(6,6)
\psaxes[labels=none]{->}(0,0)(-6,-6)(6,6)
\psdots(5,0)(4,3)(3,4)(0,5)(-3,4)(-4,3)(-5,0)(-4,-3)(-3,-4)(0,-5)(3,-4)(4,-3)
\end{pspicture}
\hfill
\begin{pspicture}(-6,-6)(6,6)
\pscircle[linewidth=0.8mm,linestyle=dashed,fillstyle=solid,fillcolor=lightgray](0,0)5
\psaxes[labels=none]{->}(0,0)(-6,-6)(6,6)
\end{pspicture}
\hfill
\begin{pspicture}(-6,-6)(6,6)
%\pscircle[linestyle=dashed](0,0)5
\psaxes[labels=none]{->}(0,0)(-6,-6)(6,6)
\psdots    (2,-4)(1,-4)(0,-4)(-1,-4)(-2,-4)
     (3,-3)(2,-3)(1,-3)(0,-3)(-1,-3)(-2,-3)(-3,-3)
(4,-2)(3,-2)(2,-2)(1,-2)(0,-2)(-1,-2)(-2,-2)(-3,-2)(-4,-2)
(4,-1)(3,-1)(2,-1)(1,-1)(0,-1)(-1,-1)(-2,-1)(-3,-1)(-4,-1)
(4,0)(3,0)(2,0)(1,0)(0,0)(-1,0)(-2,0)(-3,0)(-4,0)
(4,1)(3,1)(2,1)(1,1)(0,1)(-1,1)(-2,1)(-3,1)(-4,1)
(4,2)(3,2)(2,2)(1,2)(0,2)(-1,2)(-2,2)(-3,2)(-4,2)
     (3,3)(2,3)(1,3)(0,3)(-1,3)(-2,3)(-3,3)
          (2,4)(1,4)(0,4)(-1,4)(-2,4)
\end{pspicture}
\hfill\mbox{}
\caption{Interpretations of $x_0{}^2+x_1{}^2=25$ and $x_0{}^2+x_1{}^2<25$.}\label{fig:circ-5}
\end{figure}
  The interpretation in
  $\Z$ consists of the integer points on this circle, namely $(\pm
  5,0)$, $(\pm 4,3)$, $(\pm 4,-3)$, $(\pm 3,4)$, $(\pm 3,-4)$, and
  $(0,\pm 5)$.  The interpretation of $x_0{}^2+x_1{}^2<25$ in $\R$ is
  the interior of the disk bounded by the circle.
\end{example}

In the sense described in \S~\ref{sect:cartesian}, a nullary relation
is a truth-value, $0$ or $1$.  If $n=0$, then~\eqref{eqn:=int}
and~\eqref{eqn:Rint} can be written as:
\begin{gather}\label{eqn:=intE}
  (t_0=t_1)^{\str A}=
\begin{cases}
    1,&\text{ if } t_0{}^{\str A}=t_1{}^{\str A},\\
    0,&\text{ if }t_0{}^{\str A}\neq t_1{}^{\str A};
    \end{cases}\\  \label{eqn:RintE}
(Rt_0\dotsb t_{k-1})^{\str A}=
\begin{cases}1,&\text{ if } 
(t_0{}^{\str A},\dots,t_{k-1}{}^{\str A})\in R^{\str A},\\
    0,&\text{ if }
(t_0{}^{\str A},\dots,t_{k-1}{}^{\str A})\notin R^{\str A}.
\end{cases}
\end{gather}

Quantifiers complicate matters, beginning with the very definition of when a formula is $n$-ary.  Assume for now that we \emph{have} defined this, and that
$\phi$ and $\psi$ are
arbitrary $n$-ary formulas, whose interpretations $\phi^{\str A}$ and
$\psi^{\str A}$ are $n$-ary relations on $A$.  Then the interpretations of $\lnot\phi$ and $(\phi\land\psi)$ are given by
\begin{gather*}
  (\lnot\phi)^{\str A}=A^n\setminus\phi^{\str A}=(\phi^{\str A})\comp;\\
(\phi\land\psi)^{\str A}=\phi^{\str A}\cap\psi^{\str A}.
\end{gather*}
Now we have defined the interpretations of all \emph{quantifier-free} formulas.

Suppose $\phi$ is an $(n+1)$-ary formula.
Then $(\Exists{x_n}\phi)^{\str A}$ is an $n$-ary
relation on $A$, namely the set of all $(a_0,\dots,a_{n-1})$ in
$A^n$ such that $(a_0,\dots,a_{n-1},b)\in\phi^{\str A}$ for
\emph{some} $b$ in $A$.  This means
\begin{equation}\label{eqn:n-1int}
  (\Exists{x_n}\phi)^{\str A}=\coordproj {n+1}n\setimb{\phi^{\str A}},
\end{equation}
where $\coordproj{n+1}n$ is the function
\begin{equation}\label{eqn:projection}
  (x_0,\dots,x_{n-1},x_n)\longmapsto(x_0,\dots,x_{n-1})
\end{equation}
from $A^{n+1}$ to $A^n$; such a function can be called a
\textbf{projection.}%
\dindex{projection}%
\dindexsub{function}{projection}  
(See Figure~\ref{fig:projection} and
\S~\ref{sect:eq}.)
\begin{figure}[t!]
\begin{center}
  \begin{picture}(120,120)(-20,-20)
    \put(0,0){\line(0,1){100}}
    \put(0,0){\line(1,0){100}}
    \put(100,0){\line(0,1){100}}
    \put(0,100){\line(1,0){100}}
    \put(0,80){\circle*{5}}
    \put(-10,80){$b$}
    \put(30,80){\circle*{5}}
    \put(35,80){$(\tuple a,b)$}
    \thicklines
    \put(30,75){\vector(0,-1){70}}
    \put(30,0){\circle*{5}}
    \put(35,5){$\tuple a$}
    \put(35,40){$\coordproj{n+1}n$}
    \put(45,-20){$A^n$}
    \put(-20,45){$A$}
    \put(75,60){$A^{n+1}$}
  \end{picture}
\end{center}
\caption{Projection}
\label{fig:projection}
\end{figure}
  Note then that
the formula $\Exists{x_n}\phi$ is considered as
$n$-ary, not $(n+1)$-ary, even though it contains the variable
$x_n$.  The point is that this variable is not \textsl{free}%
\dindex{free variable}%
\dindexsub{variable}{free ---} 
in the formula; it is only 
\textsl{bound.}%
\tindex{bound occurrence of variable}

\begin{example*}[\ref{ex:25} continued]
The formula $\Exists{x_1}x_0{}^2+x_1{}^2=25$ is singulary.  Its
interpretation in $\R$ is the interval $[-5,5]$; in $\Z$, the set
$\{-5,-4,-3,0,3,4,5\}$. 
\end{example*}

The set $\fv{\phi}$ of \textbf{free variables}%
in a formula $\phi$ is defined
  recursively.
  \begin{compactenum}
    \item
$\fv{\phi}$ is the set of variables appearing in $\phi$, if $\phi$ is
      atomic.
\item
$\fv{\lnot\phi}=\fv{\phi}$.
\item
$\fv{\phi\land\psi}=\fv{\phi}\cup\fv{\psi}$.
\item
$\fv{\Exists x\phi}=\fv{\phi}\setminus\{x\}$.
  \end{compactenum}
Thus quantifiers \textbf{bind}\dindex{bind} variables, making them not free.

\begin{example}\label{example:free}
  Suppose $R$ and $S$ are binary predicates.  Then the free variables of
  \begin{equation*}
      \Exists x(x\mathrel Ry\land x\mathrel Sz)
  \end{equation*}
  are $y$ and $z$, but the free variables of
  \begin{equation*}
\Exists xx\mathrel Ry\land x\mathrel Sz
\end{equation*}
are $x$, $y$, and $z$.
\end{example}

The second formula in the example is complicated by having bound
\emph{occurrences}\index{occurrence} of $x$, even though $x$ is a free
variable of the formula.  In practice one avoids this situation by
using instead a formula like $\Exists uu\mathrel Ry\land x\mathrel
Sz$.  For lack of a better term, let us refer to such a formula as
\textbf{good.}%
\dindexsub{good}{--- formula}  
There is a recursive definition
of good formulas: 
\begin{compactenum}
  \item
Atomic formulas of $\lang$ are good formulas of $\lang$.
\item
If $\phi$ is a good formula of $\lang$, then so is $\lnot\phi$.
\item
If $\phi$ and $\psi$ are good formulas of $\lang$, 
and every variable that occurs in both formulas is a \emph{free} variable of both formulas, 
then $(\phi\land\psi)$ is a good
formula of $\lang$.
\item
If $\phi$ is a good formula of $\lang$, and $x$ is a free variable of $\phi$,
then $\Exists x\phi$ is a good formula of $\lang$.
\end{compactenum}

\begin{example}
If $\phi$ and $\psi$ are formulas, then so are $\Exists x\phi\land\Exists x\psi$ and
$\Exists x\Exists x\phi$; but these are not \emph{good} formulas.
\end{example}

\begin{lemma}\label{lem:good}
Suppose $x$ is a free variable of a good formula $\phi$, and $c$ is a constant.
\begin{compactenum}
\item
In $\phi$, the variable $x$ never occurs right after $\exists$.
\item
The result of replacing each occurrence of $x$ in $\phi$ with $c$ is a good formula.
\end{compactenum}
\end{lemma}

We never need work with any formulas other than good formulas.  Also, restricting our attention to good formulas makes some general definitions easier.
An arbitrary formula is \textbf{$n$-ary}\dindex{nary@$n$-ary}\dindexsub{arity}{nary@$n$-ary} if its \emph{free} variables are among $x_0$, \dots, $x_{n-1}$.  If $\phi$ is such a formula, we may write it as
\begin{equation*}
\phi(x_0,\dots,x_{n-1}).
\end{equation*}
Suppose in particular $\phi$ is a \emph{good} formula.
If $t_0$, \dots, $t_{n-1}$ are terms, we denote by
\begin{equation*}
\phi(t_0,\dots,t_{n-1})
\end{equation*}
the formula that results from substituting $t_k$ for each occurrence of $x_k$ in $\phi$, \emph{provided} $x_k$ is actually a free variable of $\phi$.  If $\phi$ is not necessarily good, then $\phi(t_0,\dots,t_{n-1})$ is the result of substituting $t_k$ for each free \textsl{occurrence}\tindex{occurrence} of $x_k$; but then one must define the free occurrences of variables.  We avoid having to do this by restricting our attention to good formulas.


\begin{example}
If $\phi$ is ternary, and $\psi$ is $\Exists{x_1}\phi$, then $\psi$ is also ternary, but $\psi(c_0,c_1,c_2)$ is $\Exists{x_1}\phi(c_0,x_1,c_2)$.
\end{example}

The notation for substitution can be modified in an obvious way.  If $\phi$ has at most one variable, $x$ (which could be $x_{1066}$, for all we know), then we can write $\phi$ as $\phi(x)$; then $\phi(t)$ is the result of substituting $t$ for $x$ in $\phi$, as long as $x$ really is a free variable of $\phi$; otherwise $\phi(t)$ is just $\phi$.

A nullary formula is a \textbf{sentence.}\index{sentence}  In a given signature $\lang$, a sentence $\sigma$ is either \textbf{true} or \textbf{false} in a structure $\str A$; if true, we write
\begin{equation}\label{eqn:true-in}
\str A\models\sigma;
\end{equation}
otherwise, $\str A\nmodels\sigma$.
The definition is recursive:
\begin{compactenum}
\item
If $\sigma$ is atomic, then
\begin{equation*}
\str A\models\sigma\iff\sigma^{\str A}=1.
\end{equation*}
\item
If $\sigma$ is $\lnot\tau$, then
\begin{equation*}
\str A\models\sigma\iff\str A\nmodels\tau.
\end{equation*}
\item
If $\sigma$ is $\tau\land\rho$, then
\begin{equation*}
\str A\models\sigma\iff\str A\models\tau\amp\str A\models\rho.
\end{equation*}
\item
If $\sigma$ is $\Exists x\phi$, then $\str A\models\sigma$ if and only if
\begin{equation*}
\str A'\models\phi(c_b)
\end{equation*}
for \emph{some} element $b$ of $A$, where $c_b$ is a new constant, and $\str A'$ is the same as $\str A$, except that it interprets $c_b$ as $b$.
\end{compactenum}

Another way to write the last condition is as follows.  Given the structure $\str A$ of $\lang$, we let $\lang(A)$ be $\lang$, together with a new constant $c_b$ for each element $b$ of $A$.  Then we define $\str A_A$ as a structure of $\lang(A)$ in the obvious way: it interprets symbols of $\lang$ as $\str A$ does, and it interprets each new constant $c_b$ as $b$.  If $\phi$ is a quantifier-free formula of $\lang(A)$, then we can denote by
$\phi^{\str A}$ the interpretation of $\phi$ in $\str A_A$.  If $\sigma$ is a sentence of $\lang(A)$, then we write $\str A\models\sigma$ if $\sigma$ is true in $\str A_A$.  Usually we denote the constant $c_b$ by $b$.
If $\sigma$ is $\Exists x\phi$, then we have simply that $\str A\models\sigma$ if and only if
\begin{equation*}
\str A\models\phi(b)
\end{equation*}
for some element $b$ of $A$.
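
For instance (a toy evaluation, with the structure chosen only for
illustration), let $\lang$ be $\{<\}$, where $<$ is a binary
predicate, and let $\str A$ be $(\Z,<)$.  If $\sigma$ is the sentence
$\Exists x\,x<0$ of $\lang(\Z)$, then
\begin{equation*}
  \str A\models\sigma,\quad\text{since}\quad\str A\models-1<0,
\end{equation*}
where $-1$ stands for the new constant $c_{-1}$.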

The following is an easy consequence of the definitions.

\begin{lemma}\label{lem:qf}
In some signature, if $\str A$ is a structure, and $\phi$ is a quantifier-free $n$-ary formula, then $\phi^{\str A}$ is the set of all $\tuple b$ in $A^n$ such that
\begin{equation*}
\str A\models\phi(\tuple b).
\end{equation*}
\end{lemma}

Now we can use the lemma as a \emph{definition} of $\phi^{\str A}$ for arbitrary formulas $\phi$.  Then the following is also easy.

\begin{lemma}
In some signature, if $\str A$ is a structure, and $\phi$ is a quantifier-free $(n+1)$-ary formula, then 
\begin{equation*}
(\Exists{x_n}\phi)^{\str A}=\coordproj{n+1}n[\phi^{\str A}].
\end{equation*}
\end{lemma}

\subsection*{Entailment}

Suppose $\sigma$ is a sentence of some signature $\lang$, and $\str A$ is a structure of $\lang$.  If $\sigma$ is true in $\str A$, then we may say that $\str A$ is a \textbf{model}\dindex{model} of $\sigma$.  More generally,
if $\Gamma$ is a set of sentences of $\lang$, and each sentence in $\Gamma$ is true in $\str A$, then $\str A$ is a \textbf{model} of $\Gamma$; in this case, we may write
\begin{equation}\label{eqn:model}
\str A\models\Gamma.
\end{equation}
If $\sigma$ is true in \emph{every} model of $\Gamma$, then $\sigma$ is a
\textbf{logical consequence}\dindexsub{logic}{---al
  consequence}\dindexsub{consequence}{logical ---} of
$\Gamma$, or $\Gamma$ \textbf{logically entails}\dindexsub{logic}{---ally entails}\dindexsub{entails}{logically ---} $\sigma$,
and we write
\begin{equation}\label{eqn:entails}
\Gamma\models\sigma.
\end{equation}
In case $\Gamma=\{\sigma_0,\dots,\sigma_{n-1}\}$, we may write also
\begin{equation*}
\sigma_0,\dots,\sigma_{n-1}\models\sigma.
\end{equation*}
If $n=0$ here, that is, $\Gamma$ is empty, then we write
\begin{equation*}
\models\sigma;
\end{equation*}
this means $\sigma$ is true in every structure of $\lang$, or in other words $\sigma$ is a \textbf{validity.}\index{validity}

Note well that the semantic turnstile $\models$ has completely different meanings in~\eqref{eqn:model} and~\eqref{eqn:entails}.  To avoid confusion, one might prefer to write~\eqref{eqn:model} as
\begin{equation*}
\models_{\str A}\Gamma.
\end{equation*}

Let us now permit $\lto$ in formulas, and define interpretations so that
\begin{equation*}
(\phi\lto\psi)^{\str A}=(\lnot(\phi\land\lnot\psi))^{\str A}.
\end{equation*}
Let us also permit $\forall$ in formulas, so that 
\begin{equation*}
(\Forall x\phi)^{\str A}=(\lnot\Exists x\lnot\phi)^{\str A}.
\end{equation*}
A \textbf{generalization}\dindex{generalization} of a formula $\phi$ is a \emph{sentence} of the form $\Forall{u_0}\cdots\Forall{u_{n-1}}\phi$.  A \textbf{tautology}\dindex{tautology} is a sentence $\sv F(\sigma_0,\dots,\sigma_{n-1})$, where $\sv F$ is a tautology of propositional logic.
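
For instance, for every sentence $\sigma$, the sentences
\begin{equation*}
  \sigma\lto\sigma,\qquad\lnot(\sigma\land\lnot\sigma)
\end{equation*}
are tautologies in this sense; and $\Forall x\Forall y\,x=y$ is a
generalization of the formula $x=y$.  (A generalization of a formula
need not be true in a given structure.)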

Let $\lang$ be a signature with infinitely many constants.  We can now define a proof-system for $\lang$, in the sense of \S~\ref{sect:formal}, as follows.
The only rule of inference is Detachment.  The axioms are defined recursively:
\begin{enumerate}
\item
Every tautology is an axiom.
\item
For all constants $c$ and $d$ and all singulary formulas $\phi$ of $\lang$, the following are axioms:
\begin{align*}
c&=c,&
c=d&\lto\phi(c)\lto\phi(d).
\end{align*}
\item
For all singulary formulas $\phi$ and $\psi$ of $\lang$ with free variable $x$, and all sentences~$\sigma$ of $\lang$, the following are axioms:
\begin{gather*}
\Forall x(\phi(x)\lto\psi(x))\lto\Forall x\phi(x)\lto\Forall x\psi(x),\\
\Forall x(\sigma\lto\psi(x))\lto\sigma\lto\Forall x\psi(x).
\end{gather*}
\item
For all singulary formulas $\phi$ of $\lang$ with free variable $x$, the following is an axiom:
\begin{equation*}
\Exists x\phi(x)\lto\lnot\Forall x\lnot\phi(x).
\end{equation*}
\item
If $\phi(x)$ is a formula of $\lang$ in which a constant $c$ of $\lang$ does not appear, and $\phi(c)$ is an axiom, then the following is an axiom:
\begin{equation*}
\Forall x\phi(x).
\end{equation*}
\end{enumerate}
As in \S~\ref{sect:formal}, if $\Gamma$ is a set of sentences, and $\sigma$ is a sentence, we write
\begin{equation}\label{eqn:sequent}
\Gamma\proves\sigma
\end{equation}
if there is a formal proof of $\sigma$ from $\Gamma$ in the proof-system just defined.
In this case, we may say that $\sigma$ is
\textbf{deducible}\dindex{deducible} from $\Gamma$.

The set of sentences deducible from a set $\Gamma$ is recursively defined:
\begin{compactenum}
\item
It contains the axioms.
\item
It contains the sentences in $\Gamma$.
\item
If it contains $\sigma$ and $\sigma\lto\tau$, then it contains $\tau$.
\end{compactenum}
This allows the use of \textbf{induction}\dindex{induction} to prove statements about those sentences.

Note that, by assuming that $\lang$ contains infinitely many constants, we ensure that, if $\Gamma$ does not formally entail $\sigma$ in $\lang$, then neither does it do so in a larger signature.

\begin{theorem}[Soundness]\label{thm:1-soundness}
If $\Gamma\proves\sigma$, then $\Gamma\models\sigma$.
\end{theorem}

\begin{proof}
We use induction.  The claim is trivially true when $\sigma\in\Gamma$.  The claim is true when $\sigma$ is an axiom, since in that case $\models\sigma$ (exercise).
Finally, suppose the claim is true when $\sigma$ is $\rho$ and when $\sigma$ is $\rho\lto\tau$.  If these sentences are deducible from $\Gamma$, then by inductive hypothesis $\Gamma\models\rho$ and $\Gamma\models\rho\lto\tau$; therefore $\Gamma\models\tau$.
\end{proof}

The expression in~\eqref{eqn:sequent} can be called a \textbf{sequent.}\dindex{sequent}  We usually do not write down formal proofs; we show that they exist by considering sequents.

\begin{theorem}[Detachment]
If $\Gamma\proves\rho$, and $\Gamma\proves\rho\lto\sigma$, then $\Gamma\proves\sigma$.
\end{theorem}

\begin{proof}
If $\alpha_0,\dots,\alpha_m$ is a formal proof of $\rho$ from $\Gamma$, and
$\beta_0,\dots,\beta_n$ is a formal proof of $\rho\lto\sigma$ from $\Gamma$, then
\begin{equation*}
\alpha_0,\dots,\alpha_m,\beta_0,\dots,\beta_n,\sigma
\end{equation*}
is a formal proof of $\sigma$ from $\Gamma$.
\end{proof}

\begin{theorem}[Deduction]
If $\Gamma\cup\{\sigma\}\proves\tau$, then
\begin{equation*}
\Gamma\proves\sigma\lto\tau.
\end{equation*}
\end{theorem}

\begin{proof}
We use induction on $\tau$.  There are three cases to consider.
\begin{asparaenum}
\item
If $\tau$ is an axiom or an element of $\Gamma$, then
\begin{align*}
\Gamma&\proves\tau,                  &&\\
   &\proves\tau\lto\sigma\lto\tau,&&\text{[tautology]}\\
\Gamma&\proves\sigma\lto\tau.        &&\text{[Detachment]}
\end{align*}
\item
If  $\tau$ is $\sigma$, then $\sigma\lto\tau$ is a tautology, so again the claim follows.
\item
The last possibility is that $\rho$ and $\rho\lto\tau$ are deducible from $\Gamma\cup\{\sigma\}$, and the claim holds for each of these two sentences.
Then
\begin{align*}
\Gamma&\proves\sigma\lto\rho,&&\text{[inductive hyp.]}\\
\Gamma&\proves\sigma\lto\rho\lto\tau,&&\text{[inductive hyp.]}\\
   &\proves (\sigma\lto\rho) \lto(\sigma\lto\rho\lto\tau)\lto\sigma\lto\tau,&&\text{[tautology]}\\
\Gamma&\proves\sigma\lto\tau.&&\text{[Detachment (twice)]}\qedhere
\end{align*}
\end{asparaenum}
\end{proof}

\begin{theorem}[Generalization]
If $\Gamma\proves\phi(c)$, where $x$ is free in $\phi(x)$, and $c$ does not occur in $\phi(x)$ or in any sentence of $\Gamma$, then
\begin{equation*}
\Gamma\proves\Forall x\phi(x).
\end{equation*}
\end{theorem}

\begin{proof}
The claim is true when $\phi(c)$ is an axiom.  The claim is vacuously true when $\phi(c)$ is in $\Gamma$, since then $c$ does occur in a sentence of $\Gamma$.
The remaining possibility is that $\Gamma\proves\sigma$ and $\Gamma\proves\sigma\lto\phi(c)$.  If $c$ does not occur in $\sigma$, then we may assume $\Gamma\proves\Forall x(\sigma\lto\phi(x))$.  By Deduction from the appropriate axiom, $\Gamma\proves\Forall x\phi(x)$.  The argument is nearly the same if $c$ does occur in $\sigma$.
\end{proof}

\begin{theorem}[Tautology]
If in propositional logic, $\sv F_0,\dots,\sv F_{m-1}\models\sv G$, and in first-order logic, $\Gamma\proves\sv F_k(\sigma_0,\dots,\sigma_{n-1})$ when $k<m$, then
\begin{equation*}
\Gamma\proves\sv G(\sigma_0,\dots,\sigma_{n-1}).
\end{equation*}
\end{theorem}

\begin{proof}
Use the tautology
\begin{equation*}
\sv F_0(\sigma_0,\dots,\sigma_{n-1})\lto\dots\lto\sv F_{m-1}(\sigma_0,\dots,\sigma_{n-1})\lto
\sv G(\sigma_0,\dots,\sigma_{n-1}).\qedhere
\end{equation*}
\end{proof}

\begin{theorem}[Equality]
$\proves c=d\lto d=c$.
\end{theorem}

\begin{proof}
It is an axiom that $c=d\lto d=d\lto d=c$.
\end{proof}

A sentence $\sigma\land\lnot\sigma$ is a \textbf{contradiction.}\dindex{contradiction}  A set $\Gamma$ of sentences is \textbf{consistent}\dindex{consistent} if it does not formally entail a contradiction.

\begin{lemma}\label{lem:fin-con}
If every finite subset of a set of sentences is consistent, then the whole set is consistent.
\end{lemma}

\begin{proof}
Suppose $\Gamma$ is not consistent.  Then there is a formal proof from $\Gamma$ of some contradiction.  Such a formal proof can use only finitely many sentences from $\Gamma$.  Those sentences compose an inconsistent finite subset of $\Gamma$.
\end{proof}

\begin{lemma}\label{lem:ccon}
If $\Gamma$ is consistent, then one of $\Gamma\cup\{\sigma\}$ and $\Gamma\cup\{\lnot\sigma\}$ is consistent.
\end{lemma}


\begin{lemma}\label{lem:bigor}
If $\Gamma\cup\{\sigma_0,\dots,\sigma_{n-1}\}$ is inconsistent, then
\begin{equation*}
\Gamma\proves\bigvee_{k< n}\lnot\sigma_k.
\end{equation*}
\end{lemma}

All of the foregoing will be used to prove the completeness of our proof-system in \S~\ref{sect:infinitary}.

\subsection*{Theories}

The \textbf{theory}\dindex{theory} of a structure $\str A$ in a signature $\lang$ is
the set of sentences of $\lang$ that are true in $\str A$.
A set of sentences is a \textbf{theory}\dindex{theory} if it contains all of its
logical consequences.  You should check that the theory \emph{of} a
structure is indeed a theory in the sense just defined.

If some theory $T$ is the set of logical
consequences of a set $\Sigma$ of sentences, then $\Sigma$
\textbf{axiomatizes}\dindexsub{axiom}{---atizes} $T$, or $\Sigma$ is a
set of \textbf{axioms}\dindex{axiom} for $T$.  It is a consequence of
G\"odel's Incompleteness\index{Godel@G\"odel!---'s Incompleteness
  Theorem}\index{incomplete!Godel-s I---ness Theorem@G\"odel's
  I---ness Theorem}\index{theorem!Godel's Incompleteness ---@G\"odel's
  Incompleteness ---}
Theorem\footnote{Published in 1931; available in English in \cite{MR0263601}.}
that the theory of $\N$ in the signature $\{\scr{},+,\cdot,0,1\}$ cannot 
be \textsl{recursively}
axiomatized: there is no computer program that can generate a complete set of
axioms for the theory.  By Moj\.zesz Presburger's earlier
work,\footnote{In Warsaw, in 1928, in his master's thesis, at the 
  suggestion of Alfred Tarski.\index{Tarski}  Then
  Presburger\index{Presburger} went into the 
  insurance industry.  He died under the
  Nazis. \cite[pp.~73--74]{MR2095748}} the 
theory of $\N$ in the signature $\{+,0,1\}$ \emph{is} recursively
axiomatizable \cite[\S~3.1, pp.~81--84]{MR1924282}: the axioms are
\begin{compactenum}[1)]
  \item
$\Forall xx+1\neq0$;
\item
$\Forall x\Forall y(x+1=y+1\lto x=y)$;
\item
$\Forall x x+0=x$;
\item
$\Forall x\Forall yx+(y+1)=(x+y)+1$;
\item
$\phi(0)\land\Forall x(\phi(x)\lto\phi(x+1))\lto\Forall x\phi(x)$,
for all formulas $\phi(x)$ of $\{+,0,1\}$.
\end{compactenum}
The last line is an
\textbf{axiom-scheme:}
\dindexsub{axiom}{---{}-scheme}\dindexsub{scheme}{axiom-{}---}it
describes a \emph{set} of axioms (in fact, an infinite set). 
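
As an illustration of the scheme (the instance is chosen here only as
an example), let $\phi(x)$ be the formula
$\Exists y(y+y=x\lor y+y+1=x)$.  The corresponding axiom is
\begin{equation*}
  \phi(0)\land\Forall x(\phi(x)\lto\phi(x+1))\lto\Forall x\phi(x),
\end{equation*}
and from it, together with the other axioms, one can deduce
$\Forall x\,\phi(x)$: every number is even or odd.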


In general, a theory $T$ in a signature $\lang$ is
  \textbf{complete}%
\dindexsub{complete}{--- theory}%
\dindexsub{theory}{complete ---} 
if
  \begin{equation*}
    T\models\sigma\iff T\nmodels\lnot\sigma
  \end{equation*}
for all sentences $\sigma$ of $\lang$.  In particular then, the theory
\emph{of} a particular structure is always complete.  Two
complementary problems of model-theory are:
\begin{compactenum}
  \item
To show that a particular set of sentences axiomatizes a complete
theory.
\item
To find a set of sentences that axiomatizes the (complete) theory of a
particular structure.  
\end{compactenum}
Presburger's\index{Presburger} result shows that the former can
sometimes be done;
G\"odel's\index{Godel@G\"odel} result shows that the latter cannot
always be done.

If $T$ is a theory in a signature $\lang$, then two $n$-ary formulas
$\phi(\tuple x)$ and $\psi(\tuple x)$ of $\lang$ are
\textbf{$T$-equivalent}\dindex{Tequivalent@$T$-equivalent}
\dindexsub{equivalent}{T-equivalent@$T$-equivalent}if
\begin{equation*}
  T\models\Forall{x_0}\dotsb\Forall{x_{n-1}}
  (\phi(x_0,\dots,x_{n-1})\liff\psi(x_0,\dots,x_{n-1})).
\end{equation*}
One way to learn about a theory and its models is to try to
\textsl{eliminate quantifiers.}  A theory $T$ in a signature $\lang$
\textbf{admits elimination of
  quantifiers}\dindexsub{quantifier}{elimination of
  ---s}\dindex{elimination of quantifiers} if for every
formula of $\lang$, there is a formula that is $T$-equivalent to it,
but that contains no quantifiers.  Presburger
proved elimination of quantifiers for the theory axiomatized above,
but in a larger signature.
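
For example (with a new symbol introduced here only for
illustration), let $T$ be the theory axiomatized above.  The
singulary formula $\Exists{x_1}\,x_1+x_1=x_0$ defines the set of even
numbers in $\N$, but no quantifier-free formula of $\{+,0,1\}$ is
$T$-equivalent to it, since such a formula in one variable defines a
finite or co-finite subset of $\N$.  If the signature is enlarged by
a singulary predicate $D_2$, interpreted in $\N$ as divisibility by
$2$, then the formula is equivalent, in $\N$ (and over a suitable
extension of $T$), to the quantifier-free formula $D_2x_0$.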

\subsection*{Higher-order logics}

First-order logic uses individual variables, but no other kinds of
variables.  In particular, there are no variables for relations.
Relations are symbolized by predicates in first-order logic, and
predicates stand for different relations in different structures; but
in a particular first-order logic, predicates are constant in the
sense that they cannot be preceded by quantifiers.

In \textbf{second-order logic,}\dindex{second-order
  logic}\dindexsub{logic}{second-order
  ---}\dindexsub{order}{second-{}--- logic} variables standing for
  relations are allowed. 
The third of the properties of $\N$ listed at the end of
\S~\ref{sect:sets} is second order in this sense, since it refers to
\emph{every} subset of $\N$.

Likewise, $\R$ is characterized (among the structures called
\textsl{ordered fields}%
\tindexsub{field}{ordered ---}%
\tindexsub{order}{---ed field}) by the second-order property of
\textsl{completeness,}%
\tindexsub{complete}{---ness} 
namely that every set of real numbers with an upper bound has a least
upper bound.  See \S~\ref{sect:reals}.  

Like propositional logic (see Theorem~\ref{thm:compactness}), first-order logic has a \textsl{compactness theorem,}%
\tindex{Compactness Theorem}%
\tindexsub{Theorem}{Compactness Th---}%
\footnote{Proved by Kurt 
  G\"odel\index{Godel@G\"odel} for \textsl{countable}%
\tindexsub{count}{---able signature} 
signatures in
  his doctoral
  dissertation in Vienna in 1929; proved generally by
  Mal'tsev\index{Mal'tsev} in the 
  Soviet Union, and
  independently by Leon Henkin\index{Henkin} \cite{MR0033781} in 1948
  in \emph{his} 
  doctoral dissertation at Princeton.  \cite[p.~318]{MR94e:03002}}
 Corollary~\ref{cor:compactness} below,
  namely that if every  
  finite subset of a set of sentences has a model, then the whole set
  has a model.  
  Second-order logic does not have such a theorem.  This is a reason why model-theorists
  work mostly with first-order logic.


\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Prove Lemma~\ref{lem:good}.
\item
Show that good formulas and their free variables can be defined simultaneously as follows:
\begin{compactenum}
  \item
An atomic formula of $\lang$ is a good formula of $\lang$, and each of its variables is free.
\item
If $\phi$ is a good formula of $\lang$, then so is $\lnot\phi$, and this has the same free variables as $\phi$.
\item
If $\phi$ and $\psi$ are good formulas of $\lang$, 
and every variable that occurs in both formulas is a \emph{free} variable of both formulas, 
then $(\phi\land\psi)$ is a good
formula of $\lang$, and its free variables are the variables that are free variables of $\phi$ or $\psi$.
\item
If $\phi$ is a good formula of $\lang$, and $x$ is a free variable of $\phi$,
then $\Exists x\phi$ is a good formula of $\lang$, and its free variables are those of $\phi$, except $x$.
\end{compactenum}
\item
Prove Lemma~\ref{lem:qf}.
  \item
Letting $P$ and $Q$ be singulary predicates, determine, from the
    definition of $\models$, whether the following hold. 
    \begin{compactenum}
            \item
$(\Exists xPx\lto \Exists xQx)\models\Forall x(Px\lto Qx)$;
\item
$(\Forall xPx\lto\Exists xQx)\models\Exists x(Px\lto Qx)$;
\item
$\Exists x(Px\lto Qx)\models(\Forall xPx\lto\Exists xQx)$;
    \item
$\{\Exists x Px,\;\Exists xQx\}\models\Exists x(Px\land Qx)$;
\item
$\Exists xPx\lto\Exists yQy\models\Forall x\Exists y(Px\lto Qy)$.
    \end{compactenum}
\item
Let $\lang=\{R\}$, where $R$ is a binary predicate, and let $\str A$
be the $\lang$-structure $(\Z,\leq)$.  Determine $\phi^{\str A}$ if
$\phi$ is:
\begin{compactenum}
  \item
$\Forall {x_1}(Rx_1x_0\lto Rx_0x_1)$;
\item
$\Forall {x_2}(Rx_2x_0\lor Rx_1x_2)$.
\end{compactenum}
\item
    Let $\lang$ be $\{S,P\}$, where $S$ and $P$ are binary
    function-symbols.  Then $(\R,+,\cdot)$ is an $\lang$-structure.
    Show that the following sets and relations are definable in this
    structure:
    \begin{compactenum}
            \item
$\{0\}$;
\item
$\{1\}$;
\item
$\{a\in\R:0< a\}$;
\item
$\{(a,b)\in\R^2:a<b\}$.
    \end{compactenum}
\item
    Show that the following sets are definable in
    $(\vnn,+,\cdot,\leq,0,1)$:
    \begin{compactenum}
      \item
the set of even numbers;
\item
the set of prime numbers.
    \end{compactenum}
\item
  Let $R$ be the binary
  relation 
  \begin{equation*}
      \{(x,x+1):x\in \Z\}
  \end{equation*}
 on $\Z$.  Show that $R$ is $0$-definable in the structure $(\Z,<)$;
  that is, find a binary formula $\phi$ 
  in the signature $\{<\}$ such that $\phi^{(\Z,<)}=R$.  
\item
Prove that the axioms of our proof-system are valid (the missing detail in the proof of Theorem~\ref{thm:1-soundness}).
\item
Prove Lemmas~\ref{lem:ccon} and~\ref{lem:bigor}.
\end{enumerate}

\section{Equipollence}\label{sect:equipollence}

In ordinary life, if two sets have the same size, one way to tell this
is to count the sets.  This procedure has two potential
inconveniences: 
\begin{compactenum}
\item
The procedure gives us more information than necessary: it tells us
not only \emph{that} the sets have the same size, but also \emph{what}
that size is. 
\item
In the usual sense of counting, the procedure does not work for
infinite sets, since we can never count to the end of them. 
\end{compactenum}
An alternative procedure is to arrange the sets in \emph{pairs,} each
pair containing an element of each set.  Strictly, some of those pairs
might be singletons, if the two sets have elements in common.  So
really, if the sets are $A$ and $B$, we should make \emph{ordered}
pairs $(c,d)$, where $c\in A$ and $d\in B$; each element of $A$ should
be the left entry of exactly one such pair, and each element of $B$
should be the right entry of exactly one such pair.  This just means
there should be a \emph{bijection} from $A$ to $B$, if the two sets
are to have the same size. 

We introduce a new terminology for the notion of having the same
size, a terminology that avoids introducing the notion of size
itself.
Two sets are \textbf{equipotent}\dindex{equipotent} or \textbf{equipollent}\dindex{equipollent}\footnote{The
  Latin participles \Lat{potent-} and \Lat{pollent-} both mean
  \emph{able}.} if there is a bijection from one to
the other.  If $A$ and $B$ are equipollent, we can write
\begin{equation*}
  A\equip B.\glossary{$A\approx B$}
\end{equation*}
Evidently,
\begin{gather*}
A\equip A,\\
A\equip B\iff B\equip A,\\
A\equip B\amp B\equip C\implies A\equip C.
\end{gather*}
We have in particular
\begin{equation*}
\N\equip\{a_0,a_1,a_2,\dots\},
\end{equation*}
provided $a_i\neq a_j$ when $i\neq j$, since then the function $n\mapsto a_n$ is indeed a bijection from the one set to the other.

\begin{examples}\label{examples:N}
\mbox{}
\begin{asparaenum}
\item
$\N\equip\{1,2,3,\dots\}$.
\item
$\N\equip\{k,k+1,k+2,\dots\}$.
\item
$\N\equip\{0,2,4,6,\dots\}$; the bijection is $x\mapsto 2x$.  
%$\Z\equip\{x\in \Z\colon\Exists y 2y=x\}$.
\item
$\N\equip\Z$, because of the bijection $f$ given by
\begin{equation*}
f(x)=\begin{cases}
	0,&\text{ if }x=0,\\
	k,&\text{ if }x=2k-1,\\
	-k,&\text{ if }x=2k.
\end{cases}
\end{equation*}
That is, $\N\equip\Z$, because the elements of $\Z$ can be listed as
\begin{equation*}
0,1,-1,2,-2,3,\dots
\end{equation*}
\item\label{item:NNN}
$\N\equip\N\times\N$, because the elements of the latter set can be listed as
\begin{equation*}
(0,0), (0,1), (1,0), (0,2), (1,1), (2,0), (0,3), (1,2), \dots
\end{equation*}
This list is made up of blocks of the form of
\begin{equation*}
(0,n),(1,n-1),(2,n-2),\dots,(n,0);
\end{equation*}
these are just the diagonals of the matrix (an explicit formula for the resulting listing is worked out after these examples)
\begin{equation*}
\begin{matrix}
(0,0)&(0,1)&(0,2)&(0,3)&\dots\\
(1,0)&(1,1)&(1,2)&\hdotsfor2\\
(2,0)&(2,1)&\hdotsfor3\\
(3,0)&\hdotsfor4\\
\hdotsfor5
\end{matrix}
\end{equation*}
\item
Suppose $f$ is a bijection from $\N$ to $\Z$, and $g$ is a bijection from $\N$ to $\N\times\N$.  We can write $g(x)$ as $(g_0(x),g_1(x))$.  Then the function
\begin{equation*}
x\mapsto(f(g_0(x)),f(g_1(x)))
\end{equation*}
is a bijection from $\N$ onto $\Z\times\Z$.  Thus $\N\equip\Z\times\Z$.
\item
$\N\equip\{x\in\Q\colon x>0\}$, because of the list:
\begin{equation*}
1,
\frac12,2,
\frac13,3,
\frac14,\frac23,\frac32,4,
\frac15,5,
\frac16,\frac25,\frac34,\frac43,\frac52,6,
\frac17,\dots,
\end{equation*}
This list is made up of blocks of the form of
\begin{equation*}
\frac1n,\frac2{n-1},\frac3{n-2},\dots,\frac n1,
\end{equation*}
but with entries deleted if they are equal to entries that have already appeared.
\item
$\N\equip\Q$.
\end{asparaenum}
\end{examples}
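
The listing of $\N\times\N$ in \eqref{item:NNN} of these examples admits an explicit formula.  The pair $(i,j)$ lies on the diagonal numbered $i+j$; the earlier diagonals contribute $1+2+\dots+(i+j)$ pairs between them, and $(i,j)$ is preceded by exactly $i$ pairs on its own diagonal.  Hence, if positions in the list are counted from $0$, then the function
\begin{equation*}
(i,j)\longmapsto\frac{(i+j)(i+j+1)}2+i
\end{equation*}
gives the position of each pair, and is therefore a bijection from $\N\times\N$ to $\N$.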

Thus there are sets $A$ and $B$ such that
\begin{equation*}
 A\pincluded B\amp A\equip B.  
\end{equation*}
But no such set $B$ can be \textsl{finite.}  Indeed, to be precise, let us say that a set $C$ is \textbf{finite}\dindex{finite} if, for some $n$ in $\N$,
\begin{equation*}
C\equip\{0,\dots,n-1\},
\end{equation*}
that is, for some $n$ in $\vnn$,
\begin{equation*}
C\equip n.
\end{equation*}

\begin{theorem}\label{thm:no-subset}
No element $n$ of $\vnn$ has a proper subset $A$ such that $A\equip n$.
\end{theorem}

\begin{proof}
We use induction.
The claim is trivially true when $n=0$, since this has no proper subsets at all.
Suppose the claim holds when $n=m$.  Now let $n=m+1$, and suppose $A\subseteq n$, and $f$ is a bijection from $n$ to $A$.  There are two cases to consider.  
\begin{compactenum}
\item
If $f(m)=m$, then $f\setminus\{(m,m)\}$ is a bijection from $m$ to $A\setminus\{m\}$, and the latter set is a subset of $m$.  In this case, by inductive hypothesis, $A\setminus\{m\}=m$, so $A=n$.
\item
If $f(m)=k$, where $k<m$, define the function $g$ on $m$ by
\begin{equation*}
g(x)=\begin{cases}
f(x),&\text{ if }f(x)\neq m,\\
k,&\text{ if }f(x)=m.
\end{cases}
\end{equation*}
Then $g$ is a bijection from $m$ onto $A\setminus\{m\}$, so again $A\setminus\{m\}=m$, and hence $A=n$.
\end{compactenum}
This completes the induction.
\end{proof}

A set is \textbf{infinite}%
\dindex{infinite} if it is not finite.  The
contrapositive of the theorem then gives us that, if a set is
equipollent with a proper subset of itself, then the set is
infinite.\footnote{In 1882, Richard Dedekind~\cite[p.~63]{MR0159773}
  suggested \emph{defining} infinite sets as those that are
  equipollent with proper subsets of themselves.  Agreement of this
  definition with ours will require the Axiom of
  Choice,~\ref{ax:choice}.} 
In particular, $\N$ and all sets equipollent with it are infinite; to
be more precise, such sets are called \textbf{countably
  infinite.}\dindexsub{count}{---ably infinite}  So $\N$, $\Z$, and
$\Q$ are all countably infinite. 
A set is called \textbf{countable}\dindexsub{count}{---able} if it is
a subset of a countably infinite set. 

\begin{theorem}
Suppose $A$ and $B$ are countable sets.
\begin{enumerate}
\item
$A\cup B$ is countable.
\item
$A^n$ is countable for all $n$ in $\N$.
\end{enumerate}
\end{theorem}

\begin{proof}
\begin{asparaenum}
\item
If $A=\{a_0,a_1,\dots\}$ and $B=\{b_0,b_1,\dots\}$, then we can list $A\cup B$ as
\begin{equation*}
a_0,b_0,a_1,b_1,a_2,\dots,
\end{equation*}
with any repeats deleted.
\item
$A^0=\{\emptyset\}$, so it is countable.  Also, $A^{n+1}\equip A^n\times A$,
so if $A^n$ is countable, then so is $A^{n+1}$ by the method of Example~\ref{examples:N} \eqref{item:NNN}.  By induction, $A^n$ is countable for all $n$ in $\N$.\qedhere
\end{asparaenum}
\end{proof}


\begin{theorem}\label{thm:L-count}
If $\lang$ is a countable first-order signature, then the set of formulas of $\lang$ is countable.
\end{theorem}

\begin{proof}
Since $\lang$ is countable, the set of all symbols used in formulas of
$\lang$ is countable.  A bijection $k\mapsto s_k$ from $\N$ to this
set establishes an \textbf{alphabetical
  ordering}\dindexsub{alphabet}{---ical
  ordering}\dindexsub{order}{alphabetical ---ing} of the set: the
symbol $s_i$ comes before $s_j$ in this ordering if and only if $i<j$.
Hence we can arrange all of the formulas of length $n$ in alphabetical
order; in particular, we can list these formulas as $\phi^n_0$,
$\phi^n_1$, $\phi^n_2$, \dots.  Now we can embed the set of \emph{all}
formulas of $\lang$ in $\N\times\N$; so the set of formulas is
countable. 
\end{proof}

Instead of $\lnot(A\equip B)$, we may write $A\nequip B$.
If there is an \emph{injection} from $A$ to $B$, we write
\begin{equation*}
  A\injects B.\glossary{$A\preccurlyeq B$}
\end{equation*}
If there is an injection, but no bijection, we write
\begin{equation}\label{eqn:pinjects}
  A\pinjects B;\glossary{$A\prec B$}
\end{equation}
in this case, $B$ is \textbf{strictly larger}\dindexsub{strict}{---ly
  larger}\dindexsub{larger}{strictly ---} than $A$.
For example, if
$A\neq\emptyset$, then $\emptyset\pinjects A$.


By Theorem~\ref{thm:powergreater} below,~\eqref{eqn:pinjects} can hold even when both $A$ and $B$ are
infinite.
Meanwhile, the following gives some justification for the name
\Eng{power set}.\index{power!--- set}\index{set!power ---}

\begin{theorem}
  If $n\in\N$, and a set $A$ has $n$ elements,
then $\pow A\equip\B^n$.
\end{theorem}

\begin{proof}
It is enough to show $\pow n\equip\B^n$ if $n\in\vnn$.
  Let $f$ be the function from $\pow n$ to $\B^n$ given by
  \begin{equation*}
    f(B)=(e_0,\dots,e_{n-1}),
  \end{equation*}
where
\begin{equation*}
  e_i=
  \begin{cases}
    1,& \text{ if }i\in B;\\
0,& \text{ if }i\notin B.
  \end{cases}
\end{equation*}
Let $g$ be the function from $\B^n$ to $\pow n$ given by
\begin{equation*}
  g((e_0,\dots,e_{n-1}))=\{i\colon e_i=1\}.
\end{equation*}
Then $g\circ f=\id_{\pow n}$ and $f\circ g=\id_{\B^n}$.  So $f$ is a
bijection by Theorem~\ref{thm:bij}.
\end{proof}
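
For instance, when $n=2$, the function $f$ of the proof matches the four subsets of $2$ with the four elements of $\B^2$:
\begin{equation*}
\emptyset\mapsto(0,0),\qquad\{0\}\mapsto(1,0),\qquad\{1\}\mapsto(0,1),\qquad\{0,1\}\mapsto(1,1).
\end{equation*}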

The last theorem can be modified to make sense for infinite sets.  In
\S~\ref{sect:cartesian}, a couple of formal definitions of
$n$-tuples\index{ntuple@$n$-tuple}\index{tuple!n---@$n$-{}---} are
mentioned.  By yet another definition, an $n$-tuple of elements of a
set $A$ is just a function\footnote{Many writers will give this
  function the domain $\{1,2,\dots,n\}$ instead of
  $\{0,1,\dots,n-1\}$.} from $\{0,\dots,n-1\}$ (the 
von-Neumann\index{von Neumann!--- natural number} natural number $n$)
into $A$.  To indicate explicitly the set of such functions, I propose
to use the notation
\begin{equation*}
  \mapset nA.\glossary{${}^nA$}
\end{equation*}
Then $\mapset nA\equip A^n$.  The latter set could be \emph{defined}
as the former.  I shall use the notation $A^n$ when the precise
definition of its elements is not important: when all that matters is
that
\begin{equation*}
  \tuple a=\tuple b\iff \bigwedge_{k<n}a_k=b_k
\end{equation*}
for all elements $\tuple a$ and $\tuple b$ of $A^n$.  (Compare the use
of $\N$ instead of $\vnn$ for the set of natural numbers, as described in
\S~\ref{sect:sets},  when the composition of an individual natural
number is not important.)
We can generalize the new notation, writing
\begin{equation*}
  \mapset AB\glossary{${}^AB$}
\end{equation*}
for the set of functions from $A$ to $B$.  

\begin{theorem}
For all sets $A$,
\begin{equation*}
\pow A\equip\mapset A{\B}.
\end{equation*}
\end{theorem}

\begin{proof}
The
function
\begin{equation*}
f\longmapsto\{x\in A:f(x)=1\}
\end{equation*}
is a bijection from $\mapset A{\B}$ to $\pow A$; for, it has the inverse $C\mapsto\chf C$, where
\begin{equation*}
  \chf C(x)=
  \begin{cases}
    1,&\text{ if }x\in C,\\
0,&\text{ if }x\notin C,
  \end{cases}
\end{equation*}
for all subsets $C$ of $A$.  
\end{proof}

Here $\chf C$ is the
\textbf{characteristic function}%
\dindex{characteristic function}%
\dindexsub{function}{characteristic ---} of $C$ on $A$.  The
letter chi may cause confusion because of its resemblance to $X$; but $\chi$ is the initial of the Greek
\Gk{qarakt'hr}.

The inequality
\begin{equation}\label{eqn:n<2^n}
  n<2^n
\end{equation}
holds for all natural numbers $n$ (see \S~\ref{order},
Exercise~\ref{exercise:n<2^n}); 
so the power set of a finite set is 
always strictly larger than the original set.  The same is true for
\emph{all} sets:

\begin{theorem}[Cantor]\label{thm:powergreater}\index{Cantor}
  $A\pinjects\pow A$ for all sets $A$.
\end{theorem}

\begin{proof}
  We have an injection $x\mapsto \{x\}$ from $A$ to $\pow A$, so
  $A\injects\pow A$.  Suppose $f$ is an arbitrary 
injection from $A$ into $\pow A$.  Let $B$ be the subset $\{x\in
A\colon x\notin f(x)\}$ of $A$.  Then $B$ is not in the range of $f$.  For,
suppose $x\in A$.  If $x\in B$, then $x\notin f(x)$, so $B\neq f(x)$.  If
$x\notin B$, then $x\in f(x)$, so again $B\neq f(x)$.  So there is no
bijection between $A$ and $\pow A$.
\end{proof}
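
To see the argument at work in a small case, let $A=3$, and let $f$ be the injection given by $f(0)=\{0\}$, $f(1)=\emptyset$, and $f(2)=\{0,2\}$.  Then $0\in f(0)$, and $1\notin f(1)$, and $2\in f(2)$; so the set $B$ of the proof is $\{1\}$, which is indeed not a value of $f$.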

Note the resemblance between this proof and that of the Russell Paradox given on p.~\pageref{Rus-alt}.  A set that is not countable is \textbf{uncountable.}%
\dindex{uncountable}  We have now that $\pow{\N}$ is uncountable.

Suppose $A\injects B$ and $B\injects A$; do we then have $A\equip B$?
In fact we do, by Theorem~\ref{thm:Sch--B}, but the proof is not easy.

\subsection*{Exercises}

Suppose $A$ is an infinite set.

\begin{enumerate}
\item
Can you write down a bijection from $A$ to $A\times A$?
\item
Suppose $f$ is a bijection from $A$ to $A^2$.  Can you write down a bijection from $A$ to 
\begin{enumerate}
\item
$A^3$?
\item
$A^4$?
\item
$A^n$?
\end{enumerate}
\end{enumerate}

\section{Equivalence-relations}\label{sect:eq}


Let $R$ be a binary relation.  The \textbf{field}\dindex{field} of $R$ is the set
\begin{equation*}
\{x\colon\Exists yx\mathrel Ry\}\cup\{y\colon\Exists xx\mathrel Ry\}.
\end{equation*}
Let this set be $A$.  Then $(A,R)$ is a structure in the sense of the last section.
We say that $R$ is:
\begin{compactenum}[1)]
  \item
\textbf{reflexive,}\dindex{reflexive} if 
\begin{equation*}
(A,R)\models\Forall xx\mathrel
Rx;
\end{equation*}
\item
\textbf{symmetric,}\dindex{symmetric} if 
\begin{equation*}
(A,R)\models\Forall x\Forall
y(x\mathrel Ry\lto y\mathrel Rx); 
\end{equation*}
\item
\textbf{transitive,}\dindex{transitive} if 
\begin{equation*}
(A,R)\models\Forall x\Forall
y\Forall z(x\mathrel Ry\land y\mathrel Rz\lto x\mathrel Rz).
\end{equation*}
\end{compactenum}
Note that, in these definitions, we need restrict the variables to the field of the relation \emph{only} in the definition of reflexivity.  The relation $R$ is reflexive if $b\mathrel Rb$ for all $b$ \emph{in the field of $R$}.  By contrast, $R$ is symmetric if $c\mathrel Rb$ whenever $b\mathrel Rc$,---there is no need to restrict $b$ and $c$ to the field of $R$, since this is already done by the condition $b\mathrel Rc$.  A similar observation holds for transitivity.

An alternative formulation of the definitions can be given in terms of the notions of
\S~\ref{sect:deeper}.  The relation $R$ is: 
\begin{compactenum}[1)]
  \item
{reflexive} if and only if $\Delta_A\included R$;
\item
{symmetric} if and only if $R=\conv R$;
\item
{transitive} if and only if $R/R\included R$.
\end{compactenum}
A reflexive, symmetric, transitive relation is called an
\textbf{equivalence-relation}.
\dindexsub{equivalence}{---{}-relation}
\dindexsub{relation}{equivalence-{}---} 

\begin{examples}\label{examples:equivalence}
\mbox{}
\begin{asparaenum}
\item
$\Delta_A$ is an equivalence-relation whose field is $A$.
\item
Equipollence is an equivalence-relation whose field is the class of all sets.
\item
Truth-equivalence (\S~\ref{equivalent}) is an equivalence-relation whose field is
the set of propositional formulas.  (Likewise, if $T$ is a first-order
theory of $\lang$, then $T$-equivalence
(\S~\ref{sect:1st}) is an equivalence-relation whose field is the set of first-order
formulas of~$\lang$.)
\item
If $n$ is an integer, then 
\textbf{congruence \emph{modulo} $n$}%
\dindex{congruence \emph{modulo} $n$}%
\dindexsub{modulo@\emph{modulo}}{congruence --- n@congruence --- $n$} 
is an
equivalence-relation with field $\Z$.  This relation consists of
pairs $(a,b)$ such that
\begin{equation*}
a\equiv b\pmod n,
\end{equation*}\glossary{$a\equiv b\pmod n$}
that is, $n\divides a-b$.  (A direct verification that this is an equivalence-relation is given after these examples.)
\item
On $\N^2$, we can define an equivalence-relation $\sim$ by
\begin{equation*}
  (a,b)\sim(c,d)\iff a+d=b+c. 
\end{equation*}
(See \S~\ref{sect:ZandQ} for elaboration.)
\item
Similarly, on $\Z\times(\Z\setminus\{0\})$, we can define an
equivalence-relation $\approx$ by
\begin{equation*}
  (a,b)\approx(c,d)\iff ad=bc.
\end{equation*}
(Again, see \S~\ref{sect:ZandQ}.)
\item
If $k<n$, and $A$ is a set, then there is an equivalence-relation
$\sim_k^n$ on $A^n$ given by
\begin{equation*}
  \tuple a\sim_k^n\tuple b\iff \bigwedge_{\substack{j<n\\j\neq k}}a_j=b_j,
\end{equation*}
that is, $\tuple a\sim_k^n\tuple b\iff \coordproj nk(\tuple
a)=\coordproj nk(\tuple b)$, where $\coordproj nk$ is as in
\S~\ref{sect:1st}.
\end{asparaenum} 
\end{examples}
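
To verify directly that congruence \emph{modulo} $n$ is an equivalence-relation, one can express the three defining properties in terms of divisibility:
\begin{gather*}
n\divides a-a,\\
n\divides a-b\implies n\divides b-a,\\
n\divides a-b\amp n\divides b-c\implies n\divides a-c.
\end{gather*}
These hold because $a-a=n\cdot0$, and $b-a=-(a-b)$, and $a-c=(a-b)+(b-c)$.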

\begin{theorem}\label{thm:ff}
If $f\colon A\to B$, then $f/\conv f$ is an
  equivalence-relation with field~$A$. 
\end{theorem}

Suppose $\sim$ is an
equivalence-relation on $A$.  If $b\in A$, we can 
define
\begin{equation*}
  b\modsim =\{x\in A:b\sim x\};
\end{equation*}\glossary{$b/\mathord{\sim}$}
this is the \textbf{$\sim$-class} of $b$, or the
\textbf{equivalence-class}\dindexsub{equivalence}{---{}-class}\dindexsub{class}{equivalence-{}---}
of $b$ (with respect to $\sim$; the notation here must not be confused
with the notation for composition of relations).  If the
equivalence-relation is clear, one might write $[b]$\glossary{$[b]$}
instead of $b\modsim$, as in the following: 

\begin{lemma}\label{lem:[b]}
  If an equivalence-relation on $A$ is given, then
\begin{equation*}
[b]=[c]\iff [b]\cap[c]\neq\emptyset
\end{equation*}
for all $b$ and $c$ in $A$.
\end{lemma}

The \textbf{quotient}\dindex{quotient} of $A$ by the equivalence-relation $\sim$ is the
set $\{[b]:b\in A\}$, which can be denoted by 
\begin{equation*}
  A/\mathord{\sim};
\end{equation*}\glossary{$A/\mathord{\sim}$}
this can be read as \Eng{$A$ \emph{modulo} $\sim$.}  Then there is a
\textbf{quotient-map}%
\dindexsub{quotient}{---{}-map}%
\dindexsub{map}{quotient-{}---}%
\dindexsub{function}{quotient-map}
or 
\textbf{projection}%
\dindex{projection}%
\dindexsub{function}{projection} 
from $A$ to $A/\mathord{\sim}$, namely the function
\begin{equation*}
  x\longmapsto [x].
\end{equation*}
This function might be denoted by $\pi_{\sim}$\glossary{$\pi_{\sim}$}.
Suppose also $f\colon A\to
B$.  One may ask whether there is a function $g$ from
$A/\mathord{\sim}$ to $B$ such that $f=g\circ\pi_{\sim}$.  That is,
does $g$ exist so that the following diagram
\textbf{commutes?}\dindex{commutes}\dindexsub{diagram}{commutative ---}
\begin{equation*}
  \xymatrix@!{
A \ar[r]^(.4){\pi_{\sim}} \ar[d]_{f} & A/\mathord{\sim} \ar[dl]^{g} \\
B &
}
\end{equation*}
Yet another way to formulate the question is, does $f$ have
$\pi_{\sim}$ as a
\textbf{factor?}\dindex{factor}\dindexsub{function}{factor}
Necessary\index{necessary condition}\index{condition!necessary ---} 
and sufficient%
\index{sufficient condition}%
\index{condition!sufficient ---} 
conditions for a positive answer are given by the following. 

\begin{theorem}\label{thm:well-defined}
  Suppose $E$ is an equivalence-relation on $A$, and $f\colon A\to B$.
The following conditions are equivalent:
\begin{compactenum}
  \item
$E\included f/\conv f$;
\item
$x\mathrel Ey\implies f(x)=f(y)$ for all $x$ and $y$ in $A$;
\item
there is a function $g$ from $A/E$ to $B$ such that $g([x])=f(x)$ for
all $x$ in $A$.
\end{compactenum}
\end{theorem}

\begin{proof}
  Exercise; see 
  Examples~\ref{examples:equivalence2} below. 
\end{proof}

The function $g$ in the theorem can be written
\begin{equation*}
  [x]\longmapsto f(x).
\end{equation*}
Such an expression does not \emph{automatically} define a
function.  If it does, we say the function is
\textbf{well-defined.}
\dindexsub{well}{---{}-defined}\dindexsub{defined}{well-{}---} 
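
For example, let $\sim$ be congruence \emph{modulo} $2$ on $\Z$.  Then the expression $[x]\mapsto x$ does \emph{not} define a function from $\Z/\mathord{\sim}$ to $\Z$, since $[0]=[2]$, although $0\neq2$.  The expression $[x]\mapsto(-1)^x$ is well-defined, since $(-1)^x=(-1)^y$ whenever $x\equiv y\pmod2$.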

\begin{examples}\label{examples:equivalence2} 
The following parallel Examples~\ref{examples:equivalence}.
\begin{asparaenum}
\item
If $\sv F$ is an $n$-ary propositional formula in a signature $\lang$,
then there is a function 
$\tuple e\mapsto\named {\sv F}(\tuple e)$ or $\named {\sv F}$ from $\B^n$ to $\B$.
Hence there is a function ${\sv F}\mapsto\named {\sv F}$ from the set $\Fm
n{\lang}$\glossary{$\mathrm{Fm}_n(\lang)$} of $n$-ary
propositional formulas  of $\lang$ to the set $\mapset{\B^n}{\B}$.  By
definition
of truth-equivalence, ${\sv F}\sim {\sv G}$ if and only if $\named {\sv F}=\named {\sv G}$.
Hence there is a well-defined injection ${\sv F}/\mathord{\sim}\mapsto\named
{\sv F}$ from $\Fm n{\lang}/\mathord{\sim}$ to $\mapset{\B^n}{\B}$; if
$\lang$ is adequate, then this function is also surjective (at least if $n$ is large enough).
\item
If $n>0$, then the distinct elements of the quotient of $\Z$ by
congruence \emph{modulo} $n$ are $[0]$, $[1]$, $[2]$, \dots, $[n-1]$.
\item
The function $[a,b]\mapsto a-b$ is a well-defined bijection from
$\N^2\modsim$ to $\Z$.
(In \S~\ref{sect:ZandQ}, the structure $\Z$ will be \emph{defined} in
terms of $\N$ 
so that there is such a bijection.)
\item
\index{rational number}\index{number!rational ---}The function
$[a,b]\mapsto a/b$ is a well-defined bijection from
$\Z\times(\Z\setminus\{0\})/\mathord{\approx}$ to $\Q$.  (In
\S~\ref{sect:ZandQ}, the structure $\Q$ will be \emph{defined} in
terms of $\Z$ so that there is such a bijection.)
\item
The equipollence-class of a set $A$ can be called the
\textbf{cardinality}\dindex{cardinality} of $A$
and denoted by 
\begin{equation*}
  \size A.
\end{equation*}
Equipollent sets are sets having the same equipollence-class; such
sets can also be said to have the same cardinality.  An alternative
definition of cardinality is given in \S~\ref{sect:cardinality},
whereby the cardinality of $A$ is a particular \emph{set} in the
equipollence-class of $A$. 
\item
The function $[\tuple x]\mapsto\coordproj nk(\tuple x)$ is a well-defined
bijection from 
$A^n/\mathord{\sim_k^n}$ to $A^{n-1}$.
\end{asparaenum}
\end{examples}

A \textbf{partition} of $A$ is a subset $P$ of $\pow A$ such that:
\begin{compactenum}[1)]
  \item
if $B$ and $C$ are in $P$, and $B\cap C\neq\emptyset$, then $B=C$;
\item
every element of $A$ is an element of some element of $P$.
\end{compactenum}

\begin{theorem}\label{thm:partition}
  If $\sim$ is an equivalence-relation on $A$, then $A\modsim$ is a
  partition of $A$.  Conversely, if $P$ is a partition of $A$, then
  the relation 
\begin{equation*}
  \{(x,y)\in A^2:\Exists X(X\in P\land\{x,y\}\subseteq X)\}
\end{equation*}
is an equivalence-relation on $A$.
\end{theorem}

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Prove Theorem~\ref{thm:ff}.
\item
Prove Lemma~\ref{lem:[b]}.
\item
Prove Theorem~\ref{thm:well-defined}.
\item
Prove Theorem~\ref{thm:partition}.
  \item
Let $A=\{0,1,2,3,4,5,6,7,8,9\}$.  
\begin{enumerate}
\item
Define an equivalence-relation $E$ on $A$ so that $\size{A/E}=5$.
\item
Can you define an equivalence-relation $F$ on $A$ so that $\size{A/F}=7$?
\end{enumerate}
\item
Define an equivalence-relation $\sim$ on $\Z$ so that there is a
bijection from $\Z\modsim$ to $\N$.
\item
For every property in the set $\{\text{reflexive, symmetric,
  transitive}\}$, find a set $A$ and a relation $R$ on $A$ that has
  just the other two properties.
\item
Suppose $R$ is a reflexive and symmetric relation on $A$, but
$R/R\nincluded R$.  Can you find an equivalence-relation $S$ on
$A$ such that $R\included S$, but $S\neq A\times A$?
\end{enumerate}

\section{Orderings}

Let $R$ be a binary relation with field $A$.
The following possible
properties complement those given in \S~\ref{sect:eq}.  The relation
$R$ is:
\begin{compactenum}[1)]
  \item
\textbf{irreflexive,}\dindex{irreflexive}\dindexsub{reflexive}{ir---} if
\begin{equation*}
(A,R)\models\Forall x\lnot(x\mathrel Rx);
\end{equation*}
\item
\textbf{anti-symmetric,}
\dindex{anti-symmetric}\dindexsub{symmetric}{anti-{}---}if
\begin{equation*}
(A,R)\models\Forall x\Forall y(x\mathrel Ry\land y\mathrel Rx\lto
x=y).
\end{equation*} 
\end{compactenum}
Again we have alternative characterizations.  The relation $R$ is:
\begin{compactenum}[1)]
  \item
{irreflexive} if and only if $R\cap \Delta_A=\emptyset$;
\item
{anti-symmetric} if and only if $R\cap \conv R\included\Delta_A$.
\end{compactenum}
A reflexive, anti-symmetric, transitive relation is
called a \textbf{partial ordering}%
\dindexsub{partial}{--- ordering}%
\dindexsub{order}{partial ---ing}
of its domain.
If $R$ is a partial ordering, and $A$ is its domain, then the
structure $(A,R)$ is a 
\textbf{partially ordered set}%
\dindexsub{partial}{---ly ordered set}%
\dindexsub{order}{partially ---ed set}%
\dindexsub{set}{partially ordered ---} 
or a 
\textbf{partial order.}%
\dindexsub{partial}{--- order}%
\dindexsub{order}{partial ---}
More generally, we may say that a pair $(A,R)$ is a partial order when really it is $(A,R\cap(A\times A))$ that is the partial order (see the examples below).
  
A \textbf{strict partial ordering}%
\dindexsub{strict}{--- partial ordering}%
\dindexsub{partial}{strict --- ordering}%
\dindexsub{order}{strict partial ---ing} is an irreflexive,
anti-symmetric, 
transitive relation.  If $R$ is a strict partial ordering, and the set $A$ \emph{includes} the domain of $R$, then the pair $(A,R)$ is a
\textbf{strict partial order.}%
\dindexsub{partial}{strict --- order}%
\dindexsub{order}{strict partial ---}%
\dindexsub{strict}{--- partial ordering}
Note then that a strict
partial order is
technically \emph{not} a partial order (see
Exercise~\ref{exercise:strict}).  In any case, in the terminology used here, an
\emph{order} is a kind of \emph{structure} (see
Figure~\ref{fig:priene});
\begin{figure}[t!]
  \begin{center}
   \includegraphics[width=0.6\textwidth]{priene}
  \end{center}
\caption[The temple at Priene: the Ionic order]{The remains of the
  temple at Priene: an example of the
  {Ionic order}\index{Ionic order}\index{order!Ionic ---} of
  architecture.\index{architect!---ure}
  Think of the
  columns as an order in our sense.}
  \label{fig:priene}
\end{figure}
 an \emph{ordering} is the \emph{relation} that is
part of an order.  However, this terminological distinction is not of great
importance. 

\begin{examples}\label{example:order}
\mbox{}
\begin{asparaenum}
     \item
  $(\pow A,\included)$ is a partial order; so is $(B,\included)$, if
  $B\included\pow A$.
\item
$(\pow A,\pincluded)$ is a strict partial order.
\item
(See the first of Examples~\ref{examples:equivalence2}.)  We
can understand logical entailment $\models$ as a binary relation
on $\Fm n{\lang}/\mathord{\sim}$.
Then $(\Fm
  n{\lang}/\mathord{\sim},\models)$ is a partial order.  The case
  $n=2$ can be depicted as in Figure~\ref{fig:prop-as-order}.  (Such a drawing of a partial order is called a \textsl{Hasse diagram.}\tindex{Hasse diagram}\tindexsub{diagram}{Hasse ---})
\begin{figure}[t!]
\begin{equation*}
%\xymatrix@R=0.45cm@C=0.1cm{
  \xymatrix@!0{
&&&&&1&&&&\\
&&&&&&&&&\\
&\sv P\lor \sv Q \ar@{-}[uurrrr] &&& \sv P\lto \sv Q \ar@{-}[uur] && \sv Q\lto \sv P
  \ar@{-}[uul] && \sv P \shstroke \sv Q \ar@{-}[uulll] & \\  
&&&&&&&&&\\
\sv Q \ar@{-}[uur] \ar@{-}[uurrrr] && \sv P \ar@{-}[uul] \ar@{-}[uurrrr]
&& \sv P\eor \sv Q\qquad
\ar@{-}[uulll] \ar@{-}[uurrrr] & \qquad \sv P\liff \sv Q
\ar@{-}[uul] \ar@{-}[uur] && \lnot \sv P \ar@{-}[uulll] \ar@{-}[uur] && \lnot \sv Q
\ar@{-}[uulll] \ar@{-}[uul] \\
&&&&&&&&&\\
& \sv P\land \sv Q \ar@{-}[uul] \ar@{-}[uur] \ar@{-}[uurrrr] && \sv Q\nRightarrow \sv P
\ar@{-}[uulll] \ar@{-}[uur] \ar@{-}[uurrrr] && \sv P\nRightarrow \sv Q
\ar@{-}[uulll] \ar@{-}[uul] \ar@{-}[uurrrr] &&& \sv P\curlywedge \sv Q
\ar@{-}[uulll] \ar@{-}[uul] \ar@{-}[uur] & \\
&&&&&&&&&\\
&&&&0 \ar@{-}[uulll] \ar@{-}[uul] \ar@{-}[uur] \ar@{-}[uurrrr] &&&&&
}
\end{equation*}
  \caption[A partial order of propositional formulas]{In this
  depiction of the set of (truth-equivalence-classes
  of) propositional formulas in the variables $\sv P$ and $\sv Q$, $\sv F\models {\sv G}$ if and only
  if ${\sv G}$ can be reached from $\sv F$ by travelling upwards along the drawn
  lines.  The new connective $\nRightarrow$\glossary{$P\nRightarrow
  Q$} here has the obvious meaning.}  
\label{fig:prop-as-order}
\end{figure}
\item
$(\N,{}\divides{})$ is a partial order.
\item
$(A,\Delta_A)$ is a partial order.
\item
$(A,\emptyset)$ is a strict partial order.
\item
The relation $\injects$ on sets is not a partial ordering; but we shall
see in \S~\ref{sect:cardinality} that it `induces' a partial ordering
of \emph{cardinalities}.
\end{asparaenum}
\end{examples}

\begin{lemma}\mbox{}
  \begin{compactenum}
    \item
  If $(A,R)$ is a partial order, then $(A,R\setminus\Delta_A)$ is a
  strict partial order.
\item
If $(A,S)$ is a strict partial order,
  then $(A,S\cup\Delta_A)$ is a partial order.
  \end{compactenum}
\end{lemma}

In the lemma, one might say that $R\setminus\Delta_A$ is
\textbf{associated}\dindex{associated} with $R$, and
$S\cup\Delta_A$ with $S$.   

A partial order $(A,R)$ is a 
\textbf{linear order}%
\dindex{linear order}%
\dindexsub{order}{linear ---}
or a
\textbf{total order}%
\dindex{total order}%
\dindexsub{order}{total ---} 
if
\begin{equation*}
  (A,R)\models\Forall x\Forall y(x\mathrel Ry\lor y\mathrel Rx),
\end{equation*}
  that is, 
  \begin{equation*}
      R\cup \conv R=A^2.  
  \end{equation*}
If\glossary{$\leq\quad<$}
$\leq$ is a linear ordering, then the associated strict linear ordering
can be denoted by $<$, and \emph{vice versa}.

\begin{example}
  $(\Z,\leq)$ is a linear order; $(\Z,<)$ is a strict linear order.
\end{example}

Suppose $(A,R)$ and $(B,S)$ are partial orders, and $f\colon A\to B$.  Then
$f$ is
\textbf{order-preserving}\dindexsub{order}{---{}-preserving}
\dindexsub{preserve}{order-{}---ing}if
\begin{equation*}
  a\mathrel R b\implies f(a)\mathrel S f(b)
\end{equation*}
for all $a$ and $b$ in $A$.  An order-preserving function is an
example of a more general notion:

Suppose $\str A$ and $\str B$ are two structures in a signature
$\lang$.  A function $f$ from $A$ to $B$ is called a
\textbf{homomorphism}\dindex{homomorphism}\dindexsub{function}{homomorphism} from
$\str A$ to $\str B$ if 
\begin{equation}\label{eqn:homom}
  \str A\models\phi(a_0,\dots,a_{n-1})\implies \str
  B\models\phi(f(a_0),\dots,f(a_{n-1}))
\end{equation}
for all atomic formulas $\phi(x_0,\dots,x_{n-1})$ of $\lang$ and all
$a_i$ in $A$, for all $n$ in $\N$.  
If~\eqref{eqn:homom} holds 
for all atomic and \emph{negated} atomic formulas
$\phi(x_0,\dots,x_{n-1})$ of $\lang$ and all 
$a_i$ in $A$, for all $n$ in $\N$, then $f$ is an
\textbf{embedding}%
\dindex{embedding} 
of $\str A$ in $\str B$.
Finally, $f$ is an
\textbf{isomorphism}%
\dindex{isomorphism, isomorphic structures}%
\dindexsub{function}{isomorphism}
if $f$ is invertible and 
$f\inv$ is a homomorphism from $\str B$ to $\str A$.

A homomorphism is thus a function that 
\emph{preserves structure;} it
\textbf{preserves}%
\dindexsub{preserve}{---s} 
the symbols in a signature (hence it preserves the atomic formulas
that use them).  An embedding also preserves their complements; in
particular, it preserves inequality, so it is an injection. 
The existence of an
isomorphism shows that two structures are the \emph{same} as
structures.  If an isomorphism exists between $\str A$ and $\str B$,
then $\str A$ and $\str B$ are called
\textbf{isomorphic,}
and 
we write
\begin{equation*}
  \str A\cong\str B.\glossary{$\mathfrak A\cong\mathfrak B$}
\end{equation*}
Isomorphism is an
equivalence-relation.
\index{equivalence!---{}-relation} \index{relation!equivalence-{}---}
Isomorphic structures have the same \emph{theories}\index{theory} (the
proof is tedious, but not surprising). 

\begin{examples}
\mbox{}
\begin{asparaenum}
    \item
An order-preserving function is a homomorphism of partial orders.
An isomorphism of partial orders is an invertible order-preserving
function whose inverse is also order-preserving.
\item
The identity is a homomorphism from $(\Np,\divides)$ to $(\Np,\leq)$, but not an embedding.  (On the whole of $\N$, it is not even a homomorphism, since $5\divides0$, although $5\nleq0$.)
\item
Any function from a non-empty set to another is a
homomorphism of sets.  Equipollence is isomorphism of sets.  
\item
By Theorem~\ref{thm:inv-homom}, if $f\colon A\to B$, then $X\mapsto
f\inv\setimb X$ is a
homomorphism from $(\pow B,\cap,\cup,{}\comp)$ to $(\pow
A,\cap,\cup,{}\comp)$. 
\item
More examples of homomorphisms and isomorphisms are in
\S\S~\ref{sect:recursion},~\ref{sect:ZandQ} and~\ref{sect:reals}.
\end{asparaenum}
\end{examples}

The following is a \textbf{representation
  theorem:}\dindexsub{represent}{---ation
  theorem}\dindexsub{theorem}{representation ---} it shows that every
partial order \emph{can be represented by} (is isomorphic to) a
structure of the form given in the first of the
Examples~\ref{example:order}. 
Note how the proof of the theorem uses every property in the
definition of partial orders.

\begin{theorem}
  For every partial order $(A,R)$, there is a
  subset $B$ of $\pow A$
  such that $(A,R)\cong(B,\included)$.  
  \end{theorem}

\begin{proof}
  Let $f$ be the function from $A$
  to $\pow A$ given by
\begin{equation*}
f(a)=\{y\in A:y\mathrel R a\}.
\end{equation*}
Then $f$ is injective:  Indeed, suppose $c$ and $d$ are
  elements of $A$.  If $c\mathrel Rd$ and $d\mathrel Rc$, then $c=d$
  since $R$ is anti-symmetric.  Suppose $c\neq d$.  Then we may assume
  $\lnot(c\mathrel Rd)$.  Then $c\notin f(d)$.  But $c\in f(c)$ since
  $R$ is reflexive.  Therefore $f(c)\neq f(d)$.  Let $B=f\setimb A$; then
  $f$ gives a bijection between $A$ and $B$.

Also, $f$ is order-preserving:  Suppose $c\mathrel Rd$.  If $e\in
f(c)$, then $e\mathrel Rc$, so $e\mathrel Rd$ since $R$ is transitive;
hence $e\in f(d)$.  Thus $f(c)\included f(d)$.  This shows that $f$ is
order-preserving.  

But $X\mapsto f\inv\setimb X$ is also order-preserving (as a function
on $B$, this set
being equipped with the relation $\included$):  If $f(c)\included
f(d)$, then $c\in f(d)$ since $c\in f(c)$; so $c\mathrel Rd$.
Therefore $f$ is an isomorphism from $(A,R)$ to $(B,\included)$.
\end{proof}

\begin{examples}
\mbox{}
\begin{asparaenum}
    \item
The partial order $(\{1,2,3,4,5,6\},\divides)$ is isomorphic to
$(B,\included)$, where $B$ is the set
\begin{equation*}
  \{\{1\},\{1,2\}, \{1,3\}, \{1,2,4\}, \{1,5\},
\{1,2,3,6\}\}.
\end{equation*}
See Figure~\ref{fig:6}.
\begin{figure}[t!]
\begin{equation*}
  \xymatrix{
4 & 6 & \\
2 \ar@{-}[u] \ar@{-}[ur] & 3 \ar@{-}[u] & 5 \\
& 1 \ar@{-}[ul] \ar@{-}[u] \ar@{-}[ur] &
}\qquad
\xymatrix{
\{1,2,4\} & \{1,2,3,6\} & \\
\{1,2\} \ar@{-}[u] \ar@{-}[ur] & \{1,3\} \ar@{-}[u] & \{1,5\} \\
& \{1\} \ar@{-}[ul] \ar@{-}[u] \ar@{-}[ur] &
}
\end{equation*}
  \caption{Two isomorphic partial orders}
\label{fig:6}
\end{figure}
\item
A set of propositional formulas in $n$ variables, partially ordered by
logical entailment $\models$, is isomorphic to a set of
Boolean combinations of $n$ suitable sets, partially ordered by
inclusion.  Compare Figure~\ref{fig:prop-as-order} to
Figure~\ref{fig:2sets}.
\begin{figure}[t!]
\begin{equation*}
  \xymatrix@!0{
&&&&&{\universe}&&&&\\
&&&&&&&&&\\
&A\cup B \ar@{-}[uurrrr] &&& A\comp\cup B \ar@{-}[uur] && A\cup B\comp
  \ar@{-}[uul] && A\comp\cup B\comp \ar@{-}[uulll] & \\  
&&&&&&&&&\\
B \ar@{-}[uur] \ar@{-}[uurrrr] && A \ar@{-}[uul] \ar@{-}[uurrrr]
&& A\symdiff B \qquad
\ar@{-}[uulll] \ar@{-}[uurrrr] & \qquad A\comp\symdiff B
\ar@{-}[uul] \ar@{-}[uur] && A\comp \ar@{-}[uulll] \ar@{-}[uur] && B\comp
\ar@{-}[uulll] \ar@{-}[uul] \\
&&&&&&&&&\\
& A\cap B \ar@{-}[uul] \ar@{-}[uur] \ar@{-}[uurrrr] && B\setminus A
\ar@{-}[uulll] \ar@{-}[uur] \ar@{-}[uurrrr] && A\setminus B
\ar@{-}[uulll] \ar@{-}[uul] \ar@{-}[uurrrr] &&& A\comp\cap B\comp
\ar@{-}[uulll] \ar@{-}[uul] \ar@{-}[uur] & \\
&&&&&&&&&\\
&&&&{\emptyset} \ar@{-}[uulll] \ar@{-}[uul] \ar@{-}[uur]
\ar@{-}[uurrrr] &&&&& 
}
\end{equation*}
  \caption[A partial order of sets]{A partial order of sets.  (The
  sets $A$ and $B$ here should be \emph{independent} in
  the sense that all Boolean combinations here are distinct.)}
\label{fig:2sets}
\end{figure}
\end{asparaenum}
\end{examples}

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}

\item\label{exercise:strict}
Show that no partial ordering is a strict partial ordering.
  \item
Are there partial orderings that are also equivalence-relations?
\item
Are there relations that are both symmetric and anti-symmetric?
\item
Write down the ordered pairs that belong to $\divides$, considered as
a relation on $\{1,2,3,4,5,6\}$.  Can you add pairs to this relation
so that it becomes a linear ordering?
\item
More generally,
if $R$ is a partial order on a finite set $A$, is there a linear ordering
$S$ on $A$ such that $R\included S$?
\item
Find sets $A$ and $B$ such that all of the Boolean combinations
depicted in Figure~\ref{fig:2sets} are distinct.
\end{enumerate}


\section{Infinitary Boolean operations}
\label{sect:infinitary}

The union of two sets is the set comprising everything that is in one
or the other of the sets.  There is no reason to restrict unions to
two sets.  Instead of writing $A\cup B$, we might write
\begin{equation*}
  \bigcup\{A,B\}.
\end{equation*}
This is the \textsl{union} of the single set $\{A,B\}$, whose elements
happen to be the sets $A$ and $B$.  Then $\bigcup\{A,B,C\}$ is $A\cup
B\cup C$, and so forth.  If $\family S$ is a class of sets, then the
\textbf{union}\dindex{union} of $\family S$ is the class
\begin{equation*}
  \{x\colon\Exists y(y\in \family S\land x\in y)\};
\end{equation*}
this is denoted by
\begin{equation*}
  \bigcup \family S.
\end{equation*}
Unions in this new sense are
\textbf{infinitary,}\dindexsub{infinitary}{---
  union}\dindexsub{union}{infinitary ---} in the sense that
the set $\family S$ may be infinite.
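
For example,
\begin{equation*}
\bigcup\{\{0,1\},\{1,2\},\{2,3\}\}=\{0,1,2,3\},\qquad
\bigcup\bigl\{\{n\}\colon n\in\N\bigr\}=\N.
\end{equation*}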

We shall need the following for Theorem~\ref{thm:ord-sup}.

\begin{axiom}[Union]\dindexsub{union}{U--- Axiom}\dindexsub{axiom}{A--- of Union}
The union of a set is a set.
\end{axiom}

As there are infinitary unions, so there are \textbf{infinitary
  intersections:}\dindexsub{infinitary}{---
  intersection}\dindexsub{intersection}{infinitary ---}  If $\family
  S$ is a class of sets, then 
\begin{equation}\label{eqn:bigcap}
  \bigcap\family S=\{x\colon\Forall y(y\in \family S\lto x\in y)\}.
\end{equation}
So $A\cap B$ is $\bigcap\{A,B\}$, and so forth.
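
For example,
\begin{equation*}
\bigcap\{\{0,1,2\},\{1,2,3\},\{2,3,4\}\}=\{2\},
\end{equation*}
since $2$ is the only object belonging to all three of the sets.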

\begin{theorem}\label{thm:int-set}
  The intersection of a non-empty set of sets is a set.
\end{theorem}

\begin{proof}
If $\family S$ contains $A$, then
\begin{equation*}
  \bigcap\family S=\{x\in A\colon\Forall y(y\in \family S\lto x\in y)\},
\end{equation*}
which is a set by the Axiom of Separation,
\ref{ax:separation}.\index{axiom!A--- of
  Separation}\index{separation!Axiom of S---}
\end{proof}

By strict application of~\eqref{eqn:bigcap}, $\bigcap\emptyset$ is the class of \emph{everything,} since the defining condition is satisfied vacuously when $\family S$ is empty; but this class is not a set.

The following will be useful in the next chapter, starting in
\S~\ref{sect:recursion}.

\begin{theorem}\label{thm:in-A-un}
  Let $\family S$ be a set of sets, one of which is $A$.  Then 
  \begin{equation*}
    \bigcap \family S\included A\included \bigcup \family S.
  \end{equation*}
\end{theorem}

\begin{proof}
  Exercise.
\end{proof}

Sometimes, in an infinitary union $\bigcup\family S$ (or an
intersection $\bigcap \family S$), the set $\family S$ is given as the
range of a function.  Say $f\colon A\to \pow B$.  Then we can write
\begin{equation*}
  \bigcap f\setimb A=\bigcap_{x\in A}f(x)
\end{equation*}
and $\bigcup f\setimb A=\bigcup_{x\in A}f(x)$.

\begin{examples}
\mbox{}
\begin{enumerate}
    \item
$\R=\bigcup_{n\in \N}(-1-n,n+1)$;
\item
$\bigcap_{n\in \N}[n,\infty)=\emptyset$;
\item
$\bigcap_{n\in \N}[-1/(n+1),1/(n+1)]=\{0\}$.
\end{enumerate}
\end{examples}

\subsection*{Completeness}

It is now possible to prove the completeness of our proof-system for first-order logic in a countable signature. 

\begin{theorem}[Completeness]\label{thm:1-completeness}
Our proof-system for first-order logic in a countable signature is complete.
\end{theorem}

\begin{proof}
Suppose $\Gamma$ is a set of sentences that does not formally entail $\sigma$.  Then $\Gamma$ must be consistent (why?).  We shall show that $\Gamma$ has a model.  It will then follow that $\Gamma$ does not logically entail $\sigma$ (why?).

Let $C$ be a set $\{c_k\colon k\in\N\}$ of new constants.  We consider two cases.

Suppose first that $\Gamma\cup\{c_i\neq c_j\colon i<j\}$ is inconsistent.  Then there is some greatest $n$ such that $\Gamma\cup\{c_i\neq c_j\colon i<j<n\}$ is consistent (why?).  
Hence
\begin{equation*}
\Gamma\cup\{c_i\neq c_j\colon i<j<n\}\proves c_0=c_n\lor\dots\lor c_{n-1}=c_n.
\end{equation*}
We now extend $\Gamma\cup\{c_i\neq c_j\colon i<j<n\}$ to a \emph{maximal} consistent set $\Sigma$ of sentences of $\lang\cup\{c_k\colon k<n\}$.  We do this as follows.  Since $\lang$ is countable, so is $\lang\cup\{c_k\colon k<n\}$, and, by Theorem~\ref{thm:L-count}, we can list its sentences as $\sigma_0$, $\sigma_1$, \dots.  Now we define a list of \emph{sets} of sentences recursively as follows.  We let $\Gamma_0=\Gamma\cup\{c_i\neq c_j\colon i<j<n\}$.  Supposing $\Gamma_m$ has been defined, we let $\Gamma_{m+1}$ be $\Gamma_m\cup\{\sigma_m\}$, if this is consistent; otherwise, $\Gamma_{m+1}$ is $\Gamma_m\cup\{\lnot\sigma_m\}$.  By Lemma~\ref{lem:ccon} and induction, each set $\Gamma_m$ is consistent.

Now define
\begin{equation*}
\Sigma=\bigcup_{m\in\N}\Gamma_m.
\end{equation*}
By Lemma~\ref{lem:fin-con}, this set is consistent.  Indeed, every finite subset of $\Sigma$ can be written as $\{\tau_0,\dots,\tau_{k-1}\}$ for some $k$ in $\N$.  Each sentence $\tau_i$ belongs to some set $\Gamma_{f(i)}$.  Let $m$ be the greatest element of $\{f(0),\dots,f(k-1)\}$ (how?).  Then $\{\tau_0,\dots,\tau_{k-1}\}\included\Gamma_m$, so it is consistent.

Suppose $\Sigma\proves\phi(c_k)$ whenever $k<n$.  Then
\begin{equation*}
\Sigma\proves c_k=c_n\lto\phi(c_n)
\end{equation*}
whenever $k<n$, and therefore
\begin{gather*}
\Sigma\proves c_0=c_n\lor\dots\lor c_{n-1}=c_n\lto\phi(c_n),\\
\Sigma\proves\phi(c_n),\\
\Sigma\proves\Forall x\phi(x)
\end{gather*}
by Generalization.  Contrapositively, if $\Sigma\proves\Exists x\phi(x)$, then $\Sigma\proves\phi(c_k)$ for some $k$ that is less than $n$.  This enables us to make $\{c_k\colon k<n\}$ into a model of $\Sigma$ (how?).

In the other case, in producing $\Sigma$, whenever we add $\Exists x\phi(x)$, we must also add $\phi(c_n)$ for some $n$ such that $c_n$ has not already been used.
\end{proof}

The proof can be adapted to the case where $\lang$ is uncountable by means of Theorem~\ref{thm:set-ord}.

\begin{corollary}[Compactness Theorem]\label{cor:compactness}%
\dindex{Compactness Theorem}%
\dindexsub{theorem}{Compactness Th---}
If every finite subset of a set $\Gamma$ of sentences has a model, then $\Gamma$ has a model.
\end{corollary}

\begin{proof}
If $\Gamma$ has no model, then $\Gamma\models\bot$, so $\Gamma\proves\bot$, hence $\Gamma_0\proves\bot$ for some finite subset $\Gamma_0$ of $\Gamma$.  Then $\Gamma_0\models\bot$, so $\Gamma_0$ has no model.
\end{proof}
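
As an illustration of how the Compactness Theorem is typically used, suppose a set $\Gamma$ of sentences has arbitrarily large finite models.  For each $n$ in $\N$, let $\sigma_n$ be the sentence
\begin{equation*}
\Exists{x_0}\dotsm\Exists{x_{n-1}}\bigwedge_{i<j<n}x_i\neq x_j,
\end{equation*}
which is true in a structure if and only if the structure has at least $n$ elements.  Every finite subset of $\Gamma\cup\{\sigma_n\colon n\in\N\}$ has a model, namely a large enough finite model of $\Gamma$; so, by the Corollary, the whole set has a model.  This model is a model of $\Gamma$, and it must be infinite.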



\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}

  \item
Find $\bigcup\emptyset$ and $\bigcup\{\emptyset\}$.
\item
Can you define $\bigcap\emptyset$?
\item
Find a set $\family S$ of sets such that $\bigcup\family
S=\bigcap\family S$.
\item
Prove Theorem~\ref{thm:in-A-un}.
\item\label{exercise:inf-analogs}
Prove the infinitary analogues of some propositions in
\S~\ref{sect:deeper}:
Suppose $f\colon A\to B$, and $\family S\included\pow A$, and $\family
T\included\pow B$.  Then:
\begin{enumerate}
  \item
$f\setimb{\bigcup\family S}=\bigcup\{f\setimb X:X\in \family S\}$;
\item
$f\setimb{\bigcap\family S}\included\bigcap\{f\setimb X:X\in \family S\}$;
\item
the last inclusion is an equality if $f$ is injective;
\item
$f\inv\setimb{\bigcup\family T}=\bigcup\{f\inv\setimb X:X\in \family T\}$;
\item
$f\inv\setimb{\bigcap\family T}=\bigcap\{f\inv\setimb X:X\in \family T\}$.
\end{enumerate}
\item
Supply the missing details of the proof of the Completeness Theorem.
\end{enumerate}


%\input{chapter-numbers.tex}

\chapter{Numbers}\label{ch:numbers}
\setcounter{section}{-1}
\section{The Peano axioms}\label{Peano}

In a book called \emph{The Principles of Arithmetic, Presented by a
  New Method}
\cite{MR0209111}, originally written in Latin and published in 1889,
  Giuseppe Peano\index{Peano} describes the positive
    integers by
means of nine strings of symbols---strings that he calls
\emph{axioms.}  In our terminology, three of Peano's axioms say that
equality of positive integers is an equivalence-relation; another
says that everything equal to a positive integer is a positive
integer.  The remaining five axioms have more mathematical content,
and versions of them are sometimes listed by themselves\footnote{For
  example, in \cite[pp.~988~f.]{MR0472307} or \cite[\S~1]{MR12:397m}.}
  as \emph{the} axioms for the
positive integers; these axioms may or may not be called \textsl{the
  Peano axioms.}
Two of these axioms say that $1$ is a positive integer and that every
positive integer has a successor that is a positive integer.  

The remaining three of Peano's axioms correspond to the three
statements at the end 
of \S~\ref{sect:sets}, except that the latter statements concern the
non-negative integers, rather than just the positive integers.  The difference is of little mathematical importance.
In model-theoretic terms, Peano's axioms amount to the assertion
that there is a model of three particular sentences.\footnote{Before Peano, Dedekind recognized that the natural
      numbers have this property, and that all
      structures with this property are isomorphic
      \cite[II: \S\S~71, 132]{MR0159773}.}
Two of these sentences are first order; the third is second order.
I propose to make this assertion as an axiom, the Axiom of Infinity; the name is apt, since, as we noted in \S~\ref{sect:equipollence}, $\N$ must be infinite.

\begin{axiom}[Infinity]\label{ax:infinity}%
\dindexsub{axiom}{A--- of Infinity}%
\dindexsub{infinity}{Axiom of I---}
In the signature $\{0,{}\scr{}\}$, there is a structure $\N$ such that:
\begin{enumerate}\renewcommand{\labelenumi}{\theenumi)}
\item
  $\N\models\Forall x\scr x\neq0$;
  \item
  $\N\models\Forall x\Forall y(\scr x=\scr y\lto x=y)$;
  \item
  $(\N,A)\models P0\land \Forall x(Px\lto P(\scr x))\lto
  \Forall x Px$, whenever $A\included\N$, and $P$ is a
  singulary predicate interpreted as $A$ in $\N$.
\end{enumerate}
\end{axiom}
Throughout this book, $\N$ is
simply such a structure as is named in this axiom.
Let us refer to the sentence $\Forall x\scr x\neq0$ as
\textbf{\axz,}\dindexsub{axiom}{A--- Z} since it
says that \emph Zero is not a successor.  Then $\Forall x\Forall
y(\scr x=\scr y\lto x=y)$ is \textbf{\axu,}\dindexsub{axiom}{A--- U}
since it says that successors are
\emph Unique when they exist.  Finally, there is
\textbf{\axi,}\dindexsub{axiom}{A--- I}
or the \textbf{Axiom of
  Induction,}\dindexsub{axiom}{A--- of
  Induction}\dindexsub{induction}{Axiom of I---} a \emph{second-order}
sentence that can be written formally as
\begin{equation*}
  \Forall P(P0\land\Forall x(Px\lto P(\scr x))\lto \Forall xPx),
\end{equation*}
where $P$ is a singulary
\textsl{predicate-variable}.
\tindexsub{predicate}{---{}-variable}\tindexsub{variable}{predicate-{}---}
Collectively, \axz, \axu, and \axi{} can be called \textbf{the Peano
  Axioms.}\dindexsub{Peano}{--- Axioms}\dindexsub{axiom}{Peano A---s}

\axz\ is that the immediate
predecessor of $0$ does \emph{not} exist as an element of $\N$.
The Axiom of {Induction} is that a set contains
all natural 
numbers, provided that it contains $0$ and contains the successor of
each natural number that it contains.
Later we shall define the binary operation $(x,y)\mapsto x+y$ on $\N$
so that $\scr x=x+1$.

\begin{lemma}\label{lem:zero-succ}
Every non-zero natural number is a successor.  Symbolically,
\begin{equation*}
  \N\models\forall x\qsep (x=0\lor\exists y\qsep \scr y=x).
\end{equation*}
\end{lemma}

\begin{proof}
Let $A$ be the set of natural numbers comprising $0$ and the
successors.  That is, $A=\{0\}\cup\{x\in\N:\exists y\qsep\scr y=x\}$.
Then $0\in A$ by definition.  Also, if $n\in A$, then 
$\scr n$ is a successor, so $\scr n\in A$.  By induction, $A=\N$.
\end{proof}

In the last
proof, the full inductive hypothesis $n\in A$ was
not needed; only $n\in\N$ was needed.

\begin{lemma}Every natural number is distinct from its successor:
  \begin{equation*}
      \N\models\forall x\qsep \scr x\neq x.
  \end{equation*}
\end{lemma}

\begin{proof}
Let $A=\{x\in\N:\scr x\neq x\}$.  Now, $\scr 0$ is a successor and is
therefore distinct from $0$ by \axz.  Hence $0\in A$.  Suppose
$n\in A$.  Then $\scr n\neq n$.  Therefore $\scr{\scr n{}}\neq \scr n$ by the
contrapositive\index{contrapositive} of \axu; so $\scr n\in A$.  By
induction, $A=\N$.
\end{proof}

\section{Recursion}\label{sect:recursion}

To be able to say much more about the natural numbers, we should
introduce the usual arithmetic operations.  We need not do this by
axioms; we can \emph{define} the operations.  There are
at least two ways to do this.  The approach that I propose to take
starts with the following theorem.  Its proof is difficult; but once
we have the theorem, we can freely define many useful operations
and functions.

\begin{theorem}[Recursion]\label{thm:recursion}\dindexsub{recursion}{R---
    Theorem}\dindexsub{theorem}{Recursion Th---}
Suppose $A$ is a set with an element $b$, and $f\colon A\to A$.  Then there
is a \emph{unique} function
$h$ from $\N$ to $A$ such that $h(0)=b$ and 
\begin{equation}\label{eqn:recursion}
h(\scr n)=f(h(n))
\end{equation}
for all $n$ in $\N$.
\end{theorem}


\begin{proof}
We seek $h$ as a particular
subset of $\N\times A$.
Let $\mathscr B$ be the set whose elements are the subsets $C$ of $\N\times
A$ such that, if $(x,y)\in C$, then either 
\begin{compactenum}[1)]
\item 
$(x,y)=(0,b)$ or else
\item $C$ has an element
$(u,v)$ such that $(x,y)=(\scr u,f(v))$.
\end{compactenum}
Let $R=\bigcup \mathscr B$; so $R$ is a relation from $\N$ to $A$.
Since $\{(0,b)\}\in
\mathscr B$, we have $0\mathrel Rb$.  If $n\mathrel Ry$, then $(n,y)\in C$ for
some $C$ in $\mathscr B$, but then $C\cup\{(\scr n,f(y))\}\in \mathscr B$ by definition of $\mathscr B$, so
$(\scr n)\mathrel R f(y)$.  Therefore $R$ is the desired function $h$,
provided it is a \emph{function} from $\N$ to $A$.  Proving this has
two stages.
\begin{asparaenum}[1.]
  \item
For all $n$ in $\N$, there is $y$ in $A$ such that $n\mathrel Ry$.
Indeed, let $D$ be the set of such $n$.  Then we have just seen that
$0\in D$, and if $n\in D$, then $\scr n\in D$.  By induction, $D=\N$.
\item
For all $n$ in $\N$, if $n\mathrel Ry$ and $n\mathrel Rz$, then $y=z$.
Indeed, let $E$ be the set of such $n$.  Suppose $0\mathrel R y$.  Then
$(0,y)\in C$ for some $C$ in $\mathscr B$.  Since $0$ is not a successor, we
must have $y=b$, by definition of $\mathscr B$.  Therefore $0\in E$.  Suppose
$n\in E$, and $(\scr n)\mathrel Ry$.  Then $(\scr n,y)\in C$ for some $C$ in
$\mathscr B$.  Again since $0$ is not a successor, we must have
$(\scr n,y)=(\scr m,f(v))$ for some $(m,v)$ in $C$.  Since succession is
injective, we must have $m=n$.  Since $n\in E$, we know $v$ is
\emph{unique} such that $n\mathrel Rv$.  Since $y=f(v)$, therefore $y$
is unique such that $(\scr n)\mathrel Ry$.  Thus $\scr n\in E$.  By
induction, $E=\N$.
\end{asparaenum}

So $R$ is the desired function $h$.
Finally, $h$ is unique by induction.
\end{proof}

In the statement of  Theorem \ref{thm:recursion}, $(A,f,b)$ is a
structure in the signature $\{{}\scr{},0\}$.  Also,
Equation
(\ref{eqn:recursion}) is that the following diagram
commutes:\index{commutes}\index{diagram!commutative ---}  
\begin{equation*}
  \begin{CD}
\N @>{\scr{}}>> \N\\
@V{h}VV @VV{h}V\\
A @>>{f}> A
\end{CD}
\end{equation*}
That is, from the $\N$ at the top left to the $A$ at the bottom right, there are two
different routes, but each one yields the
same result.  In fact, the theorem is simply that there is a unique
\emph{homomorphism}\index{homomorphism} from $(\N,{}\scr{},0)$ to $(A,f,b)$.

A \textbf{recursive definition,}\dindexsub{recursive}{---
  definition}\dindexsub{definition}{recursive ---} or a
  \textbf{definition by recursion,}\dindexsub{definition}{--- by 
  recursion}\dindexsub{recursion}{definition by ---} is a definition
  of a function on
$\N$ that is justified by Theorem \ref{thm:recursion}.  Informally,
we can define such a function $h$ by specifying $h(0)$ and by
specifying how $h(\scr n)$ is obtained from $h(n)$.
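
For example, let $s$ be the singulary operation on $\B$ that interchanges $0$ and $1$ (the operation of Example~\ref{exam:ind-not-imp-rec} below).  Taking $(A,f,b)$ to be $(\B,s,0)$ in the Recursion Theorem, we obtain a unique function $h$ from $\N$ to $\B$ such that $h(0)=0$ and $h(\scr n)=s(h(n))$: namely, the function taking each natural number to $0$ or $1$ according as the number is even or odd.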

Sections \ref{sect:arith-ops} and \ref{sect:recursion-gen} will
provide several important examples of recursive definitions.  Such
definitions are sometimes\footnote{Dedekind calls them definitions by
  induction in \cite[Theorem 126, p.~85]{MR0159773}, which corresponds
  to the Recursion Theorem above.} called \textsl{inductive
  definitions,}\tindexsub{inductive}{---
  definition}\tindexsub{definition}{inductive ---} or
\textsl{definitions by induction}.\tindexsub{definition}{--- by
  induction}\tindexsub{induction}{definition by ---}
However, this terminology is misleading when \axi{} is called the
Axiom of Induction.  Logically, the Recursion
Theorem is equivalent to the three Peano Axioms together; the
Recursion Theorem is
strictly stronger than the Induction Axiom, in the sense that there
are models of \axi{} that do not satisfy Theorem~\ref{thm:recursion}.
The remainder of this section is devoted to proving this.

Let us say that a structure \textbf{admits (definition by)
  recursion}\dindexsub{admits}{--- definition by
  recursion}\dindexsub{definition}{admits --- by
  recursion}\dindexsub{recursion}{admits definition by ---} if
it satisfies
the Recursion Theorem.  That is, a structure $\str A$ in the signature
$\{{}\scr{},0\}$ admits
recursion if and only if, for any other structure $\str B$ in this
signature, there is a unique homomorphism from $\str A$ to $\str B$.

Similarly, structures that satisfy the Induction Axiom can be said to
\textbf{admit (proof by) induction.}\dindexsub{admits}{--- proof by
  induction}\dindexsub{proof}{admits --- by
  induction}\dindexsub{induction}{admits proof by ---}

\begin{theorem}\label{thm:isom}
  All structures that admit recursion are isomorphic.
\end{theorem}

\begin{proof}
Suppose $\str A$ and $\str B$ admit recursion.  Then there are unique
homomorphisms $f$ from $\str A$ to $\str B$ and $g$ from $\str B$ to
$\str A$.  Hence the composition
$g\circ f$ is a homomorphism from $\str A$ to itself; so it is the
unique such homomorphism.  But $\id_A$ is
also such a homomorphism.  Therefore $g\circ f=\id_A$.  Similarly,
$f\circ g=\id_B$.  Therefore $g=f\inv$, by Theorem~\ref{thm:bij}. 
\end{proof}

\begin{corollary}\label{cor:rec-Peano}
All structures that admit recursion satisfy the Peano axioms; in
particular, they admit induction.
\end{corollary}

\begin{proof}
  By the theorem, every structure that admits recursion is isomorphic
  to $(\N,{}\scr{},0)$.  This satisfies the Peano axioms; hence so
  does every structure isomorphic to it.
\end{proof}

However, there are structures that admit induction, but not
  recursion:\footnote{Apparently Peano
  himself did not recognize the distinction between proof by induction
  and definition by recursion; see the discussion of Landau~\cite[p.~x]{MR12:397m}. 
  Burris~\cite[p.~391]{Burris} does not acknowledge the distinction.  Stoll~\cite[p.~72]{MR83e:04002} uses the term `definition by weak
  recursion', although he
  observes that the validity of such a definition does \emph{not
  obviously} follow from the Induction Axiom.  However, Stoll does not
  \emph{prove} (as we have done in Example \ref{exam:ind-not-imp-rec})
  that the Induction Axiom is consistent\index{consistent} with the
  negation of the Recursion Theorem.}

\begin{example}\label{exam:ind-not-imp-rec}
  On $\B$, define a singulary operation $s$ by
  $s(0)=1$ and $s(1)=0$.  Then
  $(\B,s,0)$ admits induction,\footnote{The structure $(\B,s,0)$ in
  Example \ref{exam:ind-not-imp-rec} also
  satisfies \axu, but not \axz.  If we define $t:\B\to \B$ so that
  $t(n)=1$ for each $n$ in $\B$, then $(\B,t,0)$ satisfies the
  Induction Axiom and \axz, but not \axu.  Later 
we shall have natural
  examples of structures satisfying \axz\ and \axu, but not admitting
  induction.}
 but there is \emph{no}
  function $g:\B\to\N$ such that $g(0)=0$ and $g(s(n))=\scr{(g(n))}$
  for all $n$ in $\B$.  Indeed, if $g$ were such a function, then
  $g(0)=g(s(1))=\scr{(g(1))}$, so that $0$ would be a successor in
  $\N$, contrary to \axz.
\end{example}

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Prove the Recursion Theorem by showing that,
if $\family C$ is the set of all subsets $D$ of $A$ such that
\begin{compactenum}[1)]
\item
$(0,b)\in D$, and
\item
if $(u,v)\in D$, then $(\scr u,f(v))\in D$,
\end{compactenum}
then $\bigcap\family C$ is the desired function $h$.
\item
  Prove directly (without Theorem~\ref{thm:isom}) that \axz\ is a
  consequence of the Recursion Theorem.  (For example, if
  in $\str A$ the successor-operation is surjective, show that there
  is no homomorphism from $\str A$ into $\N$.)
\end{enumerate}

\section{Arithmetic operations}\label{sect:arith-ops}

By recursion, we can define addition, multiplication and
exponentiation.\footnote{We can also define addition and
  multiplication using only the
  Induction Axiom, not the Recursion Theorem.  The method is used by Landau~\cite{MR12:397m}.  As a
  result, the operations can be defined on structures that do not
  satisfy all of the Peano Axioms.  For example, let $n$ be a positive
  integer, and on $\Z$ let $\equiv$ be congruence
  \emph{modulo} $n$.%
\index{congruence \emph{modulo} $n$}%
\index{modulo@\emph{modulo}!congruence --- n@congruence --- $n$}
  If $x\equiv y$, then $x+1\equiv y+1$ (though by the standards of
  this chapter, we cannot quite prove this yet).  Hence we can define a
  successor-operation $s$ on $\Z/\mathord\equiv$, namely 
$[x]\mapsto[x+1]$.
The resulting structure $(\Z/\mathord\equiv,s,[0])$ satisfies the
Induction Axiom; 
therefore it can be equipped with an addition and a multiplication
that satisfy the theorems of this section.  Thus we get arithmetic
\textbf{\emph{modulo}} $n$.  We can define
exponentiation on $\Z/\mathord\equiv$ by $x^1=x$ and $x^{k+1}=x^k\cdot
x$ if and only if $n$ is $1$, $2$, $6$, $42$, or $1806$.  If we try
the definition in case 
$n=3$, we 
get $2^1=2$, $2^2=2\cdot 2=1$, so $2^{s(2)}=2$, $2^{s(s(2))}=1$---but also
$s(s(2))=1$, so $2^{s(s(2))}=2^1=2$.}
First, we define the binary operation $+$ of
\textbf{addition}\dindex{addition}\dindexsub{operation}{addition} on $\N$ by
defining, for each 
$n$ in $\N$, the singulary operation $y\mapsto n+y$.  This operation is
given by the rules:
\begin{compactenum}[1)]
\item
$n+0=n$;
\item
$n+\scr m=\scr{(n+m)}$.
\end{compactenum}
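
For readers who wish to experiment, the two clauses can be transcribed
as a short Python sketch (a bare illustration, not part of the formal
development; the names \texttt{succ} and \texttt{add} are ours):
\begin{verbatim}
def succ(n):
    # the successor operation on the natural numbers
    return n + 1

def add(n, m):
    # recursion on the second argument, following the two clauses
    if m == 0:
        return n                    # n + 0 = n
    return succ(add(n, m - 1))      # n + succ(k) = succ(n + k)

assert add(2, 3) == 5
\end{verbatim}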

\begin{lemma}\label{lem:add}
  $\N$ satisfies
\begin{compactenum}[1)]
  \item
  $\Forall x0+x=x$,
  \item
  $\Forall x\Forall y \scr y+x=\scr{(y+x)}$.
\end{compactenum}
\end{lemma}

\begin{proof}
By definition of addition, $0+0=0$.  
Suppose $0+n=n$.  Then
\begin{align*}
  0+\scr n
&=\scr{(0+n)}&&\text{[by definition of addition]}\\
&=\scr n.&&\text{[by inductive hypothesis]}
\end{align*}
This completes an induction showing $\N\models\Forall x0+x=x$.

For the second claim, as the base step of an induction, we have
\begin{align*}
  \scr m+0
&=\scr m&&\text{[by the first claim]}\\
&=\scr{(m+0)};&&\text{[again by the first claim]}
\end{align*}
so $\Forall y\scr y+0=\scr{(y+0)}$.

Now, as an inductive hypothesis, suppose $\Forall y\scr y+n=\scr{(y+n)}$.
  Then, for all $m$ in 
  $\N$, we have 
\begin{align*}
  \scr m+\scr n
&=\scr{(\scr m+n)}&&\text{[by definition of addition]}\\
&=\scr{\scr{(m+n)}{}}&&\text{[by inductive hypothesis]}\\
&=\scr{(m+\scr n)}&&\text{[again by definition of addition]}.
\end{align*}
This completes an induction showing $\Forall x\Forall y\scr
  y+x=\scr{(y+x)}$. 
\end{proof}

The second part of the proof showed
$\N=\{x:\Forall y\scr y+x=\scr{(y+x)}\}$:  We have
proved the identity
\begin{equation}\label{eqn:y'+x=(y+x)'}
  \scr y+x=\scr{(y+x)}
\end{equation}
in $\N$ by \textbf{induction on $x$}.\dindexsub{induction}{--- on x@--- on $x$}
Induction on $y$ here does not work directly.  Indeed, suppose
$A=\{y\in\N:\Forall x\scr y+x=\scr{(y+x)}\}$.  To prove that $0\in A$,
we have to show that $\scr 0+n=\scr{(0+n)}$.  From the first part of
the theorem, we know that $\scr{(0+n)}=\scr n$; but we cannot yet say
anything about $\scr 0+n$.  We could prove $\Forall x\scr 0+x=\scr x$
by induction; but it would be more efficient just to start over and prove
Identity~\eqref{eqn:y'+x=(y+x)'} by induction on $x$.

To prove some identities below, one has to choose the right
variable to work with.

\begin{theorem}\label{thm:add}
On $\N$, the following hold.
\begin{compactenum}
  \item
  $\Forall x\scr x=x+1$.
  \item
  Addition is \textbf{commutative:}\dindex{commutative}
  \begin{equation*}
\Forall x \Forall y\qsep x+y=y+x.
\end{equation*}
  \item
Addition is \textbf{associative:}\dindex{associative}
\begin{equation*}
\Forall x \Forall y\qsep \Forall z\qsep (x+y)+z=x+(y+z).
\end{equation*}
\item
Addition admits \textbf{cancellation:}\dindex{cancellation}
\begin{equation*}
\Forall x\Forall y\Forall z(x+z=y+z\lto x=y).
\end{equation*}
\end{compactenum}
\end{theorem}

We may henceforth write $n+1$ instead of $\scr n$.

The \textbf{binomial coefficients}\dindexsub{binomial}{---
  coefficient}\dindexsub{coefficient}{binomial ---} $\binom nm$ can be
  given recursively as follows.  First we define $m\mapsto\binom0m$ by
  \begin{equation*}
    \binom0m=
    \begin{cases}
      1,&\text{ if }m=0,\\
      0,&\text{ if }m\neq0.
    \end{cases}
  \end{equation*}
Then, in terms of $m\mapsto\binom nm$, we define
$m\mapsto\binom{n+1}m$ recursively by 
\begin{equation*}
  \binom{n+1}m=
  \begin{cases}
    1,&\text{ if }m=0,\\
\binom nk+\binom n{k+1},&\text{ if }m=k+1.
  \end{cases}
\end{equation*}
(See also Exercises \ref{exer:binom} and \ref{exer:bin-thm} in
  \S~\ref{order}.) 
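
The recursion just given also admits a Python transcription; in the
following sketch (the name \texttt{binom} is ours), the case analysis
mirrors the two displayed definitions:
\begin{verbatim}
def binom(n, m):
    # recursion on n; at each stage, the whole function
    # m -> binom(n, m) is the value being defined
    if m == 0:
        return 1                    # binom(n, 0) = 1
    if n == 0:
        return 0                    # binom(0, m) = 0 when m != 0
    return binom(n - 1, m - 1) + binom(n - 1, m)

assert binom(4, 2) == 6
\end{verbatim}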

The binary operation $\cdot$ of
  \textbf{multiplication}\dindex{multiplication}\dindexsub{operation}{multiplication} on $\N$  
  is given by: 
  \begin{compactenum}[1)]
    \item
    $n\cdot 0=0$;
    \item
    $n\cdot(m+1)=n\cdot m+n$.
  \end{compactenum}
Multiplication is also indicated by juxtaposition, so that $n\cdot m$
is $nm$. 

\begin{lemma}\label{lem:mult}
$\N$ satisfies
\begin{compactenum}[1)]
  \item
  $\Forall x 0x=0$,
  \item
    $\Forall x \Forall y\qsep(y+1)x=yx+x$.
\end{compactenum}
\end{lemma}

\begin{theorem}\label{thm:mult}
On $\N$, the following hold.
\begin{compactenum}
  \item
  $\Forall x 1x=x$.
  \item
  Multiplication is commutative ($\Forall x \Forall y\qsep xy=yx$).
  \item
  Multiplication \textbf{distributes}\dindex{distributive} over addition:
  \begin{equation*}
\Forall x \Forall y\qsep \Forall z\qsep (x+y)z=xz+yz.
\end{equation*}
  \item
  Multiplication is associative ($\Forall x \Forall y\qsep \Forall z\qsep (xy)z=x(yz)$).
\end{compactenum}
\end{theorem}

Finally, exponentiation:
the binary operation
  $(x,y)\mapsto x^y$
on $\N$ is given by:
\begin{compactenum}[1)]
  \item
$n^0=1$;
\item
$n^{m+1}=n^m\cdot n$.  
\end{compactenum}
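
As with addition, these definitions can be transcribed directly; the
following Python sketch assumes the function \texttt{add} of the
earlier fragment:
\begin{verbatim}
def mul(n, m):
    # n . 0 = 0;  n . (m+1) = n . m + n
    if m == 0:
        return 0
    return add(mul(n, m - 1), n)

def power(n, m):
    # n^0 = 1;  n^(m+1) = n^m . n
    if m == 0:
        return 1
    return mul(power(n, m - 1), n)

assert mul(3, 4) == 12 and power(2, 5) == 32
\end{verbatim}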

\begin{theorem}\label{thm:exp}
The following are identities\index{identity} in $\N$:
  \begin{compactenum}[1)]
    \item 
      $x^{y+z}=x^yx^z$;
    \item
      $(x^y)^z=x^{yz}$;
    \item
      $(xy)^z=x^z y^z$.
  \end{compactenum}
\end{theorem}

\begin{proof}
  Exercise.
\end{proof}

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
%\item
%Prove Lemma~\ref{lem:add}.
\item
Prove Theorem~\ref{thm:add}.  In the latter two parts, does induction work on
every variable?
  \item
  Prove that $\binom x1=x$ for all $x$ in $\N$.
\item
Prove Lemma~\ref{lem:mult}.  In the second part, does induction work
on either variable?
\item
Prove Theorem~\ref{thm:mult}.
\item
Prove Theorem~\ref{thm:exp}.
\end{enumerate}

\section{Rational numbers}\label{sect:ZandQ}

\subsection*{The positive rational numbers}

The integers can be constructed from the natural numbers, and the
rational numbers can be constructed from the integers.  However, the
\emph{positive} rational numbers can  also be constructed directly
from the \emph{positive} natural numbers, and indeed we are taught some aspects of
this construction from an early age.  
Let us denote the set of positive natural numbers, $\{1,2,3,\dots\}$, by
\begin{equation*}
\Np.
\end{equation*}
If $a$ and $b$ are positive natural
numbers, then there is a \textbf{fraction}\index{fraction} denoted by 
\begin{equation*}
\frac ab
\end{equation*}
or $a/b$.
Then there are definitions for adding and multiplying fractions:
\begin{align}\label{eqn:Q}
  \frac ab+\frac cd&=\frac{ad+cb}{bd},&
\frac ab\cdot\frac cd&=\frac{ac}{bd}.
\end{align}
We are taught to \emph{reduce} fractions also: By~\eqref{eqn:Q}
we compute $1/3+1/6=9/18$, which reduces to $1/2$.  In particular,
$9/18$ and $1/2$ are \emph{equal} fractions.  
Equality of fractions may be given by
\begin{equation}\label{eqn:frac=}
\frac ab=\frac cd\iff ad=cb.
\end{equation}
This equation is justified by the following theorem, in which $\sim$ is
the relation on $\Np\times\Np$ defined by
\begin{equation}\label{eqn:sim}
(a,b)\sim(x,y)\iff ay=bx.
\end{equation}

\begin{theorem}\label{thm:sim-eq}
The relation $\sim$ on $\Np\times\Np$ is an equivalence-relation.
\end{theorem}

\begin{proof}
Reflexivity and symmetry of $\sim$ follow immediately from the
corresponding properties of equality; but transitivity needs more.
Suppose $(a,b)\sim(c,d)$ and $(c,d)\sim(e,f)$.  Then $ad=cb$ and
$cf=ed$, so 
\begin{equation*}
(ad)f=(cb)f=c(bf)=c(fb)=(cf)b=(ed)b
\end{equation*}
by commutativity and associativity of multiplication.  By these
properties and also cancellation, we can go on to conclude 
\begin{equation*}
af=eb,
\end{equation*}
hence $(a,b)\sim(e,f)$.
\end{proof}

The fraction $a/b$ is the equivalence-class $(a,b)/\mathord{\sim}$.
Let us denote $(\Np\times\Np)/\mathord{\sim}$ by
\begin{equation*}
\Qp.
\end{equation*}
This is the set of \textbf{positive rational numbers.}\index{positive
  rational numbers}

\subsubsection*{Structure}

We are free to define operations $\oplus$ and $\otimes$ on $\Np\times\Np$ by
\begin{align*}
(a,b)\oplus(c,d)&=(ad+cb,bd),&
(a,b)\otimes(c,d)&=(ac,bd).
\end{align*}
What makes these useful is the following:

\begin{theorem}\label{thm:+.-on-Nmod}
If $a/b=a'/b'$
and $c/d=c'/d'$, then
\begin{gather}\notag
(a,b)\oplus(c,d)\sim(a',b')\oplus(c',d'),\\\notag
(a,b)\otimes(c,d)\sim(a',b')\otimes(c',d').
\end{gather}		
\end{theorem}

\begin{corollary}\label{cor:+.-on-Nmod}
On $\Qp$, the equations~\eqref{eqn:Q} define
two binary operations. 
\end{corollary}
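
The content of the theorem and its corollary can be tried out on
representatives.  The following Python sketch (the function names are
ours) works with ordered pairs of positive integers and checks the
computation $1/3+1/6=9/18=1/2$ made above:
\begin{verbatim}
def frac_eq(ab, cd):
    # (a,b) ~ (c,d) if and only if ad = cb
    (a, b), (c, d) = ab, cd
    return a * d == c * b

def frac_add(ab, cd):
    (a, b), (c, d) = ab, cd
    return (a * d + c * b, b * d)   # (a,b) (+) (c,d)

def frac_mul(ab, cd):
    (a, b), (c, d) = ab, cd
    return (a * c, b * d)           # (a,b) (x) (c,d)

assert frac_add((1, 3), (1, 6)) == (9, 18)
assert frac_eq((9, 18), (1, 2))
\end{verbatim}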

\begin{theorem}\label{thm:Q^+}
On $\Qp$, 
\begin{compactenum}[1)]
\item
addition and multiplication are commutative and associative,
\item
multiplication distributes over addition,
\item
$1$ is a \textbf{multiplicative identity:}%
\dindexsub{identity}{multiplicative ---}%
\dindexsub{multiplicative}{--- identity}
\begin{equation*}
\Forall x1\cdot x=x.
\end{equation*}
\end{compactenum}
\end{theorem}

The whole point of defining $\Qp$ is the following: 

\begin{theorem}\label{thm:sol-in-Q+}
There is a well-defined operation $x\mapsto x\inv$ on $\Qp$ given by
\begin{equation*}
\left(\frac ab\right)\inv=\frac ba.
\end{equation*}
This operation is 
\textbf{multiplicative inversion:}%
\dindexsub{inversion}{multiplicative}%
\dindexsub{multiplicative}{--- inversion}
\begin{equation*}
\Forall xx\cdot x\inv=1.
\end{equation*}
\end{theorem}

Therefore, if $r$
and $s$ are in $\Qp$, then the equation $r=s\cdot x$ has the unique
solution $s\inv r$, which is written also as a fraction,
\begin{equation*}
\frac rs.
\end{equation*}
If $a,b,c,d\in\N$, then
\begin{equation*}
\frac{a/b}{c/d}=\frac{ad}{bc},
\end{equation*}
and in particular
\begin{equation}\label{eqn:a/1/c/1}
\frac{a/1}{c/1}=\frac ac.
\end{equation}
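
Such identities are easy to check numerically; for instance, the
following Python lines use the standard \texttt{fractions} module (the
particular values are arbitrary):
\begin{verbatim}
from fractions import Fraction

a, b, c, d = 3, 4, 5, 7
assert Fraction(a, b) / Fraction(c, d) == Fraction(a * d, b * c)
\end{verbatim}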

\subsubsection*{Numbers and fractions}

By our construction, a positive natural number is not literally a positive
rational number; a positive rational number is a class of ordered pairs of positive natural numbers.  
One way to understand this is shown in Figure~\ref{fig:grid}, 
\begin{figure}[t]
\begin{center}
\psset{unit=3mm}
\begin{pspicture}(12,12)
\psgrid[subgriddiv=1,gridlabels=0,griddots=4](12,12)(12,12)(0,0)
\uput[r](12,12){$O$}
\psline(-1,3.333)(12,12)
\uput[l](-1,3.333){$\displaystyle\frac32$}
\psdots(9,10)(6,8)(3,6)(0,4)
\psset{framesep=0pt}
\uput[dr](9,10){\psframebox*{$(3,2)$}}
\uput[dr](6,8){\psframebox*{$(6,4)$}}
\uput[dr](3,6){\psframebox*{$(9,6)$}}
\uput[dr](0,4){\psframebox*{$(12,8)$}}
\psdots[dotstyle=o](12,12)
\end{pspicture}
\end{center}
\caption{Fractions as straight lines}\label{fig:grid}
\end{figure}
where ordered pairs of natural numbers are depicted as points in a
grid; then a fraction is the class of ordered pairs lying on a
particular straight line through the point $O$.   

A fraction may not literally be a positive natural number; but there are fractions that \emph{behave} like natural numbers:

\begin{theorem}\label{thm:N-in-Q+}
The function $x\mapsto x/1$ is an embedding\index{embedding} 
 of
$(\Np,1,+,\cdot)$ in
$(\Qp,1/1,+,\cdot)$; that is, it is injective, it takes $1$ to $1/1$, and  
\begin{align*}
\frac{x+y}1&=\frac x1+\frac y1,&				
\frac{x\cdot y}1&=\frac x1\cdot\frac y1.
\end{align*} 
\end{theorem}

\begin{proof}
Immediate from the definitions.
\end{proof}

We may therefore forget about the distinction between natural numbers and positive rational numbers: we may \emph{identify} a natural number $n$ with its image $n/1$
in $\Qp$.  By~\eqref{eqn:a/1/c/1}, there will be no ambiguity in writing fractions: a fraction of natural numbers as such will be the same as their fraction as positive rational numbers.

Using the idea in Figure~\ref{fig:grid}, we can arrange the positive
rational numbers along a semicircle, according to their ordering, as
in Figure~\ref{fig:Q+}~\eqref{item:circ}. 
\begin{figure}[t]
\mbox{}\hfill
\begin{inparaenum}[(a)]
\item\label{item:circ}
\begin{pspicture}[](-4.5,-4.5)(0,0)
\psset{unit=1.5cm}
\psgrid[subgriddiv=1,gridlabels=0,griddots=12](0,0)(-3,-3)
\psarc(0,-1){1}{90}{270}
\uput[l](-1,-1){\psframebox*{$1$}}
\psline(-3,-2)(0,0)(-2,-3)
\psdots(-0.923,-0.615)(-0.923,-1.385)(-1,-1)
\uput[l](-0.923,-0.615){$3/2$}
\uput[l](-0.923,-1.385){$2/3$}
\psdots[dotstyle=o](0,0)
\end{pspicture}
\hfill
\item\label{item:line}
\begin{pspicture}[](-4.5,-4.5)(0,0)
\psset{unit=1.5cm}
\psgrid[subgriddiv=1,gridlabels=0,griddots=12](0,0)(-3,-3)
\psline(-3,-2)(0,0)(-2,-3)
\psline(-3,-1)(0,-1)
\psdots(-1.5,-1)(-0.667,-1)(-1,-1)
\uput[ul](-1.5,-1){$3/2$}
\uput[dl](-1,-1){$1$}
\uput[dr](-0.667,-1){$2/3$}
\psdots[dotstyle=o](0,0)
\end{pspicture}
\end{inparaenum}
\hfill\mbox{}
\caption{Positive rationals along a semicircle and a straight
  line}\label{fig:Q+} 
\end{figure}
It is more usual to arrange the positive rational numbers along a
straight line, as in 
Figure~\ref{fig:Q+}~\eqref{item:line}; the point of using a semicircle
is that here, if $k<m$, then $m/k$ lies directly above $k/m$. 
Indeed, in Figure~\ref{fig:geom},
\begin{figure}[ht]
\begin{center}
\begin{pspicture}(-1.5,-3)(0,0)
\psset{unit=1.5cm}
%\psgrid[subgriddiv=1,gridlabels=0](0,0)(-3,-3)
\psarc(0,-1){1}{90}{270}
\psline(-1,0)(0,0)(0,-2)
\psline(0,-2)(-0.923,-0.615)(0,0)
\psline(0,0)(-0.923,-1.385)(-0.923,-0.615)
%\psdots(-0.923,-0.615)(-0.923,-1.385)
\uput[r](0,0){$O$}
\uput[l](-1,0){$A$}
\uput[l](-0.923,-0.615){$B$}
\uput[r](0,-2){$C$}
\uput[l](-0.923,-1.385){$D$}
\end{pspicture}
\end{center}
\caption{Fractions are below their reciprocals}\label{fig:geom}
\end{figure}
since $BDCO$ is a semicircle, the angles $AOB$, $OCB$, and $ODB$ are
equal;  
if also $AOB$ and $COD$ are equal, then $COD$ and $ODB$ are equal, so
the straight lines $BD$ and $OC$ are parallel. 




\subsection*{The integers}

In analogy
with~\eqref{eqn:sim}, let us define $\approx$ on $\N\times\N$ by 
\begin{equation}\label{eqn:approx}
(a,b)\approx(c,d)\iff a+d=b+c.
\end{equation}
Then we have a direct analogue of Theorem~\ref{thm:sim-eq}:

\begin{theorem}\label{thm:approx-eq}
The relation $\approx$ on $\N\times\N$ is an equivalence-relation.
\end{theorem}

Now we can denote $(n,m)/\mathord{\approx}$ by
\begin{equation*}
n-m.
\end{equation*}
Such an equivalence-class
is just an \textbf{integer;}\index{integer} the set of 
all integers is 
\begin{equation*}
\Z.
\end{equation*}
In analogy with Corollary~\ref{cor:+.-on-Nmod}, we have:

\begin{theorem}\label{thm:Z-+}
On $\Z$, there is a well-defined operation of addition, given by 
\begin{equation*}
(a-b)+(c-d)=(a+c)-(b+d).
\end{equation*}
\end{theorem}

In partial analogy with Theorem~\ref{thm:Q^+}, we have

\begin{theorem}\label{thm:Z-ocm}
On $\Z$, addition is commutative and associative, and $0$ is an
\textbf{additive identity:}%
\dindexsub{additive}{--- identity}%
\dindexsub{identity}{additive ---}
\begin{equation*}
\Forall x0+x=x.
\end{equation*}
\end{theorem}

In analogy with Theorem~\ref{thm:sol-in-Q+}, we have

\begin{theorem}\label{thm:Z-}
There is a well-defined operation $x\mapsto-x$ on $\Z$ given by
\begin{equation*}
-(k-n)=n-k.
\end{equation*}
This operation is 
\textbf{additive inversion:}%
\dindexsub{additive}{--- inversion}%
\dindexsub{inversion}{additive ---}
\begin{equation*}
\Forall xx+(-x)=0.
\end{equation*}
\end{theorem}

If $a$ and $b$ are in $\Z$, then the equation $a=b+x$ has the unique
solution $-b+a$, which is also denoted by 
\begin{equation*}
a-b.
\end{equation*}
If $k,\ell,m,n\in\N$, then
\begin{equation*}
(k-\ell)-(m-n)=(n+k)-(m+\ell).
\end{equation*}
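
As with fractions, this arithmetic can be tried out on representatives;
in the following Python sketch (names ours), the integer $a-b$ is
represented by the pair \texttt{(a, b)}:
\begin{verbatim}
def int_eq(ab, cd):
    # (a,b) ~ (c,d) if and only if a + d = b + c
    (a, b), (c, d) = ab, cd
    return a + d == b + c

def int_add(ab, cd):
    (a, b), (c, d) = ab, cd
    return (a + c, b + d)           # (a-b) + (c-d) = (a+c) - (b+d)

def int_neg(ab):
    a, b = ab
    return (b, a)                   # -(a-b) = b - a

# 2 - 5 should represent -3, that is, the class of (0, 3):
assert int_eq(int_add((2, 0), int_neg((5, 0))), (0, 3))
\end{verbatim}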

In analogy with Theorem~\ref{thm:N-in-Q+}, we have

\begin{theorem}\label{thm:N-in-Z}
  The function $x\mapsto x-0$ embeds $(\N,0,+)$ in $(\Z,0,+)$.  
 \end{theorem}

We may identify a natural number $n$ with its image $n-0$ in
$\Z$.  
The elements of $\Z$ are usually depicted on a straight line extending
infinitely in both directions.  Alternatively, we can arrange them in
a circle, as in Figure~\ref{fig:Z}, 
\begin{figure}[t]
\begin{center}
\begin{pspicture}(-1.5,-1.5)(1.5,1.5)
\psset{unit=1.5cm}
\pscircle(0,0){1}
\parametricplot[showpoints=true,plotpoints=11,plotstyle=dots]{-5}{5}{
t neg 2 mul t t mul 1 add div
t t mul 1 sub 1 t t mul add div}
\uput[d](0,-1){$0$}
\uput[l](-1,0){$1$}
\uput[r](1,0){$-1$}
\uput[143](-0.8,0.6){$2$}
\uput[37](0.8,0.6){$-2$}
\uput[127](-0.6,0.8){$3$}
\uput[53](0.6,0.8){$-3$}
%\uput[118](-0.471,0.882){$4$}
%\uput[62](0.471,0.882){$-4$}
\psdots[dotstyle=o](0,1)
\end{pspicture}
\end{center}
\caption{Integers on a circle}\label{fig:Z}
\end{figure}
where, if $0<n$, then $-n$ is directly to its right.  The left half of
the circle is the semicircle in
Figure~\ref{fig:Q+}~\eqref{item:circ}. 

Finally, we can extend \textbf{multiplication}\index{multiplication} on $\N$ to $\Z$ as in school, by
\begin{align}\label{eqn:Z-.}
-m\cdot -n&=m\cdot n,& -m\cdot n&=m\cdot -n=-(m\cdot n),
\end{align}
where $m$ and $n$ are in $\N$.

\begin{theorem}\label{thm:Z+.}
Multiplication on $\Z$ is commutative and associative, and it distributes over addition; also $1$ is a multiplicative identity.
\end{theorem}

\begin{proof}
Commutativity of multiplication on $\Z$, and the fact that $1$ is a multiplicative identity, follow immediately from the corresponding properties on $\N$, along with the definitions~\eqref{eqn:Z-.}.  Associativity follows from considering the several cases, such as 
\begin{equation*}
(x\cdot-y)\cdot-z=-(x\cdot y)\cdot-z=(x\cdot y)\cdot z=x\cdot(y\cdot z)=x\cdot(-y\cdot-z).
\end{equation*}
For distributivity, suppose for example that $-y+z=w>0$.  Then $z=w+y$, so $x\cdot z=x\cdot w+x\cdot y$, and therefore
\begin{equation*}
x\cdot(-y+z)=-(x\cdot y)+x\cdot z=x\cdot-y+x\cdot z.\qedhere
\end{equation*}
\end{proof}

\subsection*{The rational numbers}

As we obtained $\Z$ from $\N$, so we can obtain $\Z$ from $\Np$.  The
difference is that the embedding of $\Np$ in $\Z$ is
$x\mapsto(x+1)-1$, and $0$ in $\Z$ is $1-1$. 

We can now obtain $(\Q,0,-,+,\cdot)$ from $(\Qp,+,\cdot)$ just as we
obtain $(\Z,0,-,+,\cdot)$ from $(\Np,+,\cdot)$. 

\begin{theorem}\label{thm:Q}
Addition and multiplication are commutative and associative on $\Q$,
and multiplication distributes over addition.  Addition has the
identity $0$, and multiplication has the identity $1$.  The operation
$x\mapsto-x$ is additive inversion, and there is an operation
$x\mapsto x\inv$ of multiplicative inversion on $\Q\setminus\{0\}$. 
\end{theorem}

Because of this theorem, $\Q$ is called a \textbf{field.}



\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Prove Theorem~\ref{thm:+.-on-Nmod} and Corollary~\ref{cor:+.-on-Nmod}.
\item
Prove Theorem~\ref{thm:Q^+}.
\item
Prove Theorem~\ref{thm:sol-in-Q+}.
\item
Prove Theorem~\ref{thm:approx-eq}.
\item
Prove Theorem~\ref{thm:Z-+}.
\item
Prove Theorem~\ref{thm:Z-ocm}.
\item
Prove Theorem~\ref{thm:Z-}.
\item
Prove Theorem~\ref{thm:N-in-Z}.
\item
Prove Theorem~\ref{thm:Z+.}.
\item
Prove Theorem~\ref{thm:Q}.
\end{enumerate}

\section{More recursion}\label{sect:recursion-gen}

Informally, we define $n!$\glossary{$n!$}, that is,
$n$\textbf{-factorial,}%
\dindexsub{factor}{---ial}%
\dindex{nfactorial@$n$-factorial} 
by
\begin{equation*}
n!=1\cdot 2\cdot 3\cdots(n-1)\cdot n.
\end{equation*}
More precisely, we have the recursive definition
\begin{align}\label{eqn:rec-gen}
0!&=1,&(n+1)!&=n!\cdot(n+1).
\end{align}
However, the Recursion Theorem as it stands does not justify this
definition: we should have to express $n!\cdot(n+1)$ as a function of
$n!$ alone, without reference to $n$.
Instead, \eqref{eqn:rec-gen} is a valid recursive definition, by the following theorem.

\begin{theorem}[Recursion with
    Parameter]\label{thm:rec-param}\dindexsub{recursion}{R--- Theorem
    with Parameter}\dindexsub{theorem}{Recursion Th--- with
    Parameter}\dindexsub{parameter}{Recursion Theorem with P---}
Suppose $B$ is a set with an element $c$, and $F:\N\times B\to B$.
Then there is a \emph{unique} function
$G$ from $\N$ to $B$ such that $G(0)=c$ and 
\begin{equation}\label{eqn:str-rec}
G(n+1)=F(n,G(n))
\end{equation}
for all $n$ in $\N$.
\end{theorem}


\begin{proof}
  Let $f$ be the function 
  \begin{equation*}
      (x,b)\longmapsto(x+1,F(x,b))
  \end{equation*}
from $\N\times B$ to $\N\times B$.
By recursion, there is a unique function $g$ from $\N$ to
  $\N\times B$ such that $g(0)=(0,c)$ and 
\begin{equation*}
g(n+1)=f(g(n))
\end{equation*}
for all
  $n$ in $\N$.  Now let $G$ be $\pi\circ g$, where $\pi$ is the
  function 
  \begin{equation*}
      (x,b)\longmapsto b
  \end{equation*}
from $\N\times B$ to $B$.
Then for each $n$ in $\N$ we have $g(n)=(m,G(n))$ for some $m$ in
$\N$.  We can prove by induction that $m=n$.  Indeed, this is the
case when $n=0$, since $g(0)=(0,c)$.  Suppose $g(n)=(n,G(n))$ for some
$n$ in $\N$.  Then
\begin{equation}\label{eqn:strong-rec}
  g(n+1)=f(n,G(n))=(n+1,F(n,G(n))).
\end{equation}
In particular, the first entry in the value of $g(n+1)$ is $n+1$.  This
completes our induction.  

We now know that $g(n)=(n,G(n))$ for all $n$ in $\N$.  Hence in
particular $g(n+1)=(n+1,G(n+1))$.  But we also have
(\ref{eqn:strong-rec}).  Therefore we have (\ref{eqn:str-rec}),
as desired.  Finally, each of $g$ and $G$ determines the other.  Since
$g$ is unique, so is $G$.
\end{proof}

\begin{example}
  We can define a function $f$ on $\N$ by requiring $f(0)=0$ and
  $f(x+1)=x$.  This is a valid recursive definition, by Theorem
  \ref{thm:rec-param}.  Note that $f$ picks out the immediate
  predecessor of a natural number, when this exists.\footnote{Since $f$ is
  unique, we now have a proof that \axu\ follows from the Recursion
  Theorem.}
\end{example}

  For any function $f$ from $\Np$ to $M$, where $M$ is a set equipped
  with addition and multiplication (and with elements $0$ and $1$, to
  serve as the empty sum and the empty product), we can now define the sum 
  $\sum_{k=1}^nf(k)$ and the product $\prod_{k=1}^nf(k)$ recursively
  as follows:
  \begin{align*}
\sum_{k=1}^0f(k)&=0,&\sum_{k=1}^{n+1}f(k)&=\sum_{k=1}^nf(k)+f(n+1),\\
\prod_{k=1}^0f(k)&=1,&\prod_{k=1}^{n+1}f(k)
&=\prod_{k=1}^nf(k)\cdot f(n+1).    
  \end{align*}
See Exercise~\ref{exercise:SP} below.
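
Theorem~\ref{thm:rec-param} also has a straightforward computational
reading.  The following Python sketch (the name
\texttt{rec\_with\_param} is ours) produces $G$ from $F$ and $c$, and
recovers both the factorial of~\eqref{eqn:rec-gen} and the predecessor
function of the example above:
\begin{verbatim}
def rec_with_param(F, c):
    # G(0) = c;  G(n+1) = F(n, G(n))
    def G(n):
        value = c
        for k in range(n):
            value = F(k, value)
        return value
    return G

factorial = rec_with_param(lambda n, x: x * (n + 1), 1)
assert [factorial(n) for n in range(6)] == [1, 1, 2, 6, 24, 120]

predecessor = rec_with_param(lambda n, x: n, 0)
assert predecessor(0) == 0 and predecessor(7) == 6
\end{verbatim}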

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}

\item\label{exercise:SP}
Show clearly that the definitions of $\sum_{k=1}^nf(k)$ and
$\prod_{k=1}^nf(k)$ are justified by Theorem~\ref{thm:rec-param}.
\item
Prove the following for all $n$ in $\N$:
\begin{enumerate}
    \item
  $\sum_{k=1}^nk=n(n+1)/2$;
  \item
  $\sum_{k=1}^nk^2=n(n+1)(2n+1)/6$;
  \item
  $\sum_{k=1}^nb^{k-1}=(b^n-1)/(b-1)$;
  \item
  $\sum_{k=1}^n(2k-1)=n^2$;
  \item
  $\prod_{k=1}^n(k/(k+1))=1/(n+1)$.
\end{enumerate}
\end{enumerate}





\section{Ordering of the natural numbers}\label{order}

We can define the binary relation $\leq$ on $\N$ as the set
\begin{equation*}
  \{(x,y)\in\N\times \N:\Exists zx+z=y\}.
\end{equation*}
The associated strict relation $<$ is then $\{(x,y)\in\N\times\N:x\leq
y\land x\neq y\}$. 
Now we have to show that $\leq$ is the linear ordering that we expect:

\begin{lemma}\label{lem:cancel}
  $\N\models\Forall x\Forall y(x+1\leq y+1\lto x\leq y)$.
\end{lemma}

\begin{proof}
  Suppose $a+1\leq b+1$.  Then $a+1+c=b+1$ for some $c$ in
  $\N$, by definition of $\leq$.  This means $a+c+1=b+1$, by
  Lemma~\ref{lem:add}, so
  $a+c=b$, by \axu, and therefore $a\leq b$, again by the definition
  of $\leq$. 
\end{proof}

\begin{lemma}\label{lem:x-leq-0}
$\N$ satisfies:
  \begin{compactenum}[1)]
\item
      $\Forall x(x\leq0\lto x=0)$;
\item
$\Forall x\Forall y(x+y\leq x\lto y=0)$.
  \end{compactenum}
\end{lemma}

\begin{proof}
  Suppose $a\leq0$.  Then $a+b=0$ for some $b$ in $\N$.  Either $a=0$,
  or $a=c+1$ for some $c$ in $\N$, by Lemma~\ref{lem:zero-succ}.
  In the latter case, $c+b+1=0$, which is absurd by \axz.  Hence
  $a=0$, and the first claim is proved. 

Now suppose $a+b\leq a$.  Then $a+b+c=a=a+0$ for some $c$, so $b+c=0$
by cancellation (Theorem~\ref{thm:add}), which means $b\leq0$.  Hence
$b=0$ by the first claim.  The second claim is now proved.
\end{proof}

\begin{lemma}\label{lem:<1}
  $\N$ satisfies:
  \begin{compactenum}[1)]
    \item
$\Forall x\Forall y(x<y\lto x+1\leq y)$;
\item
$\Forall x\Forall y(x<y+1\lto x\leq y)$.
  \end{compactenum}
\end{lemma}

\begin{proof}
  To prove the first claim, by Lemma~\ref{lem:zero-succ}, it is enough
  to show 
  \begin{gather*}
      \Forall x(x<0\lto x+1\leq 0),\\
\Forall x\Forall y(x<y+1\lto x+1\leq y+1).
  \end{gather*}  
The first sentence is trivially
  true in $\N$ by Lemma~\ref{lem:x-leq-0}, since the hypothesis $x<0$
  always fails:  If $n<0$, then $n\leq0$, so
  $n=0$, which means $\lnot(n<0)$.  

For the second sentence, suppose $n<m+1$.  Then $n+\ell=m+1$ for
some $\ell$; but $\ell\neq0$, so $\ell=k+1$ for some $k$.  Hence
$n+k+1=m+1$, that is, $n+1+k=m+1$, so $n+1\leq m+1$.

The proof of the second claim is an exercise.
\end{proof}

\begin{theorem}
  On $\N$, the relation $\leq$ is a linear ordering.
\end{theorem}

\begin{proof}
There are four properties to check:
  \begin{compactitem}
    \item[Reflexivity:]
Since $n+0=n$, we have $n\leq n$ by definition.
\item[Anti-symmetry:]
We show 
\begin{equation*}
n\leq x\land x\leq n\lto n=x
\end{equation*}
by considering that, by
Lemma~\ref{lem:zero-succ}, $x$ is either $0$
or a successor.
If $n\leq0$ and $0\leq n$, then $n\leq0$, so $n=0$ by
Lemma~\ref{lem:x-leq-0}.  Suppose $n\leq m+1$ and $m+1\leq n$.
From the latter inequality, $n=m+1+\ell=m+\ell+1$ for
some $\ell$.  Hence $m+\ell+1\leq m+1$ by the former
inequality, so
$m+\ell\leq m$ by Lemma~\ref{lem:cancel}.  Hence $\ell=0$ by
Lemma~\ref{lem:x-leq-0}, so $n=m+1+0=m+1$.  
\item[Transitivity:]
We show 
\begin{equation*}
  \ell\leq m\land m\leq z\lto \ell\leq z
\end{equation*}
by induction on $z$.
If $\ell\leq m$ and $m\leq0$, then $m=0$ by Lemma~\ref{lem:x-leq-0}, so
$\ell\leq 0$.  As an inductive hypothesis, suppose the claim holds
when $z=n$.
Suppose also $\ell\leq m$ and $m\leq n+1$.  There are two
possibilities.  If $m=n+1$, then 
$\ell\leq n+1$ immediately.   Suppose $m<n+1$.  Then $m\leq n$ by
Lemma~\ref{lem:<1}, so $\ell\leq n$ by inductive hypothesis.  By
definition then, $\ell+k=n$ for some $k$, so $\ell+k+1=n+1$, and
therefore $\ell\leq n+1$.  This completes the induction.
\item[Linearity:]
We show
\begin{equation*}
  x\leq m\lor m\leq x
\end{equation*}
by induction on $x$.  Since
$0+m=m$, we have $0\leq m$.  As an inductive
hypothesis, suppose the claim holds when $x=n$.
Suppose $\lnot(n+1\leq m)$ for some $m$.  Then
$\lnot(n<m)$ by Lemma~\ref{lem:<1}.  By inductive hypothesis, $m\leq
n$.  Also $n\leq n+1$.  By transitivity, $m\leq n+1$.\qedhere
  \end{compactitem}
\end{proof}

Various standard properties can now be proved:

\begin{theorem}\label{thm:ineq}
The following are true in $\N$.
  \begin{enumerate}
\item\label{part:cancellation}
$\Forall x\Forall y\Forall z(x<y\liff x+z<y+z)$.
\item
$\Forall x\Forall y\Forall z(x<y\liff x\cdot(z+1)<y\cdot(z+1))$.
\item\label{part:subtract}
$\Forall x\Forall y \Exists z(x\leq y\liff
  x+z=y)$.
  \end{enumerate}
\end{theorem}

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Complete the proof of Lemma~\ref{lem:<1}.
\item
Prove Theorem~\ref{thm:ineq}.
\item\label{exercise:n<2^n}
Prove $\N\models\Forall x x<2^x$.  (See \S~\ref{sect:equipollence}
\eqref{eqn:n<2^n}.)
\item
Prove the following in $\N$:
\begin{compactenum}
    \item
  $\Forall x\Forall y 1+xy\leq (1+x)^y$,
  \item
  $\Forall x(3<x\lto x^2<2^x)$.
\end{compactenum}
\item
Find the flaw in the following argument, where $\max$ is the function
from $\N\times\N$ to $\N$ such that
$\max(x,y)=y$ if $x\leq y$, and otherwise $\max(x,y)=x$. 
\begin{quote}
If
$\max(x,y)=0$, then  
$x=y$.  Suppose that $x=y$ whenever $\max(x,y)=n$.  Suppose
$\max(z,w)=n+1$.  Then $\max(z-1,w-1)=n$, so $z-1=w-1$ by inductive
hypothesis; therefore $z=w$.  Therefore all natural numbers are equal.
\end{quote}
\item\label{exer:binom}
  Prove that, if $y\leq x$, then $\displaystyle\binom
  xy=\displaystyle\frac{x!}{y!\,(x-y)!}$. 
\item\label{exer:bin-thm}
  Prove the \textbf{Binomial Theorem:}%
\dindexsub{binomial}{B--- Theorem}%
\dindexsub{theorem}{Binomial Th---} 
\begin{equation*}
(x+y)^n=\sum_{i=0}^n\binom ni x^{n-i}y^i.
\end{equation*} 
\item\label{exercise:proper-divisor}
Prove that every proper divisor of a positive integer is less than
that integer.  (A \textbf{proper divisor}\dindexsub{proper}{---
  divisor}\dindexsub{divisor}{proper ---} is a divisor other than
the number itself.)
\end{enumerate}




\section{Real numbers}\label{sect:reals}

From \S~\ref{sect:ZandQ}, we have $\Q$ and its subset $\Qp$.  We can define the relation $<$ on $\Q$ by
\begin{equation*}
x<y\liff y-x\in\Qp.
\end{equation*}

\begin{theorem}\label{thm:<-on-Q}
The relation $<$ is a strict linear ordering of $\Q$ such that
\begin{gather*}
x<y\liff x+z<y+z,\\
x<y\land 0<z\lto z\cdot x<z\cdot y.
\end{gather*}
\end{theorem}

Because of this and Theorem~\ref{thm:Q}, $\Q$ is called an \textbf{ordered field.}

A linear order is \textbf{dense}\dindex{dense} if between any two distinct elements lies a third, that is,
\begin{equation*}
\Forall x\Forall y\Exists z(x<y\lto x<z\land z<y).
\end{equation*}
An \textbf{endpoint}\index{endpoint} of a linear order is a \textbf{maximum}\dindex{maximum} or a \textbf{minimum,}\dindex{minimum} that is, an element $a$ such that no element is greater or no element is less:
\begin{equation*}
\Forall xx\leq a\lor\Forall y a\leq y.
\end{equation*}

\begin{theorem}\label{thm:density}
$(\Q,<)$ and $(\Qp,<)$ are dense linear orders without endpoints.
\end{theorem}
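
For $\Q$, a witness to density is the midpoint $(x+y)/2$; a brief check
in Python, with the standard \texttt{fractions} module (the particular
values are arbitrary):
\begin{verbatim}
from fractions import Fraction

x, y = Fraction(1, 3), Fraction(1, 2)
z = (x + y) / 2        # a witness: x < z < y whenever x < y
assert x < z < y
\end{verbatim}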

Suppose $(A,<)$ is a dense linear order without endpoints.
A \textbf{cut}\dindex{cut} of $(A,<)$ is a nonempty proper subset $B$ of $A$ whose every element is less than every element of its complement:
\begin{equation*}
\Forall x\Forall y(x\in B\land y\in A\setminus B\lto x<y).
\end{equation*}
If $C\included A$ and $d\in A$, then $d$ is an 
\textbf{upper bound}%
\dindex{upper bound}%
\dindex{bound}
of $C$ if no element of $C$ is greater than $d$:
\begin{equation*}
\Forall x(x\in C\lto x\leq d).
\end{equation*}
A
\textbf{lower bound}%
\dindex{lower bound}%
is defined similarly.
Then $d$ is a 
\textbf{supremum}%
\dindex{supremum} of $C$ if it is a
least upper bound of $C$, that is, $d$ is an upper bound of $C$ and
also a lower bound of the set of upper bounds of $C$.  Likewise, an
\textbf{infimum}%
\dindex{infimum} is a greatest lower bound. 

\begin{theorem}\label{thm:sup-inf}
Suprema and infima are unique when they exist.
If the set of lower bounds of a subset $C$ of a linear order has a supremum, then this is the infimum of $C$.
\end{theorem}

A linear order is \textbf{complete}\dindex{complete} if every nonempty subset with an upper bound has a supremum; it follows then that every nonempty subset with a lower bound has an infimum.

\begin{theorem}\label{thm:Q-not-complete}
As a linear order, $\Q$ is not complete.
\end{theorem}

\begin{proof}
The set of positive rationals $x$ such that $x^2<2$ has no supremum in $\Q$ (why not?).
\end{proof}

Suppose $A$ is a dense linear order without endpoints.  Let $\bar A$ be the set of cuts of $A$.  If $b\in A$, let
\begin{equation*}
\pred b=\{x\in A\colon x<b\},\glossary{$\pred b$}
\end{equation*}
the set of \textbf{predecessors}\dindex{predecessor} of $b$ in $A$; then $\pred b\in\bar A$. 


\begin{theorem}\label{thm:complete}
Suppose $A$ is a dense linear order without endpoints.  Then
$\bar A$, with respect to inclusion, is a dense linear order without endpoints and is complete with respect to this ordering; also the function $x\mapsto\pred x$ from $A$ to $\bar A$ is an embedding of orders.
\end{theorem}

\begin{proof}
We already know inclusion is a partial ordering of $\bar A$.
If $B$ and $C$ are distinct cuts of $A$, then we may assume $C\setminus B$ has an element $d$; then $d$ is an upper bound of $B$, and, since the cut $C$ contains every element of $A$ below $d$, we have $B\pincluded C$.  Thus inclusion linearly orders $\bar A$.

If $\family D$ is a nonempty set of cuts of $A$ with an upper bound in $\bar A$, then $\bigcup\family D$ is also a cut of $A$, and it is the supremum of $\family D$.

In $A$, if $x<y$, then $\pred x\included\pred y$ and moreover $\pred x\pincluded\pred y$ since $x\in\pred y\setminus\pred x$.
\end{proof}

If $A$ is a dense linear order without endpoints, then $\bar A$ is called the \textbf{completion}\dindex{completion}\footnote{Strictly, to justify this terminology, one should show that $\bar A$ is somehow minimal among the complete dense linear orders without endpoints in which $A$ embeds, and moreover all such minimal orders are somehow isomorphic.}
of $A$.  We can now denote the completion of $\Q$ by
\begin{equation*}
\R;
\end{equation*}
this is the set of 
\textbf{real numbers.}%
\dindexsub{real}{--- number}%
\dindexsub{number}{real ---}
The challenge is to define addition and multiplication on $\R$ and show they have the usual properties.  We can define addition on $\R$ by
\begin{equation*}
X+Y=\bigcup\{\pred{x+y}\colon\pred x\included X\land\pred y\included Y\}.
\end{equation*}
It is easier to define multiplication first on the completion of $\Qp$, which we can denote by
\begin{equation*}
\Rp.
\end{equation*}
Here we define multiplication by
\begin{gather*}
X\cdot Y=\bigcup\{\pred{xy}\colon\pred x\included X\land\pred y\included Y\}.
\end{gather*}
One then extends multiplication to $\R$, just as it is extended from $\Qp$ to $\Q$.  Ultimately one obtains:

\begin{theorem}
$(\R,+,\cdot,<)$ is an ordered field, and the function $x\mapsto\pred x$ from $\Q$ to $\R$ is an embedding of ordered fields.
\end{theorem}

Now we can refer to $\R$ as the\footnote{Again the use of the definite article \Eng{the} should be justified by a uniqueness proof.}
\textbf{complete ordered field.}%
\dindexsub{field}{complete ordered ---}%
\dindexsub{order}{complete ---ed field}%
\dindexsub{complete}{--- ordered field}
We consider $\Q$ as an ordered subfield of $\R$.

\begin{theorem}\label{thm:R-uncountable}
$\pow{\N}\injects\R$; in particular,
$\R$ is uncountable.
\end{theorem}

\begin{proof}
There is an embedding $h$ of $\mapset{\Np}{\B}$ in $\R$ given by
\begin{equation*}
h(e_k\colon k\in\Np)=\sup\left\{\sum_{k=1}^n\frac{e_k}{3^k}\colon n\in\N\right\}.\qedhere
\end{equation*}
\end{proof}


\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Prove Theorem~\ref{thm:<-on-Q}.
\item
Prove Theorem~\ref{thm:density}.
\item
Prove Theorem~\ref{thm:sup-inf}.
\item
Prove Theorem~\ref{thm:Q-not-complete}.
\end{enumerate}


\section{Well-ordered sets}\label{sect:well-ordered}

Suppose $(\Omega,\leq)$ is a linear order, and $A\included\Omega$.  A \textbf{least}\dindex{least} element of $A$ is an infimum of $A$ that also belongs to $A$.  By Theorem~\ref{thm:sup-inf}, least elements are unique when they exist.
\emph{The} least element---if it exists---of $A$ can
  be denoted by 
  \begin{equation*}
      \min(A).\glossary{$\min(A)$}
  \end{equation*}
  The linear order $(\Omega,\leq)$:
  \begin{compactenum}[1)]
    \item
\textbf{is well-ordered}\dindexsub{well}{---{}-ordered}\dindexsub{good}{well-ordered}\dindexsub{order}{well-{}---ed}
  if every non-empty subset of $\Omega$ has a least element;
\item
\textbf{admits (proof by) strong induction}\dindexsub{admits}{--- proof by
  strong induction}\dindexsub{proof}{admits --- by strong
  induction}\dindexsub{strong}{admits proof by ---
  induction}\dindexsub{induction}{admits proof by strong ---} if
  $A=\Omega$ whenever $A$ is a subset of $\Omega$ such that
\begin{equation*}
  \pred b\included A\implies b\in A
\end{equation*}
for all $b$ in $\Omega$;
\item
\textbf{admits (definition by) strong
  recursion}\dindexsub{admits}{--- definition by strong
  recursion}\dindexsub{definition}{admits --- by strong
  recursion}\dindexsub{strong}{admits definition by ---
  recursion}\dindexsub{recursion}{admits definition by strong ---} if,
  for every set $B$ and function $h$ from $\pow B$ to $B$, there
  is a unique function $G$ from $\Omega$ to $B$ such that
  \begin{equation*}
    G(c)=h(G\setimb{\pred c})
  \end{equation*}
for all $c$ in $\Omega$.
  \end{compactenum}

We shall see presently that these three conditions are equivalent.
Meanwhile, we can observe that $(\N,\leq)$ satisfies one of the
conditions.
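
On $(\N,\leq)$, where $\pred n=\{0,\dots,n-1\}$, strong recursion has
the following computational shape.  In this Python sketch (the name
\texttt{strong\_rec} is ours), $G$ is built from $h$ by accumulating
the earlier values; the particular $h$ below reappears in the proof of
Theorem~\ref{thm:well-ordered}:
\begin{verbatim}
def strong_rec(h):
    # G(n) = h(G[pred n]) = h({G(k) : k < n})
    def G(n):
        values = []                 # values[k] will hold G(k)
        for k in range(n + 1):
            values.append(h(frozenset(values)))
        return values[n]
    return G

# h(X) = 1 if and only if 1 is in X; the unique such G is constantly 0:
G = strong_rec(lambda X: 1 if 1 in X else 0)
assert [G(n) for n in range(5)] == [0, 0, 0, 0, 0]
\end{verbatim}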

\begin{lemma}\label{lem:Npred}
  $\pred{n+1}=\pred n\cup\{n\}$ for all $n$ in $\N$.
\end{lemma}

\begin{proof}
  Since $n<n+1$, we have $\pred n\cup\{n\}\included\pred{n+1}$.  For
  the reverse inclusion, suppose $a\in\pred{n+1}$, so 
  that $a<n+1$.  Then
  $a\leq n$ by Lemma~\ref{lem:<1}, so $a=n$ or $a<n$; in either case,
  $a\in\pred n\cup\{n\}$.  Thus, $\pred{n+1}\included\pred
  n\cup\{n\}$. 
\end{proof}

\begin{theorem}\label{thm:Nsi}
  $(\N,\leq)$ admits strong induction.
\end{theorem}

\begin{proof}
  Suppose $A$ is a subset of $\N$ that contains $n$ whenever it
  includes $\pred n$.  By induction, we shall show that $\pred
  n\included A$ for all $n$ in $\N$; from this, it will follow that $A=\N$.  

Since $\pred0=\emptyset$, and $\emptyset\included A$, the base case
  $\pred0\included A$ holds trivially.  As an
  inductive
  hypothesis, suppose $\pred n\included A$.  Then $n\in A$ by
  assumption, so $\pred n\cup\{n\}\included A$, that is, $\pred{n+1}\included A$ by
  Lemma~\ref{lem:Npred}.  This completes the induction.  Hence, for
  all $n$, we have $n\in\pred{n+1}\included A$, so $n\in A$.
  Thus $A=\N$.
\end{proof}

Example~\ref{example:si} will show one use of strong induction.

The linearly ordered set $(\Omega,\leq)$ is well-ordered if
and only if every 
subset with no least element is empty.  This formulation will be used
in proving the following theorem.  Also, a subset $A$ of $\Omega$ has
no least element if and only if
\begin{equation*}
  \Forall x(\pred x\cap A=\emptyset\lto x\notin A),
\end{equation*}
that is,
$\Forall x(\pred x\included\Omega\setminus A\lto x\in\Omega\setminus
A)$.  

\begin{theorem}\label{thm:well-ordered}
  The following are equivalent conditions on a linear order.
  \begin{compactenum}
    \item
It is well-ordered.
\item
It admits strong induction.
\item
It admits strong recursion.
  \end{compactenum}
\end{theorem}

\begin{proof}
Let $(\Omega,\leq)$ be a linear order.  We shall show that, if it
admits strong induction \emph{or} strong recursion, then it is
well-ordered, and if
it is well-ordered, then it admits strong induction \emph{and} strong
recursion.  Then the claim will follow from the equivalences
\begin{align*}
\sv P\lor\sv Q\lto\sv R&\sim(\sv P\lto\sv R)\land(\sv Q\lto\sv R),&
\sv R\lto\sv P\land\sv Q&\sim(\sv R\lto\sv P)\land(\sv R\lto\sv Q).
\end{align*}

Suppose $(\Omega,\leq)$ admits strong induction, but $A$ is a subset
of $\Omega$ with no least element.  We shall show that $A$ is empty.
If $a\in\Omega$, and $\pred
a\included\Omega\setminus A$, then $a\in\Omega\setminus A$, since $a$
is not a least element of $A$.  By strong induction,
$\Omega=\Omega\setminus A$, so $A=\emptyset$.  Thus
$(\Omega,\leq)$ is well-ordered.

Suppose $(\Omega,\leq)$ admits strong recursion, but $A$ is a subset
of $\Omega$ with no least element.  Let
\begin{equation*}
  C=\{x\in\Omega\colon\Exists y (y\in A\land y\leq x)\}.
\end{equation*}
Then $C$ has no least element (exercise).  For each $e$ in $\B$, let $G_e$
be the function from $\Omega$ to $\B$ given by
\begin{equation*}
  G_e(x)=
  \begin{cases}
    0,& \text{ if }x\notin C;\\
    e,& \text{ if }x\in C.
  \end{cases}
\end{equation*}
So $G_1$ is the characteristic function%
\index{characteristic function}%
\index{function!characteristic ---} of $C$ on $\Omega$ in 
  the sense of \S~\ref{sect:equipollence}, but $G_0$ is
  the constant function $x\mapsto 0$ on $\Omega$.
Let $h$ be the function from $\pow{\B}$ to $\B$ given by
\begin{equation*}
  h(X)=1\iff 1\in X,
\end{equation*}
that is,
\begin{equation*}
  h(X)=
  \begin{cases}
    0,& \text{ if }X\in\bigl\{\emptyset,\{0\}\bigr\};\\
    1,& \text{ if }X\in\bigl\{\{1\},\{0,1\}\bigr\}.
  \end{cases}
\end{equation*}
Then $G(a)=h(G\setimb{\pred a})$ for all $a$ in $\Omega$, whether $G$
is $G_0$ or $G_1$ (exercise).  By strong recursion, there is a
\emph{unique} such function $G$, so $G_0=G_1$.
Therefore $C=\emptyset$.  Thus $(\Omega,\leq)$ is well-ordered.

Now, conversely, suppose $(\Omega,\leq)$ is well-ordered.  First, let
$A$ be a subset of $\Omega$ such that, if $\pred a\included A$, then
$a\in A$, for all $a$ in $\Omega$.  Consequently, if $\pred a\cap(\Omega\setminus
A)=\emptyset$, then $a\notin\Omega\setminus A$.  Then $\Omega\setminus
A$ has no least element, so it is empty, and $A=\Omega$.  Thus
$(\Omega,\leq)$ admits strong induction.

Finally,
using that $(\Omega,\leq)$ admits strong induction, we
shall follow the proof of the Recursion Theorem, \ref{thm:recursion},
to prove that $(\Omega,\leq)$ admits strong recursion.  
Suppose
$B$ is a set, and $h\colon\pow B\to B$.  Let $\family S$ be the set of
relations $R$ from $\Omega$ to $B$ such that, if $(a,b)\in R$, then there is a function $f$ from $\pred a$ to $B$ such that $f\included R$ and
\begin{equation*}
b=h(f\setimb{\pred a}).
\end{equation*}
Let $T=\bigcup\family S$.  We show first that $T$ is a function $G$ from $\Omega$ to $B$, that is, for all $x$ in $\Omega$,
\begin{equation*}
\Existsunique yx\mathrel Ty.  
\end{equation*}
Suppose, as a strong inductive hypothesis, that this is true when $x\in\pred a$.  Then there is a function $f$ from $\pred a$ to $B$ such that
\begin{equation*}
f(x)=y\iff x\mathrel Ty.
\end{equation*}
Then $(a,h(f\setimb{\pred a}))\in T$, since the relation $f\cup\{(a,h(f\setimb{\pred a}))\}$ belongs to $\family S$; moreover, if $(a,b)\in T$, then the witnessing function from $\pred a$ to $B$ must agree with $T$, hence with $f$, so $b=h(f\setimb{\pred a})$.  By strong induction, $T$ is a function $G$ from $\Omega$ to $B$.  If now $a\in\Omega$, and $f=G\restriction{\pred a}$, then we must have $G(a)=h(f\setimb{\pred a})$, again by definition of $T$; so
$G(a)=h(G\setimb{\pred a})$.

Suppose also $G'\colon\Omega\to B$ and $G'(a)=h(G'\setimb{\pred a})$ for all $a$ in $\Omega$.
Let
\begin{equation*}
  D=\{x\in\Omega\colon G(x)=G'(x)\}.
\end{equation*}
If $\pred a\included D$, then $G'(a)=h(G'\setimb{\pred a})=
h(G\setimb{\pred a})= G(a)$, so $a\in D$.  By strong induction,
$D=\Omega$, so $G'=G$.  Thus $G$ is the only function on $\Omega$ in
$\family S$, and $(\Omega,\leq)$ admits strong recursion.
\end{proof}

\begin{corollary}\label{cor:strongrec}
  $(\N,\leq)$ is well-ordered and admits strong recursion.
\end{corollary}

\begin{proof}
  Theorem~\ref{thm:Nsi}.
\end{proof}

\subsection*{Interrelations}

What is the force of the word \Eng{strong} in \Eng{strong induction}
and \Eng{strong recursion?}

Structures that admit induction or recursion have a signature that
includes $\{{}\scr{},0\}$.  Structures that admit strong induction or strong
recursion have a signature that includes $\{\leq\}$.  The next theorem
establishes one possible connexion between these two kinds of structures:


\begin{theorem}\label{thm:ind-strong}
  Suppose $(\Omega,{}\scr{},0)$ admits induction and has a partial ordering
  $\leq$ such that $a<\scr a$ for all $a$ in $\Omega$.  Then $\leq$ is
  a linear ordering, and $\N$ and $\Omega$ are isomorphic as structures
  in the signature $\{{}\scr{},0,\leq\}$: in particular,
  $(\Omega,\leq)$ admits strong induction.
\end{theorem}

\begin{proof}
  Since $(\N,{}\scr{},0)$ admits recursion, there is a homomorphism
  $h$ from $(\N,{}\scr{},0)$ to
  $(\Omega,{}\scr{},0)$.  In particular,
  \begin{equation*}
    \scr{h(m)}=h(m+1)
  \end{equation*}
for all $m$ in $\N$.
We shall first show that the function $h$ is
  also a homomorphism
  from $(\N,<)$ to  $(\Omega,<)$; that is,
  \begin{equation}\label{eqn:<<}
    \Forall x(x<n\lto h(x)<h(n))
  \end{equation}
for all $n$ in $\N$.  This is trivially true when
  $n=0$.  
  Suppose it is true when $n=m$, and now $a<m+1$.  Then $a\leq m$.
  Either $a=m$ or $a<m$.
  \begin{compactenum}
    \item
  If $a=m$, then $h(a)=h(m)<\scr{h(m)}=h(m+1)$.
\item
  If $a<m$, then by inductive
  hypothesis, $h(a)<h(m)<\scr{h(m)}=h(m+1)$.  
  \end{compactenum}
In either case, $h(a)<h(m+1)$.
Thus \eqref{eqn:<<} is true when
  $n=m+1$.  By induction, it is true for all $n$ in $\N$.

Also, $h$ is surjective, by induction in $(\Omega,{}\scr{},0)$.
Indeed, $0\in h\setimb{\N}$, and if $a\in h\setimb{\N}$, then $a=h(n)$
for some $n$ in $\N$, so $\scr a=\scr{h(n)}=h(n+1)$, and $\scr a\in
h\setimb{\N}$.  

Since $h$ is order-preserving by~\eqref{eqn:<<}, it is injective; being
also surjective, it is a bijection, and hence an isomorphism from $\N$ to
$\Omega$ in the signature $\{{}\scr{},0\}$.  To complete the proof, it
is enough to show that $h\inv$ is order-preserving.
If $h(m)\leq h(n)$, then $\lnot(h(n)<h(m))$, so
$\lnot(n<m)$ by~\eqref{eqn:<<}; hence, $m\leq n$.
\end{proof}

Thus, roughly,
\begin{equation}\label{eqn:i-strong}
  \text{induction} \amp \text{ordering} \implies \text{strong induction}.
\end{equation}
It is sometimes suggested\footnote{For example, Epp~\cite[\S~4.4,
    p.~213]{Epp} says that the two methods of proof are
  equivalent; but the proofs use hidden assumptions.} that strong
    induction can be proved from induction alone.  It cannot; there has to
    be an ordering, as in the theorem, and induction alone does not guarantee that there is such an ordering.
Example~\ref{exam:ind-not-imp-rec} gives a structure that admits
induction, but has no ordering such that $\Forall xx<x+1$.

Strong induction on $\N$ is called strong because it involves a
stronger \emph{hypothesis} than ordinary induction.  To prove
$\N\models\Forall x\phi(x)$ by induction, one proves two things, as
described in \S~\ref{sect:sets}:
\begin{compactenum}
  \item
$\N\models\phi(0)$.
\item
$\N\models\Forall x(\phi(x)\lto \phi(x+1))$.
\end{compactenum}
The inductive hypothesis here is $\phi(x)$.
To make the proof by strong induction, one proves one thing:
\begin{compactenum}
  \item
$\N\models\Forall x(\Forall y(y<x\lto\phi(y))\lto\phi(x))$.
\end{compactenum}
Here the \textbf{strong inductive hypothesis}\dindexsub{strong}{---
  inductive hypothesis}\dindexsub{inductive}{strong ---
  hypothesis}\dindexsub{hypothesis}{strong inductive ---} is $\Forall
y(y<x\lto\phi(y))$.  If $x$ is $0$, then this hypothesis is trivially true;
if $x$ is not $0$, then $x$ is a successor.  Hence we can analyse a
proof by strong induction into two steps, as with ordinary induction:
\begin{compactenum}
  \item
$\N\models\phi(0)$.
\item
$\N\models\Forall x(\Forall y(y\leq x\lto\phi(y))\lto\phi(x+1))$.
\end{compactenum}
In this formulation, the strong inductive hypothesis is $\Forall
y(y\leq x\lto\phi(y))$, that is,
\begin{equation*}
\phi(0)\land\phi(1)\land\dots\land\phi(x); 
\end{equation*}
this is a stronger
assumption than $\phi(x)$ alone.  Sometimes this stronger assumption
is just what one needs:

\begin{example}\label{example:si}
  To prove that every natural number other than $1$ has a prime
  divisor, it seems not enough to use induction.  If $n$ has prime
  divisors, what does that say about $n+1$?  But every positive
  integer divides $0$, so $0$ has prime divisors.  Suppose $n>0$, and
  all of the
  numbers in the set $\{2,3,4,\dots,n\}$ have prime divisors.  If
  $n+1$ is prime, then it is its own prime divisor.  If $n+1$ is
  composite, then it has a divisor in the set just named, by
  Exercise~\ref{exercise:proper-divisor} in \S~\ref{order}.  By strong
  inductive hypothesis, this divisor has a prime divisor, which is
  then a divisor of $n+1$.
\end{example}
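
The strong induction of this example corresponds to a strongly
recursive computation; the following Python sketch (the name
\texttt{prime\_divisor} is ours) finds a prime divisor of any natural
number other than $1$:
\begin{verbatim}
def prime_divisor(n):
    # defined for natural numbers other than 1
    if n == 0:
        return 2                    # every prime divides 0
    for d in range(2, n):
        if n % d == 0:
            return prime_divisor(d) # the strong inductive hypothesis
    return n                        # no proper divisor exceeds 1, so n is prime

assert prime_divisor(1806) == 2 and prime_divisor(91) == 7
\end{verbatim}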

From the theorem, there follows a connexion between recursion and strong recursion:

\begin{corollary}
  Every structure $(\Omega,{}\scr{},0)$ that admits recursion has a
  partial ordering $\leq$ such that $a<a+1$ for all $a$ in
  $\Omega$.  If $\leq$ is any such ordering on $\Omega$,
  then $\leq$ is linear, and $(\Omega,\leq)$ admits strong recursion.
\end{corollary}

\begin{proof}
  Every structure that admits recursion satisfies the Peano axioms, by
  Corollary~\ref{cor:rec-Peano}; in particular, it has a linear
  ordering as defined in \S~\ref{order}, so it admits strong recursion
  by Corollary~\ref{cor:strongrec}.  If $\leq$ is just a partial
  ordering of the structure such that $\Forall xx<x+1$, then the
  theorem applies, showing that the structure is isomorphic to $\N$
  and so admits strong recursion.
\end{proof}

In short then, 
\begin{equation}\label{eqn:r-strong}
  \text{recursion} \implies \text{strong recursion}.
\end{equation}
That is, logically, recursion is at least as strong as strong
recursion.  
The converses of~\eqref{eqn:r-strong}
and~\eqref{eqn:i-strong} fail.  To show this, some more definitions
will be useful.  Let $(\Omega,\leq)$ be a well-ordered set.  We can
use $0$ as a name for $\min(\Omega)$.  An element $a$ of $\Omega$ is a
\textbf{limit}\dindex{limit} if
\begin{compactenum}[1)]
  \item
$a\neq0$, and
\item
$\Forall x\Exists y(x<a\lto x<y<a)$.
\end{compactenum}
In short, $a$ is a limit if it is not zero and has no immediate
predecessor.

\begin{examples}\label{examples:limits}
\mbox{}
\begin{asparaenum}
    \item
$(\N,\leq)$ has no limits.
\item
Extend $\leq$ so that it well-orders $\N\cup\{\infty\}$ by defining
$n<\infty$ for all $n$ in $\N$.  Then $\infty$ is a limit.
\end{asparaenum}
\end{examples}

A \textbf{greatest}\dindex{greatest}
element of a subset $A$ of $\Omega$ is a supremum of $A$ that belongs to $A$.
Suppose $\Omega$ itself has no
 greatest element.  Then every element $a$ of $\Omega$ has a successor, $\scr a$, given by
  \begin{equation*}
    \scr x=\min(\{y\in\Omega\colon x<y\}).
  \end{equation*}
In this case, the limits of $\Omega$ are just those elements not in
$\{0\}\cup\{\scr x\colon x\in\Omega\}$, that is, the non-zero elements of
$\Omega$ that are not successors.

\begin{theorem}\label{thm:wo-ind-rec}
  Every well-ordered set with no greatest element and no limits admits
  induction and recursion.
\end{theorem}

\begin{proof}
We shall show that such structures satisfy the Peano axioms.
In such structures, we always have $0\leq x<\scr x$.  In particular, $0\neq\scr x$.  Thus \axz{} is satisfied.  Also, if $a<b$, then $\scr a\leq
b<\scr b$; so \axu{} is satisfied.
Finally, suppose $A$ is a proper subset of such a structure $\Omega$,
and $0\in A$.  Then $\Omega\setminus A$ has a least element $b$, which
is not $0$, so it must be a successor $\scr c$.  Then $c\in A$, but
$\scr c\notin A$.  Contrapositively,\index{contrapositive} if $0\in
A$, and $\Forall x(x\in A\lto\scr x\in A)$, then $A=\Omega$.  That is,
\axi{} is satisfied. 
\end{proof}

If a well-ordered set does have a greatest element, then this can have
no successor, so induction and recursion are meaningless.  If the
well-ordered set $\Omega$ has no greatest element, but does have
limits, let $\ell$ be its \emph{least} limit.  Then $\pred{\ell}$
satisfies the hypotheses of Theorem~\ref{thm:wo-ind-rec}, so it admits
induction and recursion; but the whole structure $\Omega$ does not
(exercise).\footnote{Rotman \cite{Rotman} gives an intuitive argument,
  based tacitly on induction and the ordering, for why $\N$ is
  well-ordered; then he claims to \emph{prove} induction, seemingly
  from well-ordering alone.  The hidden assumption is that every
  non-zero element of $\N$ is a successor.}

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Supply the missing details in the proof of
Theorem~\ref{thm:well-ordered}. 
\item
Show that there are well-ordered sets with no greatest element that do
not admit induction or recursion.
\item
Find a formula $\psi(x,y)$ containing no quantifiers such that the
sentence $\Forall x\Exists y\psi(x,y)$ is logically equivalent to
$\Forall x(\Forall y(y<x\lto\phi(y))\lto\phi(x))$.
\end{enumerate}

\section{Ordinal numbers}\label{sect:ordinals}

A class that includes each of its elements is called
\textbf{transitive.}\dindexsub{transitive}{--- class}
So $\class C$ is transitive if and only if
\begin{equation*}
A\in B\land B\in\class C\implies A\in\class C.
\end{equation*}
By the definition given in \S~\ref{sect:sets}, an 
\textbf{ordinal,} or
\textbf{ordinal number,}%
\dindex{ordinal}%
\dindexsub{number}{ordinal}
is a \emph{set} that is transitive and is \emph{strictly} well-ordered
by membership.  The class of ordinals is denoted by
\begin{equation*}
\on.
\end{equation*}
The Greek letters $\alpha$,
$\beta$, $\gamma$, \dots\ will henceforth denote ordinals.

\begin{lemma}\label{lem:on-trans}
$\on$ is transitive, that is, every element of an
  ordinal is an ordinal.  Also every ordinal \emph{properly} includes
  its elements.
\end{lemma}

\begin{proof}
Suppose $\alpha\in\on$ and $b\in\alpha$.  Then
$b\included\alpha$ by transitivity of $\alpha$, so $b$ is well-ordered by membership.
Suppose $c\in b$ and $d\in c$.  Then
$c\in\alpha$, so $c\included\alpha$, and hence
$d\in\alpha$.  Since $d\in c$ and $c\in b$, and
all are elements of $\alpha$, where membership is a transitive
relation, we have $d\in b$.  Thus $b$ is transitive, so it
is an ordinal.  Therefore $\alpha\included\on$.  So $\on$ is transitive.   

Finally, $b\pincluded\alpha$ simply because membership is a
\emph{strict} ordering of $\alpha$.
\end{proof}

\begin{lemma}
  Every ordinal contains every ordinal that it properly includes.
\end{lemma}

\begin{proof}
  Suppose $\beta\pincluded\alpha$.  Then $\alpha\setminus\beta$
  contains some $\gamma$.  Then $\beta\included\gamma$; indeed, if
  $\delta\in\beta$, then, since $\gamma\notin\beta$, we have $\gamma\notin\delta$ (by transitivity of $\beta$) and $\gamma\neq\delta$, so $\delta\in\gamma$ (since $\alpha$ is strictly linearly ordered by membership).  Suppose
  $\beta\pincluded\gamma$.  Then $\gamma\setminus\beta$ contains some
  $\delta$, so by what we have just shown, $\beta\included\delta$.
  But $\delta\in\gamma$, and $\delta\in\alpha\setminus\beta$ (since
  $\gamma\included\alpha$); so $\gamma$ was not the least element of
  $\alpha\setminus\beta$.  Therefore the least element of
  $\alpha\setminus\beta$, which exists since $\alpha$ is well-ordered by
  membership, must be $\beta$ itself; in particular, $\beta\in\alpha$.
\end{proof}

\begin{theorem}[Burali-Forti Paradox \protect{\cite{Burali-Forti}}]
$\on$ is transitive and well-ordered by membership; so it is not a set.
\end{theorem}

\begin{proof}
Because membership is a \emph{strict} ordering of an ordinal, membership is irreflexive on $\on$.  Because each ordinal is transitive, membership is a transitive relation on $\on$.
Let $\alpha$ and $\beta$ be two distinct ordinals such that $\beta\notin\alpha$.  By strong induction in $\alpha$, we have $\alpha\included\beta$.
Indeed, say $\gamma\in\alpha$ and $\gamma\included\beta$.  Then $\gamma\neq\beta$ (since $\beta\notin\alpha$), so $\gamma\pincluded\beta$, and hence $\gamma\in\beta$ by the last lemma.
Thus $\alpha\included\beta$; and since $\alpha\neq\beta$, the last lemma gives $\alpha\in\beta$.
Therefore $\on$ is strictly linearly ordered by membership.
In particular, if $\alpha\in\on$, then
$\alpha\neq\on$; so $\on$ is not an ordinal. 

If $a$ is a set of ordinals
with an element $\beta$, then the least element of $a$ is the least
element of $a\cap\beta$, if this set is nonempty; otherwise it is
$\beta$.  Thus $\on$ is well-ordered by membership.  Since, however,
$\on$ is not an ordinal, it cannot be a set.
\end{proof}

Since, on $\on$ and hence on every ordinal, the relations of
membership and proper inclusion are the same, these can be denoted by
$<$. 

\begin{theorem}
$\on$ contains $0$ and is closed under $x\mapsto x\cup\{x\}$.
\end{theorem}

For $\alpha\cup\{\alpha\}$, we may write
\begin{equation*}
\vscr{\alpha};
\end{equation*}
this is the \textbf{successor}\dindex{successor} of $\alpha$.  The operation $x\mapsto\vscr x$ on $\on$ is \textbf{succession.}\dindex{succession}
By the definition in \S~\ref{sect:sets}, $\vnn$ is the class of ordinals that neither \emph{are} limits nor
\emph{contain} limits.\footnote{One could say, `$\vnn$ is the class of ordinals
  that neither \emph{are} nor \emph{contain} limits'; but this would violate the principles laid down in \cite[Cases]{MEU}
  and reaffirmed in \cite{MEU2}. 
  In the original sentence, the second instance of \emph{limits} is the direct
  object of \emph{contain,} so it is notionally in the `objective
  case'; but the first instance of \emph{limits} is not an object
  of \emph{are} (which does not take objects), but is in the
  `subjective case', like the subject, \emph{that,} of the relative
  clause.  On similar grounds, the common 
  expression `$x$ is less than or equal to $y$' is objectionable,
  unless \emph{than,} like \emph{to,} is construed as a preposition.
  However, allowing \emph{than} to be used as a preposition can cause
  ambiguity: does `She likes tea better than me' mean `She likes tea
  better than she likes me', or `She likes tea better than I do'?
  Hence it is recommended in \cite[Than 6]{MEU} and (less strongly)
  in \cite{MEU2} that \emph{than} not be used as a preposition.  Then 
  `$x\leq y$' should be read as `$x$ is less than $y$ or [$x$ is]
  equal to $y$.'}
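
For instance, computing directly from the definition of the successor,
\begin{equation*}
\vscr0=0\cup\{0\}=\{0\}=1,\qquad
\vscr1=1\cup\{1\}=\{0,1\}=2,\qquad
\vscr2=\{0,1,2\}=3;
\end{equation*}
so, on $\vnn$, succession is just the familiar passage from $n$ to
$n+1$.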

An alternative form of the Axiom of Infinity, \ref{ax:infinity}, is the following.

\begin{axiom}[Infinity \protect{[second form]}]%
\dindexsub{axiom}{A--- of Infinity}%
\dindexsub{infinity}{Axiom of I---}
$\vnn$ is a set.
\end{axiom}

This formulation \emph{implies} the earlier one, by the following.

\begin{theorem}
$\vnn$ contains $0$ and is closed under succession, and the following hold.
\begin{enumerate}
	\item 
	%The number 
	$0$ is not a successor.
	\item
	Succession on $\vnn$ is injective.
	\item
	$(\vnn,0,{}')$ admits induction.  
\end{enumerate}
\end{theorem}

\begin{proof}
\begin{asparaenum}
\item
Successors are nonempty.
\item
If $\alpha$ and $\beta$ are distinct ordinals, then we may assume
$\alpha\in\beta$, so that $\beta\notin\alpha'$; but
$\beta\in\beta'$, so $\alpha'\neq\beta'$. 
\item
Suppose $A$ is a proper subset of $\vnn$.  Then
$\vnn\setminus A$ has a least element $\alpha$.  Since no element of
$\vnn$ is a limit, either
$\alpha=0$ or else $\alpha=\beta'$ for some $\beta$; and this $\beta$,
being less than $\alpha$, must be in $A$.
Hence $A$ either does not contain $0$ or else is not closed
under succession. 
\qedhere
\end{asparaenum}
\end{proof}

The first form of the Axiom implies the second form, once we have the following.

\begin{axiom}[Replacement]
The image of a set under a function is a set.
\end{axiom}

In proving the Recursion Theorem~\ref{thm:recursion}, we never need that $A$ and $f$ are sets; they need only be classes.  By the Theorem then, there is a homomorphism from $(\N,0,{}\scr{})$ into $(\on,0,{}\vscr{})$; the image of $\N$ under this homomorphism is $\vnn$, which is therefore a set, by Replacement.

\begin{theorem}\label{thm:vnn-in-on}
$\vnn\in\on$.
\end{theorem}

We now have ordinals beyond $\vnn$, namely $\vscr{\vnn}$, $\vscr{\vscr{\vnn}}$, and so on.  These are usually written as $\vnn+1$, $\vnn+2$, and so on.  By recursion, there is a function $n\mapsto\vnn+n$ from $\vnn$ into $\on$; the union of the image of $\vnn$ under this function is denoted by one of
\begin{align*}
&\vnn+\vnn,&&\vnn\cdot2.
\end{align*}
Continuing these ideas, we can develop
an arithmetic of ordinals, according to which we can list the
ordinals as
\begin{equation*}
  0,1,2,3,\dots;\vnn,\vnn+1,\vnn+2,\dots;\vnn\cdot 2,
  \dots; \vnn^2,\dots;\vnn^{\vnn};\dots 
\end{equation*}
Thus we have a way to extend the ordinary list \Eng{first, second,
  third, \dots} of ordinal numbers.
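
One feature of this arithmetic may be noted here, without proof: when
addition is defined in general (by recursion on the second argument),
it is not commutative.  For example,
\begin{equation*}
1+\vnn=\bigcup_{n\in\vnn}(1+n)=\vnn,
\qquad\text{although}\qquad
\vnn<\vnn+1.
\end{equation*}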
  
\begin{theorem}\label{thm:ord-sup}
For every set of ordinals, there is an ordinal that is greater than every ordinal in the set.  Indeed, the union of a set of ordinals is the supremum of the set; and the successor of this supremum is greater than every ordinal in the set.
\end{theorem}
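
For example, for the set $\{\vnn+n\colon n\in\vnn\}$ considered above,
\begin{equation*}
\bigcup\{\vnn+n\colon n\in\vnn\}=\vnn\cdot2,
\end{equation*}
so $\vnn\cdot2$ is the supremum of this set, and the successor
$\vscr{(\vnn\cdot2)}$ is greater than every element of the set.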


\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Prove Theorem~\ref{thm:vnn-in-on}.
\item
Prove Theorem~\ref{thm:ord-sup}.
\end{enumerate}


\section{Cardinal numbers}\label{sect:cardinality}

By the definition in \S~\ref{sect:equipollence}, if $A$ is a finite set, then $A\equip n$ for some $n$ in $\vnn$.  By Theorem~\ref{thm:no-subset}, this $n$ is unique; so we can call it the \textbf{cardinality}\dindex{cardinality} of $A$, and we 
may write
\begin{equation}\label{eqn:finite-size}
  \size A=n.\glossary{$\left|A\right|=n$}
\end{equation}
We aim to define a cardinality $\size A$ for all sets $A$.

\begin{lemma}\label{lem:f-s}
  If $A$ is finite, and there is a surjective function from $A$ onto
  $B$, then $B$ is finite.
\end{lemma}

\begin{proof}
  Use induction on the cardinality of $A$.  The claim is trivially
  true if $\size A=0$.  Suppose it is true when $\size A=n$, but now
  $\size A=n+1$, and $f$ is a surjection from $A$ onto $B$.  We may
  assume that $A$ is just $\pred{(n+1)}$.  Let $c=f(n)$.  There are
  two possibilities:
  \begin{asparaenum}
    \item
If also
  $c=f(m)$ for some $m$ in $\pred n$, then $f\restriction{\pred n}$ is
  still surjective onto $B$, so $B$ is finite by inductive hypothesis.
\item
  Suppose $f\setimb{\pred n}\included B\setminus\{c\}$.  Then
  $f\restriction{\pred n}$ is a surjection onto $B\setminus\{c\}$, so
  this set is finite, again by inductive hypothesis.  In this case,
  there is a bijection $h$ from $\pred k$ onto $B\setminus\{c\}$ for
  some $k$ in $\vnn$.  Then $h\cup\{(k,c)\}$ is a bijection from
  $\pred{(k+1)}$ onto $B$, so $B$ is finite.
  \end{asparaenum}
The induction is complete.
\end{proof}

\begin{theorem}\label{thm:fin}
Suppose $A\injects B$.  
  If $B$ is finite, then $A$ is finite.
\end{theorem}

\begin{proof}
  It is enough to show that if $A\included B$, and $B$ is finite, then
  $A$ is finite.
If $A$ is empty, then $\size A=0$.  Suppose $A$ contains $c$.  Define
  $f$ from $B$ to $A$ by:
\begin{equation*}
  f(x)=
  \begin{cases}
    x,&\text{ if }x\in A;\\
c,&\text{ if }x\notin A.
  \end{cases}
\end{equation*}
Then $f$ is surjective, so the claim follows by Lemma~\ref{lem:f-s}.
\end{proof}

Contrapositively, if $A\injects B$, and $A$ is infinite, then so is $B$.
Hence we can show that a set $A$ is infinite if we
can find an injective 
function $G$ from $\vnn$ to 
$A$.  
Does the converse hold?
That $G$ is injective means precisely that
\begin{equation*}
  G(n+1)\in A\setminus\{G(0),\dots,G(n)\}
\end{equation*}
for all $n$ in $\vnn$.
Now, if $A$ is infinite, then for each $n$ in $\vnn$ the set
\begin{equation*}
A\setminus\{G(0),\dots,G(n)\}  
\end{equation*}
is not empty by Lemma~\ref{lem:f-s}, so there is some hope
that the function $G$ exists.
Does strong recursion (that is, Corollary \ref{cor:strongrec}) give us such a
function $G$?  It does, \emph{if} there is a function $h:\pow{A}\to A$
such that $h(X)\notin X$ when $X\neq A$.  However, we have no reason,
so far, to assert that such a function exists.
That functions like $h$ exist is a consequence of:

\begin{axiom}[Choice]\label{ax:choice}\dindexsub{axiom}{A--- of Choice}%
  \dindexsub{choice}{Axiom of C---}
  For every nonempty set $A$, there is a function $f\colon \pow A\to A$ such that
  $f(C)\in C$ whenever $C\neq0$.
\end{axiom}
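
In one concrete case, the Axiom is not needed: since $\vnn$ is
well-ordered, the rule
\begin{equation*}
f(C)=
\begin{cases}
\text{the least element of }C,&\text{ if }C\neq0;\\
0,&\text{ if }C=0
\end{cases}
\end{equation*}
defines a choice-function for $\vnn$ outright.  The Axiom is wanted for
sets that come equipped with no such definable rule.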

It was proved by G\"odel that this axiom is consistent with our other axioms; it was proved by Paul Cohen~\cite{MR0232676} that the Axiom of Choice is not \emph{entailed} by our other axioms.

A function $f$ as in the Axiom of Choice is called a
\textbf{choice-function.}
\dindexsub{choice}{---{}-function}
\dindexsub{function}{choice-{}---}  

\begin{theorem}\label{thm:inf-vnn}
If $A$ is infinite, then $\vnn\injects A$.
\end{theorem}

\begin{proof}
Let $f$ be a choice-function for $A$, and define $h$ on $\pow A\setminus\{A\}$ by
\begin{equation*}
h(X)=f(A\setminus X).
\end{equation*}
Then strong recursion gives us the desired embedding of $\vnn$ in $A$, as suggested above.
\end{proof}
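
Unwound, the recursion just used gives
\begin{equation*}
G(0)=f(A),\qquad G(n+1)=f(A\setminus\{G(0),\dots,G(n)\}),
\end{equation*}
so that $G(n+1)$ always lies in $A\setminus\{G(0),\dots,G(n)\}$, which
is just the condition for injectivity noted above.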

 Now we can prove the converse of Theorem~\ref{thm:no-subset}.

\begin{corollary}
Every infinite set is equipollent to a proper subset of itself.
\end{corollary}

\begin{proof}
If $A$ is infinite, we may assume $\vnn\included A$, and then we can define $f$ on $A$ by
\begin{equation*}
f(x)=\begin{cases}
x+1,&\text{ if }x\in\vnn,\\
x,&\text{ if }x\in A\setminus\vnn.
\end{cases}
\end{equation*}
This shows $A\equip A\setminus\{0\}$, a proper subset.
\end{proof}

Similarly, we have:

\begin{corollary}
  If $A$ is infinite, then $A\cup\{A\}\equip A$.
\end{corollary}

\begin{proof}
The claim is trivially true if $A\in A$; so suppose $A\notin A$, and
$f$ is an injection from $\vnn$ to $A$.  Define
  a function $g$ from $A\cup\{A\}$ to $A$ by:
  \begin{equation*}
      g(x)=
  \begin{cases}
    f(0),& \text{ if }x=A;\\
x,&\text{ if }x\in A\setminus f\setimb{\vnn};\\
f(f\inv(x)+1),&\text{ if }x\in f\setimb{\vnn}.
  \end{cases}
  \end{equation*}
Then $g$ is a bijection.
\end{proof}

The converse of this corollary is also true: for if $A\equip
A\cup\{A\}$, then $A\cup\{A\}$ is equipollent with a proper subset of
itself, namely $A$; so $A\cup\{A\}$, and therefore $A$, is infinite, by
Theorem~\ref{thm:no-subset}.  But is $A$ really a \emph{proper} subset
of $A\cup\{A\}$?  Suppose if possible that
$A=A\cup\{A\}$.  Then $A\in A$, which is very strange, and which is
ruled out by:

\begin{axiom}[Foundation]\dindexsub{axiom}{A--- of Foundation}
  \dindexsub{foundation}{Axiom of F---}
Every non-empty set $A$ has an element that has no elements in common
with $A$:
  \begin{equation*}
      \exists X\qsep(X\in A\land X\cap A=\emptyset)
  \end{equation*}
for all non-empty sets $A$.  
\end{axiom}
Here, if we 
replace $A$ with $\{A\}$, then this set has the single element $A$, so
$A\cap\{A\}=\emptyset$, which means $A\notin A$.
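
Similarly, replacing $A$ with $\{A,B\}$ rules out the cycle $A\in
B\land B\in A$: some element of $\{A,B\}$ must have empty intersection
with $\{A,B\}$, whereas the cycle would give $B\in A\cap\{A,B\}$ and
$A\in B\cap\{A,B\}$.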

\begin{theorem}\label{thm:set-ord}
Every set is equipollent with some ordinal.
\end{theorem}

\begin{proof}
Supposing $f$ is a choice-function for $A$, define $h$ as in the proof of Theorem~\ref{thm:inf-vnn}.  Let $\class C$ be the class of ordinals $\alpha$ for which there is a function $g$ from $\alpha$ to $A$ given by
\begin{equation*}
g(\beta)=h(g\setimb{\beta}).
\end{equation*}
Such a function $g$ is unique (by Strong Recursion) and can be denoted
by $g_{\alpha}$.  Moreover, if $\beta$ and $\gamma$ are both in $\class
C$ and $\beta\leq\gamma$, then $g_{\beta}\included g_{\gamma}$.
Therefore, if $\class C$ has no upper bound in $\on$, then
$\bigcup_{\beta\in\class C}g_{\beta}$ is an embedding of $\on$ in $A$,
which is absurd, since $\on$ is a proper class.  Hence $\class C$ has
a least upper bound, say $\gamma$; then
$\bigcup_{\beta\in\class C}g_{\beta}$ is a bijection from $\gamma$ to
$A$: it is injective, since always $h(X)\notin X$; and it is
surjective, since otherwise it could be extended by a further
application of $h$, so that $\gamma+1$ would be in $\class C$.
\end{proof}

This theorem lets us adapt the proof of the Completeness
Theorem,~\ref{thm:1-completeness}, to the case where $\lang$ is
uncountable: we have only to index the sentences of $\lang$ by the
ordinals less than some ordinal, and then we can obtain the sets
$\Gamma_{\alpha}$ by transfinite recursion.

Our main purpose now is to define $\size A$, the
\textbf{cardinality}\dindex{cardinality} of $A$, as the \emph{least}
ordinal that is equipollent with $A$.  Then the cardinalities are
well-ordered; in particular, we have: 

\begin{corollary}[Schr\"oder--Bernstein Theorem\footnote{This theorem is
      commonly attributed to Schr\"oder and Bernstein, who, according
  to \cite[p.~81]{MR83e:04002}, proved the theorem independently in
  the 1890s.  But the theorem is attributed to Cantor in \cite[\S~8.3,
  p.~171]{MR2001a:03072}.}]\label{thm:Sch--B}
   For all sets $A$
  and $B$,
  \begin{equation*}
  A\injects B\amp B\injects A\implies A\equip B.
  \end{equation*}
\end{corollary}
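
In sketch (granting the auxiliary fact that a subset of $B$ has
cardinality at most $\size B$), mutual injections give
\begin{equation*}
\size A\leq\size B\amp\size B\leq\size A,
\end{equation*}
so $\size A=\size B$, and therefore $A\equip B$.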

The \textbf{cardinals} or \textbf{cardinal numbers}%
\dindex{cardinal}%
\dindexsub{number}{cardinal ---} 
are the ordinals that are cardinalities of some set.  The cardinals compose the class
\begin{equation*}
\cn.
\end{equation*}
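
For example, by an earlier corollary, $\vscr{\vnn}\equip\vnn$, while no
element of $\vnn$ is equipollent with the infinite set $\vscr{\vnn}$;
so
\begin{equation*}
\size{\vscr{\vnn}}=\vnn,
\end{equation*}
and in particular $\vscr{\vnn}$ is not a cardinal.
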
Most infinite ordinals are \emph{not} cardinals; but by strong recursion, there is an
order-preserving bijection 
\begin{equation*}
  \alpha\longmapsto\aleph_{\alpha}\glossary{$\aleph_{\alpha}$}
\end{equation*}
from the class of ordinals to the class of infinite cardinals.  
Here $\aleph$
is the Hebrew letter \emph{aleph.}\index{aleph} 
By definition,
$\aleph_{\beta}$ is the least infinite cardinal that is greater than each of the cardinals $\aleph_{\alpha}$ such that $\alpha<\beta$.  If $\beta$ is a limit ordinal, then by Theorem~\ref{thm:ord-sup}, $\aleph_{\beta}$ is just $\bigcup_{\alpha<\beta}\aleph_{\alpha}$; but if $\beta=\gamma+1$, then $\aleph_{\beta}$ is the least cardinal $\kappa$ such that
\begin{equation*}
\aleph_{\gamma}<\kappa\leq\size{\pow{\aleph_{\gamma}}}.
\end{equation*}
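
For instance, the least infinite cardinal is $\vnn$ itself, so
$\aleph_0=\vnn$; and at the first limit stage,
\begin{equation*}
\aleph_{\vnn}=\bigcup_{n\in\vnn}\aleph_n.
\end{equation*}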

\begin{theorem}
$\R\equip\pow{\N}$.
\end{theorem}

\begin{proof}
By Theorem~\ref{thm:R-uncountable} and the Schr\"oder--Bernstein Theorem, it is enough to show that $[0,1)\injects\mapset{\Np}{\B}$, where $[0,1)=\{x\in\R\colon 0\leq x<1\}$.  Given an element $a$ of $[0,1)$, we can, by strong recursion, define a function $k\mapsto a_k$ from $\Np$ to $\B$ so that, for each $n$ in $\Np$,
\begin{equation*}
\sum_{k=1}^n\frac{a_k}{2^k}\leq a<\sum_{k=1}^n\frac{a_k}{2^k}+\frac1{2^n}.
\end{equation*}
The function $a\mapsto(a_k\colon k\in\Np)$ is injective.  Indeed, suppose $a$ and $b$ are distinct elements of $[0,1)$.  For some $n$ in $\Np$ we have
\begin{equation*}
\frac1{\size{a-b}}<2^n,
\end{equation*}
so $1/2^n<\size{a-b}$.
If $(a_1,\dots,a_{n-1})=(b_1,\dots,b_{n-1})$, then $a_n\neq b_n$, since otherwise $a$ and $b$ would lie in the same interval of length $1/2^n$.
\end{proof}
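
As a check on this recursion, take $a=1/3$.  Then $a_1=0$, since
$0\leq1/3<1/2$; and $a_2=1$, since $1/4\leq1/3<1/4+1/4$; and in general
\begin{equation*}
(a_k\colon k\in\Np)=(0,1,0,1,0,1,\dots),
\end{equation*}
which is the binary expansion of $1/3$.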

We know $\size{\R}=\aleph_{\alpha}$ for some nonzero ordinal $\alpha$;
but we do not know what $\alpha$ is.  The set $\R$ is called the
\textbf{continuum,}\dindex{continuum} and the statement that
$\size{\R}=\aleph_1$ is called the 
\textbf{Continuum Hypothesis.}%
\dindexsub{hypothesis}{Continuum H---}
G\"odel showed that there are models of the axioms of set-theory in
which the Continuum Hypothesis is true; Cohen, false. 

\subsection*{Exercises}
\begin{enumerate}
\renewcommand{\labelenumi}{\theenumi.}
\item
Prove that the union of two finite sets is finite, and if $A$
  and $B$ are 
  finite, then 
$\size{A\cup B}+\size{A\cap B}=\size A+\size B$.
\item
  If $A\injects\vnn$ and $B\equip\vnn$, show that $A\times
  B\equip\vnn$. 
\item
Show that, if $A\injects\vnn$, and $n\in \vnn$, then $A^n\injects \vnn$.
\item
Show that, if $A\injects \vnn$, then $\bigcup_{n\in\vnn}A^n\injects \vnn$.
\item
  Show that $\R$ is equipollent with the set of functions from $\N$ to
  $\N$. 
\item
  Show that $\R\times\R\equip\R$.
\item
A real number $\alpha$ is
\textbf{algebraic}\dindex{algebraic}\dindexsub{number}{algebraic ---} if
there is a positive
integer $n$ for which there is an $n$-tuple $\tuple a$ of rational
numbers such that
\begin{equation*}
  \sum_{k<n}a_k\alpha^k+\alpha^n=0.
\end{equation*}
A real number that is not algebraic is
\textbf{transcendental.}\dindex{transcendental
  number}\dindexsub{number}{transcendental ---}  Show that there are
uncountably many transcendental numbers.
\end{enumerate}


\appendix

%\input{chapter-analytics.tex}

\chapter{Aristotle's \emph{Analytics}}\label{Aristotle}

{\it
Below is a translation from the first few pages of the
Aristotelian work called the \emph{Prior Analytics}.  Like all of
Aristotle's extant works, the text appears to consist of students'
lecture notes; perhaps these notes were never edited by Aristotle
himself.  

I only want to observe three features of the text:
\begin{compactenum}[1)]
  \item
the absence of any special notation;
\item
the definition of \textsl{proposition};
\item
the use of \emph{proofs}.
\end{compactenum}

The
translation here is mine, from the text in the Loeb edition \cite{Aristotle-LI}.  Some
of the wording is from the English translation by Tredennick in that edition, but there are deviations.  For example, where I have `proposition',
Tredennick has `premiss'.  The typography is
entirely my own, based on the conception of the text \emph{as}
lecture-notes; the Greek text indicates no special line-breaks.
Likewise, my English is highly abbreviated and `telegraphic', as is
the original Greek.

Here then is Aristotle:
}

\subsection*{}

First, to say what our study (\Gk{sk'eyis}) is \emph{about} and \emph{of}:
\begin{compactenum}[1)]
  \item
it is about demonstration (\Gk{>ap'odeixis}), and
\item
it is of demonstrative science (\Gk{>epist'hmh >apodeiktik\~h}).
\end{compactenum}
Next, to define:
\begin{compactenum}[1)]
  \item
\textsl{proposition} (\Gk{pr'otasis}),
\textsl{term} (\Gk{<'oros}), and
\textsl{syllogism} (\Gk{sullogism'os}), and 
\item
which kinds [of syllogism] are
\textsl{complete} (\Gk{t'eleios}) and
\textsl{incomplete} (\Gk{>atel'hs}).
\end{compactenum}
After these:
\begin{compactenum}[1)]\setcounter{enumi}{2}
\item
what it is for one thing \textsl{to be or not to be wholly} (\Gk{t`o >en
  <'olw| e\char0218nai >`h m`h e\char0218nai}) in another, and
\item
what we mean by \textsl{being predicated} (\Gk{kathgore\~isjai}) of all
or of none.
\end{compactenum}
A \textbf{proposition} is a statement affirming (\Gk{katafatik'os}) or
denying (\Gk{>apofatik'os}) something of something.  It is 
\textsl{universal} (\Gk{kaj'olou}),
\textsl{particular} (\Gk{>en merei}), or
\textsl{indefinite} (\Gk{>adi'oristos}).
\begin{compactenum}[1)]
  \item
By \textbf{universal}, I mean applying (\Gk{<up'arqein}) to all or none;
\item
by \textbf{particular}, applying to some, or not to some, or not
to all;
\item
by \textbf{indefinite}, applying or not applying, without reference to
whole or part, as in
  `The same science studies contraries' or
  `Pleasure is not good.'
\end{compactenum}

[\emph{I skip some further discussion of propositions.}]

A \textbf{term} is what a proposition is divided into, namely
\begin{compactenum}[1)]
  \item
that which is predicated, and
\item
that of which it is predicated,
\end{compactenum}
[a form of] \Eng{to be} or \Eng{not to be} being added or removed.

A \textbf{syllogism} is a `piece of language' (\Gk{l'ogos}) in which,
some things being assumed (\Gk{tej'entwn tin\~wn}), because of these
(\Gk{t\char0196\ ta\~uta e\char0218nai}), 
something different from what was laid down (\Gk{ta keim'ena})
necessarily follows.  By saying:
\begin{compactenum}[1)]
  \item
`because of these', I mean it follows \emph{through} these
(\Gk{di`a ta\~uta});
\item
`it follows through these', no additional term is needed for the
  necessity to come about.
\end{compactenum}

I call a syllogism:
\begin{compactenum}[1)]
  \item
\textbf{complete}, if it needs nothing else, apart from what it
     [already] contains, for the necessary [conclusion] to be
     evident;
\item
\textbf{incomplete}, if it needs one or more [propositions] not included
among the [given] propositions, although they are necessary through
the terms that have been laid down.
\end{compactenum}

These are the same:
\begin{compactenum}[1)]
  \item
for \emph{this} \textbf{to be wholly} in \emph{that};
\item
for \emph{that} to be predicated of all of \emph{this}.
\end{compactenum}
We say that [\emph{that} is] predicated of all [of \emph{this}] when
nothing of \emph{this} can be taken of which \emph{that} cannot be
said.  Similarly if [\emph{that}] is predicated of \emph{none} [of
  \emph{this}]. 

Now, every proposition is 
\begin{compactenum}[1)]
  \item
an application (\Gk{<up'arqein}), or 
\item
a \emph{necessary} (\Gk{>ex >an'ankhs}) application, or
\item
a \emph{potential} (\Gk{to\~u >end'eqesjai}) application.  
\end{compactenum}
Of these, 
\begin{compactenum}[1)]
  \item
some are affirmative (\Gk{katafatik'os}), 
\item
some negative (\Gk{>apofatik'os}), 
\end{compactenum}
according to each application.

Again, of the affirmative and negative, some are universal, some
particular, some indefinite.

A universal
\begin{compactenum}[1)]
  \item
\emph{negative} (\Gk{sterhtik'os}) application is
necessarily convertible (\Gk{>anti\-str'efein}) in terms; for example,
if no pleasure is a good thing, then no good thing is a pleasure;
\item
\emph{affirmative} (\Gk{kathgorik'os}) is necessarily convertible, not
universally, but particularly.  For example, if every pleasure is
good, then some good is a pleasure.
\end{compactenum}

Of the particular:
\begin{compactenum}[1)]
  \item
the \emph{affirmative} is necessarily convertible particularly; for, if some
pleasure is good, then some good will be a pleasure;
\item
the \emph{negative,} not necessarily; for it does not follow that, if
\Eng{man} does not apply to some animal, then \Eng{animal} does not
apply to some man.
\end{compactenum}

First, let the proposition $AB$ be negative universal.  If then $A$
applies to nothing of $B$, then $B$ will apply to nothing of $A$.  For
if to something, say $C$, then it will not be true that $A$ applies to
nothing of $B$, for $C$ is of $B$.

If $A$ applies to all $B$, then $B$ applies to some $A$.  For if not,
then $A$ will apply to no $B$; but it was supposed to apply to all.

Similarly if the proposition is particular:

If $A$ to some of $B$, then $B$ to some of $A$ necessarily applies;
for if not, then $A$ to nothing of $B$.

But if
$A$ does not apply to some of $B$, there is no necessity that some of $A$
should not be $B$. For example, suppose $B$ is animal and $A$ is
man; \Eng{man} not to every animal, but \Eng{animal} to every man applies.   


%\addcontentsline{toc}{chapter}{Bibliography}
%\bibliographystyle{plain}% \label{bibliog}
%\bibliography{../../../../../TeX/references}
%\bibliography{../../references}
%\bibliography{references}

\def\cprime{$'$}
\begin{thebibliography}{10}

\bibitem{Metafizik}
Aristoteles.
\newblock {\em Metaf{}izik}.
\newblock Sosyal Yay\i nlar, Ca\u galo\u glu--{\.I}stanbul, 1996.
\newblock Second printing. {T}urkish translation by Ahmet Arslan.

\bibitem{Aristotle-LI}
Aristotle.
\newblock {\em Categories, On Interpretation, and Prior Analytics}, volume 325
  of {\em Loeb Classical Library}.
\newblock Harvard University Press and William Heinemann Ltd, Cambridge,
  Massachusetts and London, 1973.
\newblock With an English translation by H. P. Cooke and H. Tredennick.

\bibitem{Aristotle-XVII}
Aristotle.
\newblock {\em The Metaphysics, Books I--IX}, volume XVII of {\em Loeb
  Classical Library}.
\newblock Harvard University Press and William Heinemann Ltd., Cambridge,
  Massachusetts, and London, 1980.
\newblock With an English translation by Hugh Tredennick; first printed 1933.

\bibitem{Boole}
George Boole.
\newblock {\em Collected Logical Works. {V}olume {II}: The Laws of Thought}.
\newblock The Open Court Publishing Company, Chicago and London, 1940.
\newblock First published 1854; with a note by Philip E. B. Jourdain.

\bibitem{Burali-Forti}
Cesare Burali-Forti.
\newblock A question on transfinite numbers (1897).
\newblock In Jean van Heijenoort, editor, {\em From Frege to G{\"o}del}, pages
  104--112. Harvard University Press, 1976.

\bibitem{Burris}
Stanley~N. Burris.
\newblock {\em Logic for Mathematics and Computer Science}.
\newblock Prentice Hall, Upper Saddle River, New Jersey, USA, 1998.

\bibitem{MR0409165}
C.~C. Chang and H.~J. Keisler.
\newblock {\em Model theory}.
\newblock North-Holland Publishing Co., Amsterdam, 1973.
\newblock Studies in Logic and the Foundations of Mathematics, Vol. 73.

\bibitem{MR2319486}
Ian Chiswell and Wilfrid Hodges.
\newblock {\em Mathematical logic}, volume~3 of {\em Oxford Texts in Logic}.
\newblock Oxford University Press, Oxford, 2007.

\bibitem{MR18:631a}
Alonzo Church.
\newblock {\em Introduction to mathematical logic. {V}ol. {I}}.
\newblock Princeton University Press, Princeton, N.~J., 1956.

\bibitem{MR0232676}
Paul~J. Cohen.
\newblock {\em Set theory and the continuum hypothesis}.
\newblock W. A. Benjamin, Inc., New York-Amsterdam, 1966.

\bibitem{Collingwood-Auto}
R.~G. Collingwood.
\newblock {\em An Autobiography}.
\newblock Clarendon Press, Oxford, 1978.
\newblock With a new introduction by Stephen Toulmin; originally written 1938;
  reprinted 2002.

\bibitem{MR0159773}
Richard Dedekind.
\newblock {\em Essays on the theory of numbers. {I}: {C}ontinuity and
  irrational numbers. {II}: {T}he nature and meaning of numbers}.
\newblock authorized translation by Wooster Woodruff Beman. Dover Publications
  Inc., New York, 1963.

\bibitem{Descartes-Geometry}
Descartes.
\newblock {\em The Geometry of {R}en{\'e} {D}escartes}.
\newblock Dover Publications, Inc., New York, 1954.
\newblock Translated from the French and Latin by David Eugene Smith and Marcia
  L. Latham, with a facsimile of the first edition of 1637.

\bibitem{Descartes-Med}
Ren{\'e} Descartes.
\newblock {\em Meditations on First Philosophy}.
\newblock Hackett, Indianapolis, 1979.
\newblock Translated from the Latin by Donald A. Cress.

\bibitem{Descartes-Med-Tur}
Ren{\'e} Descartes.
\newblock {\em S{\"o}ylem, Kurallar, Meditasyonlar}.
\newblock {\.I}dea, {\.I}stanbul, 1996.
\newblock Translated by Aziz Yard{\i}ml{\i}.

\bibitem{Donne}
John Donne.
\newblock {\em The Complete Poetry and Selected Prose of {J}ohn {D}onne}.
\newblock The Modern Library, New York, 1952.
\newblock Edited with an introduction by {C}harles {M}. {C}offin.

\bibitem{Dries-Mosch}
Lou van~den Dries and Yiannis~N. Moschovakis.
\newblock Is the {E}uclidean algorithm optimal among its peers?
\newblock {\em Bulletin of Symbolic Logic}, 10(3):390--418, September 2004.

\bibitem{Epp}
Susanna~S. Epp.
\newblock {\em Discrete Mathematics with Applications}.
\newblock PWS Publishing Company, Boston, Massachusetts, USA, second edition,
  1995.

\bibitem{MR17:814b}
Euclid.
\newblock {\em The thirteen books of {E}uclid's {E}lements translated from the
  text of {H}eiberg. {V}ol. {I}: {I}ntroduction and {B}ooks {I}, {I}{I}. {V}ol.
  {I}{I}: {B}ooks {I}{I}{I}--{I}{X}. {V}ol. {I}{I}{I}: {B}ooks
  {X}--{X}{I}{I}{I} and {A}ppendix}.
\newblock Dover Publications Inc., New York, 1956.
\newblock Translated with introduction and commentary by Thomas L. Heath, 2nd
  ed.

\bibitem{MR2095748}
Anita~Burdman Feferman and Solomon Feferman.
\newblock {\em Alfred {T}arski: life and logic}.
\newblock Cambridge University Press, Cambridge, 2004.

\bibitem{MEU2}
H.~W. Fowler.
\newblock {\em A Dictionary of Modern {E}nglish Usage}.
\newblock Oxford University Press, second edition, 1982.
\newblock Revised and edited by Ernest Gowers.

\bibitem{MEU}
H.~W. Fowler.
\newblock {\em A Dictionary of Modern {E}nglish Usage}.
\newblock Wordsworth Editions, Ware, Hertfordshire, UK, 1994.
\newblock Reprint of the original 1926 edition.

\bibitem{MR0453532}
Paul~R. Halmos.
\newblock {\em Naive set theory}.
\newblock Springer-Verlag, New York, 1974.
\newblock Reprint of the 1960 edition, Undergraduate Texts in Mathematics.

\bibitem{MR92j:01070}
G.~H. Hardy.
\newblock {\em A mathematician's apology}.
\newblock Cambridge University Press, Cambridge, 1992.
\newblock With a foreword by C. P. Snow, Reprint of the 1967 edition.

\bibitem{Heffner}
Richard~D. Heffner.
\newblock {\em A Documentary History of the United States}.
\newblock New American Library, New York, 3rd edition, 1976.
\newblock Expanded and Revised Bicentennial Edition.

\bibitem{MR0033781}
Leon Henkin.
\newblock The completeness of the first-order functional calculus.
\newblock {\em J. Symbolic Logic}, 14:159--166, 1949.

\bibitem{MR94e:03002}
Wilfrid Hodges.
\newblock {\em Model theory}, volume~42 of {\em Encyclopedia of Mathematics and
  its Applications}.
\newblock Cambridge University Press, Cambridge, 1993.

\bibitem{MR1215482}
Jacob Klein.
\newblock {\em Greek mathematical thought and the origin of algebra}.
\newblock Dover Publications Inc., New York, 1992.
\newblock Translated from the German and with notes by Eva Brann, Reprint of
  the 1968 English translation.

\bibitem{MR0472307}
Morris Kline.
\newblock {\em Mathematical thought from ancient to modern times}.
\newblock Oxford University Press, New York, 1972.

\bibitem{Knuth}
Donald~E. Knuth.
\newblock {\em The \TeX book}, volume~A of {\em Computers \&\ Typesetting}.
\newblock Addison Wesley Publishing Company, Reading, Massachusetts, USA, June
  1986.
\newblock Seventh printing.

\bibitem{MR12:397m}
Edmund Landau.
\newblock {\em Foundations of Analysis. {T}he Arithmetic of Whole, Rational,
  Irrational and Complex Numbers}.
\newblock Chelsea Publishing Company, New York, N.Y., third edition, 1966.
\newblock Translated by F. Steinhardt; first edition 1951; first German
  publication, 1929.

\bibitem{MR1924429}
Azriel Levy.
\newblock {\em Basic set theory}.
\newblock Dover Publications Inc., Mineola, NY, 2002.
\newblock Reprint of the 1979 original [Springer, Berlin].

\bibitem{MR1924282}
David Marker.
\newblock {\em Model theory: an introduction}, volume 217 of {\em Graduate
  Texts in Mathematics}.
\newblock Springer-Verlag, New York, 2002.

\bibitem{OED}
Murray et~al., editors.
\newblock {\em The Compact Edition of the {O}xford {E}nglish {D}ictionary}.
\newblock Oxford University Press, 1973.

\bibitem{Nesin}
Ali Nesin.
\newblock {\em {\"O}nermeler Mant{\i}{\u g}{\i}}.
\newblock {\.I}stanbul Bilgi {\"U}niversitesi Yay{\i}nlar{\i}, 2001.

\bibitem{LatinDili}
Filiz {\"O}ktem.
\newblock {\em Uygulamal{\i} {L}atin Dili [{P}ractical {L}atin Grammar]}.
\newblock Sosyal Yay{\i}nlar, {\.I}stanbul, 1996.

\bibitem{Peano}
Giuseppe Peano.
\newblock The principles of arithmetic, presented by a new method (1889).
\newblock In Jean van Heijenoort, editor, {\em From {F}rege to {G}{\"o}del},
  pages 83--97. Harvard University Press, 1976.

\bibitem{MR2001a:03072}
Bruno Poizat.
\newblock {\em A course in model theory}.
\newblock Universitext. Springer-Verlag, New York, 2000.
\newblock An introduction to contemporary mathematical logic, Translated from
  the French by Moses Klein and revised by the author.

\bibitem{Post}
Emil~L. Post.
\newblock Introduction to a general theory of elementary propositions.
\newblock {\em Amer. J. Math.}, 43(3):163--185, July 1921.

\bibitem{ZFZB}
Paul Reps and Nyogen Senzaki, editors.
\newblock {\em Zen Flesh, {Z}en Bones}.
\newblock Shambala, Boston, 1994.
\newblock A Collection of {Z}en and Pre-{Z}en Writings.

\bibitem{01461249}
Kenneth~A. Ross and Charles~R.B. Wright.
\newblock {\em Discrete Mathematics}.
\newblock Prentice Hall, Upper Saddle River, NJ, fourth edition, 1999.

\bibitem{MR1800596}
Philipp Rothmaler.
\newblock {\em Introduction to model theory}, volume~15 of {\em Algebra, Logic
  and Applications}.
\newblock Gordon and Breach Science Publishers, Amsterdam, 2000.
\newblock Prepared by Frank Reitmaier; translated and revised from the 1995
  German original by the author.

\bibitem{Rotman}
Joseph~J. Rotman.
\newblock {\em A First Course in Abstract Algebra}.
\newblock Prentice Hall Inc., Upper Saddle River, NJ, second edition, 2000.

\bibitem{Russell-letter}
Bertrand Russell.
\newblock Letter to {F}rege (1902).
\newblock In Jean van Heijenoort, editor, {\em From Frege to G{\"o}del}, pages
  124--125. Harvard University Press, 1976.

\bibitem{MR817210}
Joseph~H. Silverman.
\newblock {\em The arithmetic of elliptic curves}, volume 106 of {\em Graduate
  Texts in Mathematics}.
\newblock Springer-Verlag, New York, 1986.

\bibitem{S&G}
{\em {S}imon and {G}arfunkel}.
\newblock \"{O}yk\"u Yay\i nc\i l\i k, Sultanahmet, \.{I}stanbul, 1987.
\newblock Words and music of songs of Paul Simon and Art Garfunkel; Turkish
  translation by Devrim Eker.

\bibitem{0458.26001}
Michael Spivak.
\newblock {\em Calculus}.
\newblock Publish or Perish, Inc., Berkeley, California, second edition, 1980.

\bibitem{MR83e:04002}
Robert~R. Stoll.
\newblock {\em Set theory and logic}.
\newblock Dover Publications Inc., New York, 1979.
\newblock Corrected reprint of the 1963 edition.

\bibitem{MR0349389}
Patrick Suppes.
\newblock {\em Axiomatic set theory}.
\newblock Dover Publications Inc., New York, 1972.
\newblock Unabridged and corrected republication of the 1960 original with a
  new preface and a new section (8.4).

\bibitem{COD6}
J.~B. Sykes, editor.
\newblock {\em The Concise {O}xford Dictionary of Current English}.
\newblock Clarendon Press, Oxford, sixth edition, 1976.
\newblock Based on the Oxford English Dictionary and its Supplements. First
  edited by {H}. {W}. {F}owler and {F}. {G}. {F}owler.

\bibitem{Tarski-T&P}
Alfred Tarski.
\newblock Truth and proof.
\newblock {\em Scientific American}, pages 63--77, 1969.

\bibitem{Tarski-Intro}
Alfred Tarski.
\newblock {\em Introduction to Logic and to the Methodology of Deductive
  Sciences}.
\newblock Dover, 1995.
\newblock An unabridged republication of the 9th printing, 1961, of the 1946
  second, revised edition of the work originally published by Oxford University
  Press, New York, in 1941.

\bibitem{MR13:419a}
Ivor Thomas, editor.
\newblock {\em Selections illustrating the history of {G}reek mathematics.
  {V}ol. {I}. {F}rom {T}hales to {E}uclid}.
\newblock Harvard University Press, Cambridge, Mass., 1951.
\newblock With an English translation by the editor.

\bibitem{MR13:419b}
Ivor Thomas, editor.
\newblock {\em Selections illustrating the history of {G}reek mathematics.
  {V}ol. {II}. {F}rom {A}ristarchus to {P}appus}.
\newblock Harvard University Press, Cambridge, Mass, 1951.
\newblock With an English translation by the editor.

\bibitem{MR0209111}
Jean van Heijenoort.
\newblock {\em From {F}rege to {G}\"odel. {A} source book in mathematical
  logic, 1879--1931}.
\newblock Harvard University Press, Cambridge, Mass., 1967.

\bibitem{MR0263601}
Jean van Heijenoort, editor.
\newblock {\em Frege and {G}\"odel. {T}wo fundamental texts in mathematical
  logic}.
\newblock Harvard University Press, Cambridge, Mass., 1970.

\bibitem{von-Neumann}
John von Neumann.
\newblock On the introduction of transfinite numbers (1923).
\newblock In Jean van Heijenoort, editor, {\em From {F}rege to {G}{\"o}del},
  pages 346--354. Harvard University Press, 1976.

\bibitem{MR734177}
Andr{\'e} Weil.
\newblock {\em Number theory}.
\newblock Birkh\"auser Boston Inc., Boston, MA, 1984.
\newblock An approach through history, From Hammurapi to Legendre.

\bibitem{Zinn}
Howard Zinn.
\newblock {\em A People's History of the United States: 1492--Present}.
\newblock Harper Collins, New York, 2nd edition, 1995.

\end{thebibliography}


\addchap{Symbols}
\label{symbols}
%\addcontentsline{toc}{chapter}{Symbols}
%\input{notes.glo}

%\begin{multicols}{2}
\begin{longtable}{c r}
\input{math-111-notes.glo}
\end{longtable}
%\end{multicols}

%\input{symbols}

\label{index}
\printindex

\end{document}
