\documentclass[%
version=last,%
a5paper,
10pt,%
headings=small,%
bibliography=totoc,%
index=totoc,%o
twoside,%
leqno,%
cleardoublepage=empty,%
%open=any,%
parskip=half,%
%draft=false,%
draft=true,%
DIV=classic,%
%DIV=12,%
headinclude=true,%
pagesize]%{scrreprt}
{scrartcl}

%\usepackage[notref,notcite]{showkeys}
\renewcommand{\theequation}{\fnsymbol{equation}}

\usepackage[headsepline]{scrpage2}
\pagestyle{scrheadings}
\clearscrheadings
\ohead{\pagemark}
\ihead{L\"owenheim--Skolem Theorem}

\usepackage{hfoldsty,verbatim}
\usepackage[neverdecrease]{paralist}
\usepackage{url}


\usepackage{relsize} % Here \smaller scales by 1/1.2; \relscale{X} scales by X

\renewenvironment{quote}{\begin{list}{}
{\relscale{.90}\setlength{\leftmargin}{0.05\textwidth}
\setlength{\rightmargin}{\leftmargin}}
\item[]}
{\end{list}}


\usepackage{amsmath,amsthm,amssymb,bm,upgreek,mathrsfs}
%\usepackage[all]{xy}

\usepackage{stmaryrd} % for \mapsfrom

\newcommand{\Forall}[1]{\forall#1\;}
\newcommand{\Exists}[1]{\exists#1\;}
\let\oldvec\vec
\renewcommand{\vec}[1]{\bm{#1}}

\usepackage[mathcal]{euscript}
\newcommand{\tf}[1]{\mathcal{#1}} % ``Tarski's font''

\newcommand{\F}{\mathbf F}
\newcommand{\EF}{\mathbf{EF}}
\newcommand{\AF}{\mathbf{AF}}
\newcommand{\AC}[1][]{\mathbf{AC}_{#1}}
\newcommand{\AT}{\mathbf{AT}}
\newcommand{\ACL}{\mathbf{ACL}}

\DeclareMathOperator{\spectrum}{Spec}
\newcommand{\spec}[1]{\spectrum(#1)}

\newcommand{\conc}{\mathbin{\hat{\ }}}
\newcommand{\sig}{\mathscr S}
\newcommand{\str}[1]{\mathfrak{#1}}
\newcommand{\size}[1]{\lvert#1\rvert}
\DeclareMathOperator{\setMod}{Mod}
\newcommand{\sMod}[1]{\setMod(#1)}
\newcommand{\Mod}[2][]{\mathbf{Mod}_{#1}(#2)}
\newcommand{\Moda}[1]{\mathbf{Mod}^*(#1)}
\newcommand{\Str}{\mathbf{Str}}
\DeclareMathOperator{\sentences}{Sen}
\newcommand{\Sen}{\sentences}
\newcommand{\modsim}{/\mathord{\sim}}
\newcommand{\pow}[1]{\mathscr P(#1)}
\newcommand{\inv}{{}^{-1}}

\newcommand{\Cat}[1]{\bm{\mathsf{#1}}}
\newcommand{\Top}{\Cat{Top}}
\newcommand{\Frm}{\Cat{Frm}}
\newcommand{\Loc}{\Cat{Loc}}
\newcommand{\Grp}{\Cat{Grp}}
\newcommand{\Set}{\Cat{Set}}
\DeclareMathOperator{\domain}{dom}
\newcommand{\dom}[1]{\domain(#1)}
\DeclareMathOperator{\points}{pt}
\newcommand{\pt}[1]{\points(#1)}
\DeclareMathOperator{\homoms}{Hom}
\newcommand{\Hom}[1]{\homoms(#1)}
\newcommand{\bel}[1]{\left[#1\right]}

\newcommand{\Sn}[1][\sig]{\sentences_{\sig}}
\newcommand{\Sna}[1][\sig]{\sentences^*_{\sig}}
\DeclareMathOperator{\formulas}{Fm}
\newcommand{\Fm}[1][\sig]{\formulas_{#1}}
\DeclareMathOperator{\freevariables}{fv}
\newcommand{\fv}[1]{\freevariables(#1)}
\DeclareMathOperator{\variables}{v}
\newcommand{\vr}[1]{\variables(#1)}
\DeclareMathOperator{\theory}{Th}
\newcommand{\Th}[1]{\theory(#1)}
\newcommand{\Tha}[1]{\theory^*(#1)}
\renewcommand{\emptyset}{\varnothing}
\renewcommand{\setminus}{\smallsetminus}
\renewcommand{\leq}{\leqslant}
\renewcommand{\nleq}{\nleqslant}
\renewcommand{\geq}{\geqslant}
\renewcommand{\phi}{\varphi}
%\newcommand{\Or}{\;\lor\;}
\newcommand{\Or}{\;\mathrel{\text{\textsc{or}}}\;}
\renewcommand{\models}{\vDash}
\newcommand{\nmodels}{\nvDash}
\newcommand{\included}{\subseteq}
\newcommand{\includes}{\supseteq}
\newcommand{\pincluded}{\subset}
\newcommand{\liff}{\Leftrightarrow}
\newcommand{\lto}{\Rightarrow}


\newtheorem{theorem}{Theorem}
\newtheorem{definition}[theorem]{Definition}

\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}{Lemma}

\theoremstyle{definition}

\newtheorem{example}[theorem]{Example}

\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}

\begin{document}

\title{L\"ownheim--Skolem Theorem}
\author{David Pierce}
\date{September 17, 2014}
\publishers{Mimar Sinan Fine Arts University\\
%Istanbul\\
\url{http://mat.msgsu.edu.tr/~dpierce/}}
\maketitle

These notes are part of a general investigation of the Compactness Theorem.
They are in response to remarks of Dawson \cite{MR1212899}.
Dawson distinguishes and names four theorems
(the typography is mine, the words are Dawson's):
\begin{quote}
  \begin{description}
  \item[The completeness theorem:] Every valid sentence is provable. 
\item[The Skolem--Godel theorem:] 
A set of sentences is (syntactically) consistent 
if and only if it is satisfiable in some model. 
\item[The compactness theorem:] 
A set of sentences is satisfiable 
if and only if every finite subset of it is satisfiable 
(for short: if and only if the set of sentences is finitely satisfiable). 
\item[The (downward) Lowenheim--Skolem theorem:] 
If a set of sentences is satisfiable at all, 
it is satisfiable in a structure whose cardinality is at most
that of the number of symbols in the underlying language.
\end{description}
\end{quote}
The first three theorems are, respectively, Theorems I, IX, and X
of G\"odel \cite{Goedel-compl},
except that the sets of sentences that G\"odel considers are countable.
Concerning these theorems, Dawson writes:
\begin{quote}
Godel's proofs employed Skolem's methods; 
but, unlike Skolem, 
Godel carefully distinguished between syntactic and semantic notions. 
The relation between the works of the two men 
has been examined by Vaught (\emph{1974,} 157--159) 
and, in great detail, by van Heijenoort and Dreben 
\cite{van-Heijenoort-Dreben-note}\nocite{van-Heijenoort-Dreben-note}. 
All three commentators agree 
that both the completeness and compactness theorems 
were implicit in Skolem \cite{Skolem-some-remarks}, 
but that no one before Godel drew them as conclusions, 
not even \emph{after} Hilbert and Ackermann, 
in their 1928 book \emph{Grundz\"uge der theoretischen Logik}[,]
singled out first-order logic for attention 
and explicitly posed the question of its completeness. 
\end{quote}
How is completeness implicit in Skolem's 1923 paper?%%%%%
\footnote{The paper is labelled with the year of 1922 in \cite{MR1890980},
presumably because the paper is based on an address 
to the Fifth Congress of Scandinavian Mathematicians, 
Helsinki, 4--7 August 1922.
But publication was apparently not until 1923,
as the bibliography of \cite{MR1890980} indicates.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section*{The 1920 paper}

I start with Skolem's 1920 paper \cite{Skolem-LS}.
Right away Skolem refers to L\"owenheim's use of the term
\emph{first-order expressions.}
Skolem will prefer \emph{first-order proposition.}
At the beginning he is not clear about free variables:
are we talking about \emph{sentences,} 
or arbitrary \emph{formulas} in our sense?
But the meaning of \emph{first order} is the one that has come down to us,
though the terminology used for describing the meaning is different:
\begin{quote}
  By a first-order expression L\"owenheim understands
an expression constructed from relative coefficients
by means of the five fundamental logical operations,
namely, in Schr\"oder's terminology,
identical multiplication and addition, negation, productation, and summation.
The five operations mentioned are denoted by a dot
(or simply juxtaposition), the sign $+$, the bar $\overline{\phantom x}$,
and the signs $\mathit{\Pi}$ and $\mathit{\Sigma}$, respectively.
\end{quote}
The examples that Skolem goes on to give allow construction of a dictionary:
\begin{equation*}
  \begin{array}{c|c|c|c|c|c}
    .&+&\overline{\phantom x}&\mathit{\Pi}&\mathit{\Sigma}
&\mathit{\Pi}_x\mathit{\Sigma}_yR_{xy}\\\hline
\land&\lor&\lnot&\forall&\exists
&\Forall x\Exists yRxy
  \end{array}
\end{equation*}
Skolem's \textbf{normal form}
is our \emph{Skolem normal form:} an $\forall\exists$ formula.
I quote his theorems verbatim.

\begin{theorem}
  If $U$ is an arbitrary first-order proposition,
there exists a first-order proposition $U'$ in normal form
with the property that $U$ is satisfiable in a given domain whenever $U'$ is,
and conversely.
\end{theorem}

\begin{theorem}
  Every proposition in normal form either is a contradiction
or is already satisfiable in a finite or denumerably infinite domain.
\end{theorem}

To prove this, we start with the simplest nontrivial case,
a sentence $\Forall x\Exists y\phi(x,y)$,
where $\phi$ is an open (quantifier-free) formula.
We assume the sentence has a model $\str B$.
(Skolem says, ``this proposition is satisfied in a given domain
for certain values of the relatives.'')

Using the Axiom of Choice (Skolem says ``principle of choice''),
we obtain an operation $x\mapsto x'$ on $B$
such that
\begin{equation*}
\str B\models\Forall x\phi(x,x').
\end{equation*}
Let $a\in B$, 
and let $A$ be the intersection of the collection of all subsets of $B$ 
that contain $a$ and are closed under $x\mapsto x'$.
By ``Dedekind's theory of chains in \emph{1888}'' \cite{MR0159773},
$A$ is countable, and
\begin{align*}
  \str A&\models\Forall x\phi(x,x'),&\str A&\models\Forall x\Exists y\phi(x,y).
\end{align*}
More generally, we may have a sentence
\begin{equation}\label{eqn:SNF}
  \forall{x_0}\cdots\Forall{x_{m-1}}\exists{y_0}\cdots\Exists{y_{n-1}}
\phi(x_0,\dots,x_{m-1},y_0,\dots,y_{n-1}),
\end{equation}
again assumed to have a model $\str B$.
Again by the Axiom of Choice we obtain a function $\vec x\mapsto\vec x'$
from $B^m$ to $B^n$ such that
\begin{equation*}
  \str B\models\Forall{\vec x}\phi(\vec x,\vec x').
\end{equation*}
(Skolem's indices in tuples start with $1$, 
and there is no notation for sets of tuples of given length.)
Again if we start with $a$ and close under $\vec x\mapsto\vec x'$,
we obtain a countable set $A$ such that 
$\str A\models\Forall{\vec x}\Exists{\vec y}\phi(\vec x,\vec y)$.
Skolem takes to lemmas to show that $A$ is countable.

\begin{quote}
  Theorem 2 admits generalizations of high order.
Thus, it is not difficult to prove the following:
Either it is contradictory to suppose
that a simply infinite sequence of first-order propositions in normal form
is simultaneously satisfiable
or the sequence is already simultaneously satisfiable
in a denumerably infinite domain.
\end{quote}
Skolem gives the proof for sentences in normal form
in which only two variables occur,
and he considers the ``logical product'' or ``propositional product''
\begin{equation*}
  \mathit{\Pi}_{x_1}\mathit{\Sigma_{y_1}}U^1_{x_1y_1}
  \mathit{\Pi}_{x_2}\mathit{\Sigma_{y_2}}U^2_{x_2y_2}
\cdots\text{ad infinitum}
\end{equation*}
The formal statement is,
\begin{theorem}
  If a proposition can be represented
as a product of a denumerable set of first-order propositions,
it either is a contradiction
or is already satisfiable in a denumerable domain.
\end{theorem}
Skolem continues in this vein, 
with infinite ``sums'' (disjunctions),
and then repeated taking of sums and products.
He also considers propositions
\begin{equation*}
    \mathit{\Pi}_{x_1}\mathit{\Pi}_{x_2}\cdots\mathit{\Pi}_{x_m}
\mathit{\Sigma_{y_1}}\mathit{\Sigma_{y_2}}
\cdots\text{ad inf. }
U_{x_1\cdots x_my_1y_2\cdots\text{ad inf.}}
\end{equation*}
and countably infinite products of these (Theorem 8).
There are no examples or concluding remarks.

\section*{The 1922 paper}

Skolem's 1922 paper \cite{Skolem-some-remarks}
addresses eight points about Zermelo's set-theory.
First,
\begin{quote}
  If we adopt Zermelo's axiomatization,
we must, strictly speaking,
have a general notion of domains in order to be able to provide
a foundation for set theory.
The entire content of this theory is, after all, as follows:
for every domain in which the axioms hold,
the further theorems of set theory also hold.
But clearly it is somehow circular
to reduce the notion of set to a general notion of domain.
\end{quote}
The basic concern seems to be that you cannot do set theory
without already knowing what sets are.
He seems to be mistaken:
We start out with a notion of (as I say) \emph{collection,}
and then in particular \emph{class}:
a ``domain'' for set theory
is a class, not a set.

Skolem's second point is that Zermelo is deficient
for not defining ``definite proposition.''
The allusion is to
\begin{quote}
  \textsc{Axiom III.}
(Axiom of separation.)
Whenever the propositional function $\mathfrak E(x)$ is definite
for all elements of a set $M$,
$M$ possesses a subset $M_{\mathfrak E}$
containing as elements precisely those elements $x$ of $M$
for which $\mathfrak E(x)$ is true.\hfill\cite[p.~202]{Zermelo-invest}
\end{quote}
Here $\mathfrak E(x)$ ought to be,
in our terms, 
a first-order proposition
in the signature $\{\in\}$ (with equality).

Skolem continues with the \textbf{Skolem paradox:}
\begin{quote}
  This third point is the most important:
\emph{If the axioms are consistent,
there exists a domain $B$ in which the axioms hold
and whose elements can all be enumerated
by means of the positive finite integers.}

\dots So far as I know,
no one has called attention 
to this peculiar and apparently paradoxical state of affairs.
\end{quote}
Skolem immediately resolves the paradox.
Meanwhile, he has proved it
using L\"owenheim's theorem,
proved now without the Axiom of Choice:
We are again considering a satisfiable sentence in Skolem normal form
as in \eqref{eqn:SNF}.
There are some ``solutions''---models---of the sentence
\begin{equation*}
  \Exists{\vec y}\phi(0,\dots,0,\vec y).
\end{equation*}
Moreover, some such model will have the universe $1+n$,
that is, $\{0,\dots,n\}$,
because this set
is just large enough to allow the $y_j$ to be distinct from one another
and from $0$.
We designate each such model by $L_{j,1}$ for some $j$.
Then there are models $L_{j,2}$ of
\begin{equation*}
  \bigwedge_{\vec x\in{}^m(1+n)}\phi(\vec x,\vec y)
\end{equation*}
that have universe $1+n+n\cdot(n+1)^m$.
We continue thus: the $L_{j,3}$ will be models of
\begin{equation*}
  \bigwedge_{\vec x\in{}^m(1+n+n\cdot(n+1)^m)}\phi(\vec x,\vec y)
\end{equation*}
with universe $1+n+n\cdot(n+1)^m+n\cdot(1+n+n\cdot(n+1)^m)^m$, and so on.

Let the universe of $L_{j,n}$ be $A_n$.  Thus
\begin{align*}
  A_0&=1,&A_{k+1}&=A_k+n\cdot A_k{}^m,
\end{align*}
but this is of little importance.
Now we order the $L_{j,n}$ for fixed $n$.
First we order linearly the relation symbols occurring in $\phi$.%%%%%
\footnote{At one point Skolem calls these,
``class and relation symbols
(class and relative coefficients in the sense of Schr\"oder)''.}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Then $L<L'$ if and only if,
when $m$ is minimal such that both $L$ and $L'$
include \emph{no} common $L_{j,m}$,
then,
for the least $R$ that has different interpretations in $L$ and $L'$,
for the least tuple of elements of $A_n$ (in the lexicographic ordering)
where the interpretations differ, $R$ fails for that tuple in $L$,
but not $L'$.
Then the $L_{j,n}$ are numbered so that
\begin{equation*}
  L_{0,n}<L_{1,n}<\cdots
\end{equation*}
If $k<n$, there is $a_k^n$ such that
\begin{equation*}
  L_{a_k^n,k}\subset L_{1,n}.
\end{equation*}
As $n$ grows (while $k$ is fixed),
$a_k^n$ can only increase:
this is because
\begin{multline*}
  L_{i,n}\subset L_{i^*,n+1}\And L_{j,n}\subset L_{j^*,n+1}\And L_{i^*,n+1}<L_{j^*,n+1}\\
\implies L_{i,n}\leq L_{j,n}.
\end{multline*}
The point is that the various $L_{i,n}$ compose a \emph{tree}
when ordered by $\subset$;
and we can refine this into a linear ordering with the desired property.
Then the $L_{1,n}$ have a ``limit,'' which is a model of the original sentence.

Now we generalize to a countably infinite list of sentences.
Skolem hardly even alludes to the details;
his main point is to observe that
when Zermelo's ``Axiom III (axiom of separation)
can be replaced by an infinite sequence of simpler axioms''\dots%%%%%
\footnote{Editing this document on June 18, 2015,
I have added the ellipsis and suppressed (``commented out'')
the ensuing section, called ``Old,''
whose material, including a derivation of the Skolem normal form, 
seems to have been transferred to 
(or rewritten in)
my notes on G\"odel's Completeness Theorem.}

\begin{comment}
  

\section*{Old}


A formula is in \textbf{prenex normal form}
if all of its quantifiers are in front.
A \textbf{quantifier} is a string $\forall x$ or $\exists x$.
We may then assume that no variable occurs in two different quantifiers.
The formula is in \textbf{Skolem normal form} if, further,
no existential quantifier follows a universal quantifier.

G\"odel's proof of the Completeness Theorem
relies on \emph{Skolem normal forms.}


\begin{theorem}
Every formula is equivalent to a formula in prenex normal form.
\end{theorem}

\begin{proof}
Then a formula not in prenex normal form
is equivalent to a formula of one of the forms
\begin{align*}
&\phi\lor\Forall x\psi,&
&\phi\land\Forall x\psi,
\end{align*}
or to the negation of such a formula.
Let $y$ be a variable not occurring freely in $\phi$ or $\psi$,
but substitutable for $x$ in $y$.
(We could just let $y$ not occur at all in $\phi$ or $\psi$.)
The formulas above are equivalent respectively to
\begin{align*}
&\Forall y(\phi\lor\psi^x_y),&
&\Forall y(\phi\land\psi^x_y).
\end{align*}
Now repeat, if $\phi\lor\psi^x_y$ or $\phi\land\psi^x_y$
is not in prenex normal form.
Since in any case it is shorter than the original formula,
the repetitions must stop,
at a formula in prenex normal form.
\end{proof}

\begin{theorem}
  For every signature $\sig$,
for every sentence $\sigma$ of $\sig$,
there is a larger signature $\sig^*$
and a sentence $\sigma^*$ of $\sig^*$ in Skolem normal form
such that, for all structures $\str A$ of $\sig$,
the following conditions are equivalent:
\begin{itemize}
\item 
$\str A\models\sigma$.
\item
For every expansion $\str A^*$ of $\str A$ to $\sig^*$, $\str A^*\models\sigma^*$.
\end{itemize}
\end{theorem}

\begin{proof}
By the previous theorem, we may assume $\sigma$
is in prenex normal form.
We can write this as
\begin{equation*}
\Exists{\vec x}\Forall y\mathsf Q\;\theta,
\end{equation*}
where $\mathsf Q$ is a string of quantifiers, 
and $\theta$ is quantifier-free.  
Introduce a new predicate $R$,
and let $\sigma_1$ be the sentence
\begin{equation*}
\Exists{\vec x}(\Forall y(\mathsf Q\;\theta\lto R\vec xy)\lto\Forall yR\vec xy).
\end{equation*}
\begin{enumerate}
\item 
Suppose $\str A\models\sigma$
and $\str A^*$ is an expansion of $\str A$ to $\sig\cup\{R\}$.
For some $\vec a$ from $A$,
\begin{equation*}
\str A\models\Forall y\mathsf Q\;\theta^{\vec x}_{\vec a}.
\end{equation*}
Hence, 
if $\str A^*\models\Forall y(\mathsf Q\;\theta\lto R\vec xy)^{\vec x}_{\vec a}$, 
this means
\begin{equation*}
  \str A^*\models\Forall y(\mathsf Q\;\theta^{\vec x}_{\vec a}\lto R\vec ay),
\end{equation*}
and therefore $\str A^*\models\Forall yR\vec ay$.
Thus $\str A^*\models\sigma_1$.
\item
But if $\str A\nmodels\sigma$,
then by defining
\begin{equation*}
  R^{\str A^*}=(\mathsf Q\;\theta)^{\str A},
\end{equation*}
we have $\str A^*\nmodels\sigma_1$.
\end{enumerate}
Thus $\sigma_1$ has the desired property of $\sigma^*$,
but it may not be in Skolem normal form.
However,
if $z$ is a variable not occurring in $\theta$,
then $\sigma_1$ is equivalent to each of the following:
\begin{gather*}
\Exists{\vec x}(\Exists y(\mathsf Q\;\theta\land\lnot R\vec xy)\lor\Forall yR\vec xy),\\
\Exists{\vec x}\Exists y((\mathsf Q\;\theta\land\lnot R\vec xy)\lor\Forall zR\vec xz),\\
\Exists{\vec x}\Exists y(\mathsf Q\;(\theta\land\lnot R\vec xy)\lor\Forall zR\vec xz),\\
\Exists{\vec x}\Exists y\mathsf Q\;((\theta\land\lnot R\vec xy)\lor\Forall zR\vec xz),\\
\Exists{\vec x}\Exists y\mathsf Q\;\forall z((\theta\land\lnot R\vec xy)\lor R\vec xz),
\end{gather*}
This last sentence is in prenex normal form, though perhaps not in Skolem normal form.  
Still, the number of universal quantifiers that precede existential quantifiers has decreased.  
So the process terminates in a sentence that must be in Skolem normal form. 
\end{proof}

G\"odel's proof of the Completeness Theorem proceeds as follows.
At least, we can understand it as follow.

Taking it for granted 
that there is a complete proof-system for \emph{propositional} logic,
we might as well let every tautology
be an axiom of our proof-system for first-order logic.
G\"odel works with arbitrary formulas.
We shall restrict to sentences,
while allowing new constants to be introduced
(so these will be like the free variables in G\"odel's formulas).
Strictly then we are defining a proof system for each signature.
Additional axioms are
\begin{description}
  \item[Specialization:]
$\Forall x\phi(x)\lto\phi(c)$, for any constant $c$;
we may use this in the form
\begin{equation*}
  \phi(c)\lto\Exists x\phi(x).
\end{equation*}
\item[Change of Bound Variable:]
$\sigma\lto\sigma'$,
where $\sigma'$ results from $\sigma$ 
by changing a bound variable in a subformula.
\item[Distribution of Quantifier:]
$\Forall x(\sigma\lto\phi(x))\lto\sigma\lto\Forall x\phi(x)$.
\item[Equality:]
  \begin{itemize}
  \item 
$\Forall xx=x$,
\item
$\Forall x\Forall y(x=y\lto\phi(x)\lto\phi(y))$.
  \end{itemize}
\end{description}
The rules of inference are
\begin{description}
  \item[Detachment:]
from $\sigma$ and $\sigma\lto\tau$, infer $\tau$;
\item[Generalization:]
from $\phi(c)$, infer $\Forall x\phi(x)$,
provided $c$ is not in the signature.
\end{description}

We have to show that a sentence is provable 
if and only if its Skolem normal form is provable.

We assume our predicates have no operation symbols.

Given a sentence $\sigma$,
we eliminate the equal sign as follows.
We introduce a new binary predicate $\equiv$, 
and we replace each occurrence of $=$ in $\sigma$ 
with this new predicate $\equiv$, obtaining a new sentence $\sigma'$.  
Now let $(R_0,\dots,R_m)$ be a list of all predicates 
(including $\equiv$) occurring in $\sigma'$, 
and let $\sigma''$ be the sentence
\begin{equation*}
\sigma'\land\Forall{\vec x}\Forall{\vec y}
\left(\vec x\equiv\vec y\lto
\bigwedge_{j\leq m}(R_j\vec x_j\lto R_j\vec y_j)\right).
\end{equation*}
(Here $\vec x_j$ and $\vec y_j$ are initial segments, of appropriate length, 
of $\vec x$ and $\vec y$ respectively; 
and $\vec x$ and $\vec y$ are long enough to make this possible.)  
We should show $\sigma\lto\sigma''$ is a provable,
and if $\sigma''$ is provable, then so is $\sigma$.

Now suppose $\sigma$ is a sentence
\begin{equation*}
  \Exists{\vec x}\Forall{\vec y}\theta(\vec x,\vec y),
\end{equation*}
where $\vec x$ is a $p$-tuple of variables and
$\vec y$ is a $q$-tuple of variables,
all mutually distinct, and $\theta$ is quantifier-free.
(and without operation symbols and the equal sign).
In particular, $\sigma$ is in Skolem normal form.
Suppose it is not provable.
We shall construct a model of $\lnot\sigma$.

Let $A$ be a countably infinite set of new constants.
There are:
\begin{itemize}
\item 
a bijection $k\mapsto\vec c_k$ from $\upomega$ onto $A^p$, and
\item
an injection $k\mapsto\vec d_k$ from $\upomega$ into $A^q$ such that
$\vec d_k$ never shares any entry with $\vec c_0\cdots\vec c_k$.
\end{itemize}
None of the sentences $\theta(\vec c_k,\vec d_k)$ is provable,
since, if it is, then the following are provable:
\begin{gather*}
  \theta(\vec c_k,\vec d_k),\\
\Forall{\vec y}\theta(\vec c_k,\vec y),\\
\Forall{\vec x}\lnot\Forall{\vec y}\theta(\vec x,\vec y)
\lto\lnot\Forall{\vec y}\theta(\vec c_k,\vec y),\\
\Forall{\vec y}\theta(\vec c_k,\vec y)
\lto\Exists{\vec x}\Forall{\vec y}\theta(\vec x,\vec y),\\
\Exists{\vec x}\Forall{\vec y}\theta(\vec x,\vec y),
\end{gather*}
which is $\sigma$.
%Hence none of the  $\theta(\vec c_k,\vec d_k)$ is tautology.
Therefore neither is any sentence
\begin{equation*}
  \theta(\vec c_0,\vec d_0)\lor\dots\lor\theta(\vec c_k,\vec d_k)
\end{equation*}
provable BUT THIS TAKES WORK!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Call this sentence $\tau_k$.
Each negation $\lnot\tau_k$ has a model $\str A_k$ whose universe is $A$.
Indeed, an $\sig$-structure with universe $A$
is determined by arbitrary choice of truth-value 
for each atomic sentence of $\sig(A)$.

We define $\str A_k$ more carefully though.
Let $\Gamma_k$ be the set of atomic sentences occurring in $\tau_k$.
There are finitely many functions from $\Gamma_k$ to $2$.
Hence there is one of them, say $f$, 
such that for infinitely many $\ell$,
there is a model $\str B$ of $\lnot\tau_{\ell}$ with universe $A$
such that $f$ is $\alpha\mapsto\alpha^{\str B}$.
Thus we can ensure that whenever $k\leq\ell$ and $\alpha\in\Gamma_k$,
\begin{equation*}
  \str A_k\models\alpha\iff\str A_{\ell}\models\alpha.
\end{equation*}
Now we can define $\str A$ with universe $A$ so that,
whenever $\alpha$ is an atomic sentence occurring in some $\tau_k$,
\begin{equation*}
  \str A\models\alpha\iff\str A_k\models\alpha.
\end{equation*}
Then for all $k$, $\str A\models\lnot\tau_k$, that is,
\begin{equation*}
  \str A\models\lnot\phi(\vec c_k,\vec d_k),
\end{equation*}
and therefore
\begin{equation*}
  \str A\models\Exists{\vec y}\lnot\phi(\vec c_k,\vec d_k).
\end{equation*}
But every element of $A^p$ is $\vec c_k$ for some $k$.  Therefore
\begin{equation*}
  \str A\models\Forall{\vec x}\Exists y\lnot\phi(\vec x,\vec y),
\end{equation*}
so $\str A\models\lnot\sigma$.

G\"odel adapts this procedure to show that,
if we are given, for each $k$ in $\upomega$, a sentence
\begin{equation*}
  \Forall{\vec x^k}\Exists{\vec y^k}\phi_k(\vec x^k,\vec y^k),
\end{equation*}
where $\phi_i$ is quantifier-free,
and every finite set of these sentences has a model,
then the set of all of these sentences has a model.
For, as before,
we can let $A$ be a countably infinite set of new constants,
and for each $k$ in $\upomega$, 
we can introduce a bijection $j\mapsto\vec c^k_j$ from $\upomega$ onto $A^n$, 
where $n$ is the length of $\vec x^k$.

\end{comment}

\bibliographystyle{plain}
\bibliography{../references}


\end{document}