\documentclass[11pt,reqno]{amsart}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{graphicx}
%\usepackage{epstopdf}
\usepackage{hyperref}
\usepackage[left=1in,right=1in,top=0.9in,bottom=0.9in]{geometry}
\usepackage{multirow}
\usepackage{verbatim}
\usepackage{fancyhdr}
\usepackage{harvard}
\usepackage{pdfpages}
%\usepackage[small,compact]{titlesec}
%\usepackage{pxfonts}
%\usepackage{isomath}
\usepackage{mathpazo}
%\usepackage{arev} % (Arev/Vera Sans)
%\usepackage{eulervm} %_ (Euler Math)
%\usepackage{fixmath} % (Computer Modern)
%\usepackage{hvmath} %_ (HV-Math/Helvetica)
%\usepackage{tmmath} %_ (TM-Math/Times)
%\usepackage{cmbright}
%\usepackage{ccfonts} \usepackage[T1]{fontenc}
%\usepackage[garamond]{mathdesign}
%\usepackage{libertine}
\usepackage{color}
\usepackage[normalem]{ulem}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{conjecture}{Conjecture}[section]
\newtheorem{corollary}{Corollary}[section]
\newtheorem{lemma}{Lemma}[section]
\newtheorem{proposition}{Proposition}[section]
\newtheorem{assumption}{}[section]
\renewcommand{\theassumption}{A\arabic{assumption}}
\theoremstyle{definition}
\newtheorem{definition}{Definition}[section]
\newtheorem{step}{Step}[section]
\newtheorem{remark}{Comment}[section]
\newtheorem{example}{Example}[section]
\newtheorem*{example*}{Example}
\linespread{1.1}
\pagestyle{fancy}
%\renewcommand{\sectionmark}[1]{\markright{#1}{}}
\fancyhead{}
\fancyfoot{}
%\fancyhead[LE,LO]{\tiny{\thepage}}
\fancyhead[CE,CO]{\tiny{\rightmark}}
\fancyfoot[C]{\small{\thepage}}
\renewcommand{\headrulewidth}{0pt}
\renewcommand{\footrulewidth}{0pt}
\fancypagestyle{plain}{%
\fancyhf{} % clear all header and footer fields
\fancyfoot[C]{\small{\thepage}} % except the center
\renewcommand{\headrulewidth}{0pt}
\renewcommand{\footrulewidth}{0pt}}
\makeatletter
\renewcommand{\@maketitle}{
\null
\begin{center}%
\rule{\linewidth}{1pt}
{\Large \textbf{\textsc{\@title}}} \par
{\small \textsc{Paul Schrimpf}} \par
{\small \textsc{\@date}} \par
{\small \textsc{University of British Columbia}} \par
{\small \textsc{Economics 628: Topics in econometrics}} \par
\rule{\linewidth}{1pt}
\end{center}%
\par \vskip 0.9em
}
\makeatother
\newcommand{\argmax}{\operatornamewithlimits{arg\,max}}
\newcommand{\argmin}{\operatornamewithlimits{arg\,min}}
\def\inprobLOW{\rightarrow_p}
\def\inprobHIGH{\,{\buildrel p \over \rightarrow}\,}
\def\inprob{\,{\inprobHIGH}\,}
\def\indist{\,{\buildrel d \over \rightarrow}\,}
\def\F{\mathbb{F}}
\def\R{\mathbb{R}}
\newcommand{\gmatrix}[1]{\begin{pmatrix} {#1}_{11} & \cdots &
{#1}_{1n} \\ \vdots & \ddots & \vdots \\ {#1}_{m1} & \cdots &
{#1}_{mn} \end{pmatrix}}
\newcommand{\iprod}[2]{\left\langle {#1} , {#2} \right\rangle}
\newcommand{\norm}[1]{\left\Vert {#1} \right\Vert}
\newcommand{\abs}[1]{\left\vert {#1} \right\vert}
\renewcommand{\det}{\mathrm{det}}
\newcommand{\rank}{\mathrm{rank}}
\newcommand{\spn}{\mathrm{span}}
\newcommand{\row}{\mathrm{Row}}
\newcommand{\col}{\mathrm{Col}}
\renewcommand{\dim}{\mathrm{dim}}
\newcommand{\prefeq}{\succeq}
\newcommand{\pref}{\succ}
\newcommand{\seq}[1]{\{{#1}_n \}_{n=1}^\infty }
\renewcommand{\to}{{\rightarrow}}
\providecommand{\En}{\mathbb{E}_n}
\providecommand{\Gn}{\mathbb{G}_n}
\providecommand{\Er}{{\mathrm{E}}}
\renewcommand{\Pr}{{\mathrm{P}}}
\providecommand{\set}[1]{\left\{#1\right\}}
\providecommand{\plim}{\operatornamewithlimits{plim}}
\newcommand\indep{\protect\mathpalette{\protect\independenT}{\perp}}
\def\independenT#1#2{\mathrel{\setbox0\hbox{$#1#2$}%
\copy0\kern-\wd0\mkern4mu\box0}}
\renewcommand{\cite}{\citeasnoun}
\title{Structural Dynamic Models}
\date{\today}
\begin{document}
\maketitle
\section{Introduction}
These notes cover dynamic structural models. In these models, agents
are forward looking and maximize expected payoffs. The models are
structural in that they describe agents' preferences. Agents'
preferences will be estimated using the principle of revealed
preference.
\subsection{Notation}
We will use the same notation as \cite{am2010}. Time is discrete and
indexed by $t$. Agents are indexed by $i$. The time horizon, $T$, may
be finite or infinite. $s_{it} \in S$ is a vector of state variables that
are known at time $t$. The state variables could include time to allow
for non-stationary models (we must include $t$ in the finite horizon
case). The state variables can also include
time-invariant individual characteristics. $a_{it} \in A$ is an action
chosen at time $t$. Preferences are additively separable over time and
discounted at rate $\beta$. That is, preferences over possible
sequences of $s$ and $a$ are given by
\[ \sum_{j=0}^\infty \beta^j U(a_{i,t+j},s_{i,t+j}). \]
Agents have rational expectations about the evolution of state
variables. State variables follow a controlled Markov process, so that
the distribution of $s_{i,t+1}$ given all information at time $t$ only
depends on $s_{i,t}$ and $a_{i,t}$. Let $F(s_{i,t+1}|a_{it},s_{it})$
denote the transition distribution. Each period, the agent chooses
$a_{it}$ to maximize expected utility.
\[ a_{it} \in \argmax_{a \in A} \Er\left[ \sum_{j=0}^\infty \beta^j
U(a_{i,t+j},s_{i,t+j}) | a_{it}=a,s_{it} \right]. \]
The Bellman equation for this problem is
\[ V(s_{it} ) = \max_{a \in A} U(a,s_{it}) + \beta \Er[V(s_{i,t+1}) |
a, s_{it} ] \]
It will also be useful to define the choice specific value function,
\[ v(a,s_{it}) = U(a,s_{it}) + \beta \Er[V(s_{i,t+1}) |
a, s_{it} ]. \]
In the finite horizon case, the value function, $V$, exists under very
weak conditions, see \cite{rust1994} (essentially we just need the
maximization problem at each time to have a solution). Note that the
value functions must depend on time when the horizon is finite. We
have made this dependence implicit by including $t$ in $s_{it}$.
We will let
\[ \alpha(s) \in \argmax_{a \in A} U(a,s_{it}) + \beta
\Er[V(s_{i,t+1}) | a, s_{it} ] \]
denote the policy function.
\subsection{Existence and properties of value and policy functions}
When $T$ is infinite, the existence of the value function requires
some assumptions. The easiest case to prove is when $U$ is bounded and
continuous. Let $C(S)$ be the space of bounded continuous functions
from $S$ to $\R$. $C(S)$ is complete under the sup norm. Then
$T:C(S) \to C(S)$ defined by
\[ T(f)(s) = \max_{a \in A} U(a,s) + \beta \Er[f(s')|a,s] \]
is a contraction mapping. Since $C(S)$ is complete, $T$ has a unique
fixed point. It is easy to show that the unique fixed point solves the
original non-recursive problem, see \cite{rust1994}.
\[ V(s_{it}) = \max_{a \in A} \Er\left[ \sum_{j=0}^\infty \beta^j
U(a_{i,t+j},s_{i,t+j}) | a_{it}=a,s_{it} \right]. \]
This result is nice, but in typical applications $U$ is not
bounded. There are similar results for some unbounded $U$, see
\cite{rust1994} or \cite{stokeyLucas1989}. Additionally, we
will need the policy function,
\[ \alpha(s) \in \argmax_{a \in A} U(a,s_{it}) + \beta
\Er[V(s_{i,t+1}) | a, s_{it} ] \]
to be nonrandom and to be approximable in the sense that $V_n \to V$
implies that $\alpha_n \to \alpha$ where
\[ \alpha_n \in \argmax_{a \in A} U(a,s_{it}) + \beta
\Er[V_n(s_{i,t+1}) | a, s_{it} ]. \]
This and other properties of $\alpha$ follow from appropriate
assumptions about $U$ and $A$. See \cite{stokeyLucas1989} for
details.
\subsection{Data}
We have panel data on $N$ individuals, each observed for $T_i$
periods. We observe actions, $a_{it}$, and a sub-vector of the state
variable, $x_{it}$. The unobserved state variable will be
$\epsilon_{it}$, so $s_{it} = (x_{it}, \epsilon_{it})$. We also
observe some payoff variable,
\[ y_{it} = Y(a_{it}, x_{it}, \epsilon_{it}), \]
that contains information about $U(a,s)$, but is not one of the state
variables. For example, $y_{it}$ could be revenues of a firm, or an
individual's earnings.
\subsection{Examples}
The following examples are taken from \cite{am2010}.
\begin{example}[Retirement]
Consider the choice of when to retire. Let $a_{it} = 1$ if an agent
is working and $a_{it}= 0$ if retired. Suppose $T$ is the age at
death. The payoff function could be
\[ U(a_{it},x_{it},\epsilon_{it}) = \Er[ c_{it}^{\theta_1} | a_{it},
x_{it} ] \exp \left(\theta_2 + \theta_3 h_{it} + \theta_4
\frac{t}{1+t} \right) - \theta_5 a_{it} + \epsilon(a_{it}) \]
where $c_{it}$ is consumption, $\theta_1$ is the coefficient of
relative risk aversion, $h_{it}$ is health, and the expression in
the $\exp$ captures the idea that the marginal utility of
consumption could vary with health and age. $-\theta_5 a_{it}$
captures the disutility of working.
\end{example}
\begin{example}[Entry/exit]
A firm is deciding whether to operate in a market. Its per-period
profits are
\begin{align*}
U(a_{it}) = a_{it} \left( \theta_R \log (S_t) - \theta_N
\log\left( 1 + \sum_{j \neq i} a_{jt} \right) - \theta_F -
\theta_E(1- a_{i,t-1}) + \epsilon_{it} \right)
\end{align*}
where $a_{it}$ is whether the firm operates at time $t$. $S_t$ is
the size of the market, $\sum_{j \neq i} a_{jt} $ is the number of
other firms operating. $\theta_F$ is a fixed operating cost, and
$\theta_E$ is an entry cost.
\end{example}
\section{Identification}
\subsection{General non-identification}
\cite{rust1994} shows that without some restrictions, the above model
is not identified. In the data, we observe
\[ \alpha(s) = \argmax_{a \in A} v(a,s). \]
\cite{rust1994} calls this the reduced form of a Markov decision
problem. The structure of a Markov decision problem is the
mapping, $\Lambda: \{\beta,U,F \} \to \alpha$ defined by
\[ \alpha(s) = \argmax_{a \in A} v(a,s) \]
where
\[ v(a,s) = U(a,s) + \beta \Er[ \max_{a' \in A} v(a',s') | a,s]. \]
Our goal is to identify the structure, $(\beta,U,F )$.
\begin{definition} \label{d:oe}
Primitives $(\beta,U,F)$ and $(\tilde{\beta},\tilde{U},\tilde{F})$
are \textbf{observationally equivalent} if
\[ \Lambda(\beta,U,F) =
\Lambda(\tilde{\beta},\tilde{U},\tilde{F}). \]
\end{definition}
It's clear that $\Lambda(\beta,U,F) = \Lambda(\beta,aU + b,F)$, so we
can at most identify $U$ up to a linear
transformation. \cite{rust1994} shows that we can identify even less.
\begin{lemma}\label{lem:rustNoId}
Let $f$ be any measurable function of $s$. Define:
\[ \tilde{U}_f(a,s) = U(a,s) + f(s) - \beta \Er[ f(s') | a, s]. \]
Then
\[ \Lambda(\beta,U,F) =
\Lambda(\beta,\tilde{U}_f,F). \]
\end{lemma}
\begin{proof}
Let $v(a,s)$ be the choice specific value function for
$(\beta,U,F)$. We can guess and verify that
\[ \tilde{v}_f(a,s) = v(a,s) + f(s) \]
is the choice specific value function for $(\beta,\tilde{U}_f,F)$.
\begin{align*}
\tilde{v}_f(a,s) = & \tilde{U}_f(a,s) + \beta \Er[ \max_{a' \in A}
\tilde{v}_f(a',s') | a,s] \\
= & U(a,s) + f(s) - \beta \Er[ f(s') | a, s] + \beta \Er[
\max_{a' \in A} v(a',s') + f(s') | a, s] \\
= & U(a,s) + f(s) + \beta \Er[ \max_{a' \in A} v(a',s') | a, s]
\\
= & v(a,s) + f(s).
\end{align*}
Since $f(s)$ does not depend on $a$,
\[ \argmax_{a \in A} v(a,s) = \argmax_{a \in A} v(a,s) + f(s), \]
so
\[ \alpha(s) = \tilde{\alpha}_f(s) \]
and the models are observationally equivalent.
\end{proof}
This lemma shows that given any $U$, there are many observationally
equivalent $\tilde{U}_f$. Even knowing $\beta$ and $F$, $U$ is not
identified. \cite{rust1994} also shows that given any reduced form
policy function, there is a $U$ that generates it.
\begin{lemma}\label{lem:rustNoTest}
Let $\alpha:S \to A$ and $A$ be discrete. Then $U(a,s) = 1\{a =
\alpha(s)\} - \beta$ along with any $F$ and $\beta$ results in
\[ \Lambda(\beta,U,F) = \alpha. \]
\end{lemma}
\begin{proof}
Guess and verify that $v(a,s)= 1\{a = \alpha(s) \}$.
\end{proof}
This lemma shows that having a Markov decision problem places no
restrictions on the observed policy function (other than it being
Markovian).
\subsection{Identification in dynamic discrete decision models \label{s:idd}}
\cite{magnacThesmar2002} expand on the non-identification result of
\cite{rust1994}, and give conditions under which identification is
possible. \cite{magnacThesmar2002} specifically focus on dynamic
discrete decision models that satisfy the following assumptions. These
assumptions have been used in most applications.
\begin{assumption}[Discrete actions] \label{id:da}
$A$ is discrete and finite.
\end{assumption}
\begin{assumption}[Additive separability] \label{id:as}
Instantaneous utility functions are given by
\[ U(a,x,\epsilon) = u(a,x) + \epsilon(a) \]
where
\[ \Er[\epsilon(a)|x] = 0 \]
for each $a \in A$. The cdf of $\epsilon$, denoted $G$, is
absolutely continuous
with respect to Lebesgue measure in $\R^{|A|}$.
\end{assumption}
\begin{assumption}[Conditional independence] \label{id:ci}
For any $t \neq t'$, $\epsilon_{it}$ and $\epsilon_{it'}$ are
independent conditional on $x$ and $a$.
\end{assumption}
As a result of these assumptions,
\begin{align*}
v(a,x,\epsilon) = & U(a,x,\epsilon) + \beta \Er[ \max_{a' \in A}
v(a',x',\epsilon')| x, a, \epsilon] \\
= & u(a,x) + \epsilon(a) + \beta \Er[ \max_{a' \in A} \tilde{v}(a',x')
+ \epsilon(a') | x,a] \\
= & \tilde{v}(a,x) + \epsilon(a)
\end{align*}
\cite{magnacThesmar2002} assume that the support of $x$ is discrete
and finite as well. However, this assumption is unnecessary, as shown
by \cite{bchn2009}. In any case, let
\[ \Pr(a|x) = \Pr\left(a \in \argmax_{a' \in A} \tilde{v}(a',x) + \epsilon(a')
\right). \]
\cite{hotzMiller1993} show that these equations can be inverted to
yield
\[ \tilde{v}(a,x) - \tilde{v}(0,x) = q(a,\Pr(\cdot|x);G) \]
where $0 \in A$ is some reference action, and
$q$ depends on the distribution of $\epsilon$, $G$. In many
applications, it is assumed that $\epsilon(a)$ has an extreme value
distribution,
\[ G_a(\epsilon ) = \frac{e^\epsilon} {1+e^\epsilon}. \]
In this case,
\[ \tilde{v}(a,x) - \tilde{v}(0,x) = \log(\Pr(a|x)) -
\log(\Pr(0|x)). \]
Regardless, we can write
\begin{align*}
\tilde{v}(0,x) = & u(0,x) + \beta \Er[ \max_{a' \in A} \tilde{v}(a',x') +
\epsilon(a') | 0, x] \\
= & u(0,x) + \beta \Er[ \max_{a' \in A} \tilde{v}(a',x') - \tilde{v}(0,x') +
\epsilon(a') | 0, x] + \beta \Er[\tilde{v}(0,x')|0,x]
\end{align*}
The middle term is some function of $G$ and the observed choice
probabilities, say
\[ M(x,\Pr(\cdot|x),G) = \beta \Er[ \max_{a' \in A} \tilde{v}(a',x') -
\tilde{v}(0,x') + \epsilon(a') | 0, x]. \]
Suppose we normalize $u(0,x) = 0$. Then we have
\begin{align*}
\tilde{v}(0,x) = & M(x,\Pr(\cdot|x),G) + \beta \Er[\tilde{v}(0,x')|0,x]
\end{align*}
If the model is stationary (in particular the supports of $x$ and $x'$
coincide), then it is easy to show that this equation has a unique
solution for $\tilde{v}(0,x)$. If the model is not stationary, then in
the finite horizon case, we can begin with $t=T$ and set
$\tilde{v}(0,x) = M(x,\Pr(\cdot|x),G)$. In either case,
$\tilde{v}(0,x)$ is identified if $M$, which depends on $G$ and
observable data, and $\beta$ are known.
\begin{lemma}
Suppose $G$ and $\beta$ are known, and $u(0,x) = 0$, then
$\tilde{v}(0,x)$ is identified.
\end{lemma}
Finally, observe that
\begin{align*}
\tilde{v}(a,x) = & u(a,x) + \beta \Er[ \max_{a' \in A}
\tilde{v}(a',x') + \epsilon(a') | a ,x] \\
u(a,x) = & \tilde{v}(a,x) - \beta \Er[ \max_{a' \in A}
\tilde{v}(a',x') + \epsilon(a') | a ,x]
\end{align*}
so $u$ is also identified.
\begin{theorem}[Identification]
If assumptions \ref{id:da}-\ref{id:ci} hold, and $G$, $\beta$, and
$u(0,x) = 0$ are known, and the model is infinite horizon and
stationary, or finite horizon and we observe all time periods, then
$u(a,x)$ is identified.
\end{theorem}
This theorem shows that assuming errors are additively separable with
a known distribution, knowing the discount factor, and normalizing
$u(0,x)$ is sufficient for identification. \cite{magnacThesmar2002}
show that given assumptions \ref{id:da}-\ref{id:ci}, knowing $G$, $\beta$,
and $u(0,x)$ is also necessary in the sense that if $G' \neq G$, $\beta' \neq
\beta$, or $u(0,x)' \neq u(0,x)$, then there exists an observationally
equivalent $u(a,x)$. It is easy to see this result from our discussion
above. Every step that we took was constructive. We explicitly found
$u(a,x)$ given $\beta$, $G$, and $u(0,x)$. If we change any of those
three things, we will still end up with some $u(a,x)$, but it will be
different.
\subsection{Identification in dynamic decision models with
continuous actions}
\cite{schrimpf2011} gives an analogous identification result for
dynamic games with continuous actions.
\section{Estimation}
Given the above identification results, we will begin by focusing on
estimating models that satisfy assumptions \ref{id:da}, \ref{id:as},
and \ref{id:ci}. We will also assume that the distribution of
$\epsilon$, $G$, is known. Let's also suppose that the payoff function
has been parametrically specified, $u(a,x;\theta_u)$, and the transition
distribution is also parametrically specified, so that in particular,
the conditional expectation of any function of $x'$ and $a'$ given $x$
and $a$ can be written as $\Er[\cdot | x, a; \theta_p]$. Given data on
$a_{it}$ and $x_{it}$ we want to estimate $\theta_u$ and
$\theta_p$. To begin with, we will treat $\theta_u$ and $\theta_p$ as
finite dimensional parameters, but we will discuss nonparametric
estimation later (i.e.\ allow $\theta_p$ and/or $\theta_u$ to be
infinite dimensional). Throughout, we will focus on infinite horizon
problems.
\subsection{Nested fixed point}
This subsection is largely based on \cite{rust1994}.
The nested fixed point algorithm is a maximum likelihood estimator for
$\theta$. It is computationally intensive because it requires
numerically maximizing the likelihood, and each time the likelihood is
evaluated, the value function is solved for. Thus there are nested
fixed points being solved. The inner fixed point is the value function,
the outer one is the maximization of the likelihood.
\subsubsection{Solving for the value function}
For any given value of $\theta = (\theta_u,\theta_p)$, we can compute
the value function by
solving the Bellman equation.
There are many ways to do this. The most
straightforward method is value function iteration. That is, begin
with some guess for the value function, say
$V_0(x,\epsilon;\theta)$. Then update the guess by setting
\begin{align}
V_1(x,\epsilon;\theta) = \max_{a \in A} u(a,x;\theta_u) + \epsilon(a)
+ \beta \Er[V_0(x',\epsilon';\theta) | x, a ;\theta_p ] \label{e:bell}
\end{align}
Repeat until convergence. This method of computing $V$ is called value
function iteration. Value function iteration is stable and globally
convergent, but only converges at a geometric rate equal to
$\beta$.
\[ \norm{V_{i+1} - V} \lesssim \beta \norm{V_i - V} \]
As a result, when $\beta$ is near one we may need many iterations
before convergence.
If $x$ or $\epsilon$ are continuous, we cannot represent
$V(x,\epsilon;\theta)$ in a computer. Instead, we can only work with
some approximation of $V$. The most common approach is to discretize
$x$ and $\epsilon$. That is, divide the support of $x$ and $\epsilon$
into a finite number of bins, and approximate $V$ as being constant in
each of those bins. Then $V$ can be represented by a vector consisting
of the values of $V(x,\epsilon;\theta)$ for each $x,\epsilon$ bin. $V$
could also be approximated by a series or kernel.
Once you have chosen a way to approximate $V$, the Bellman equation
\ref{e:bell} becomes a system of non-linear equations. Rather than
using value function iteration, you could solve this system of
equations using Newton's method. Newton's method locally converges
quadratically, so it theoretically requires fewer iterations. That is,
if $V_i$ is close enough to $V$, then
\[ \norm{V_{i+1} - V} \lesssim \norm{V_i - V}^2. \]
However, Newton's method need not be globally convergent, so if the
initial $V_0$ is far from $V$, it can take longer than value function
iteration.
One method that is equivalent to Newton's method is policy
iteration. In policy iteration, you begin with an initial guess for
the policy function, $\alpha_0(x,\epsilon)$. Then you compute the corresponding
value function,
\begin{align*}
V_{\alpha}(x,\epsilon;\theta) = u(\alpha(x,\epsilon),x;\theta_u) +
\epsilon(\alpha(x,\epsilon))
+ \beta \Er[V_{\alpha}(x',\epsilon';\theta) | x, \alpha(x,\epsilon) ;\theta_p ].
\end{align*}
This is much easier than solving the Bellman equation, because there
is no maximization. Given a way of approximating $V$, this equation
can often be written as
\begin{align*}
\mathbf{V}_\alpha = \mathbf{U}(\theta_u) + \beta P(\theta_p) \mathbf{V}_\alpha
\end{align*}
where $\mathbf{V}$ and $\mathbf{U}(\theta_u)$ are vectors and
$P(\theta_p)$ is a square matrix. You can then solve for $V_\alpha$ as
\begin{align*}
\mathbf{V}_\alpha = (I - \beta P(\theta_p) )^{-1}
\mathbf{U}(\theta_u).
\end{align*}
After solving for $V_\alpha$, you update the policy function by
setting
\begin{align*}
\alpha_1(x,\epsilon) = \argmax_{a \in A} u(x,a;\theta) + \epsilon(a)
+ \beta \Er[V_{\alpha_0}(x',\epsilon')|x,a],
\end{align*}
and repeat until convergence. Like Newton's method, policy function
iteration locally converges quadratically, but is not globally
convergent or stable. In practice, it is often effective to begin with
value iteration and then switch to policy iteration.
There are many details that we have left unspecified in this
discussion. It is difficult to get a good understanding of how solving
dynamic programs works without going through some examples in
detail. On homework 6, you will solve a dynamic program by
discretizing the state space. On my website,
\url{http://faculty.arts.ubc.ca/pschrimpf/14.170/programming.html},
there is an example that uses series functions to approximate the
value and policy functions.
\subsubsection{The likelihood}
Once we have computed $V(x,\epsilon;\theta)$, we can easily compute
the likelihood. The probability of $a$ conditional on $x$ is
\begin{align}
\Pr(a|x;\theta) = \Er_{\epsilon}\left[ 1\{ a = \argmax_{\tilde{a} \in
A} u(\tilde{a},x;\theta_u) + \epsilon(\tilde{a}) + \beta
\Er[V(x',\epsilon';\theta)|\tilde{a},x;\theta_p] \} \right].
\end{align}
Then the log likelihood is
\begin{align*}
\mathcal{L}(\theta) = & \frac{1}{N} \sum_{i=1}^N \sum_{t=1}^{T_i} \log
\Pr(a_{it} | x_{it};\theta)
\end{align*}
We estimate $\theta$ by maximizing the likelihood. \cite{rust1994}
suggests using either the BHHH [\cite{bhhh1974}] or BFGS algorithm to
maximize the likelihood. Both of these algorithms are quasi-Newton
methods that do not require explicitly calculating the Hessian, but
still converge at a faster than linear (although not quite quadratic)
rate. The BHHH approximates the Hessian using the outer-product of
the gradient. From the information equality, we know that this
approximation will be exact in an infinite sample. The BFGS algorithm
approximates the Hessian by the change in the gradient at various
evaluations of the likelihood.
\subsubsection{Two and three step nested fixed point}
The nested fixed point algorithm is computationally intensive. One way
to slightly reduce the amount of computation is to first estimate
$\theta_p$. Recall that $\theta_p$ enters $\Er[\cdot | x,
a;\theta_p]$. Since $x$ and $a$ are observed, this conditional
expectation functional can be estimated without using the full
model. Thus, we can first estimate $\theta_p$, and then maximize the
likelihood with respect to $\theta_u$ only. This maximization should
require fewer iterations because it can search over a lower
dimensional space. The resulting two-step estimates will not be as
efficient as the one-step estimates. However, they will be
consistent. As always with consistent estimates, we can then perform
one (or more) Newton step(s) of the full likelihood to obtain
efficient estimates.
\subsection{Hotz and Miller's CCP method}
Even the two-step version of the nested fixed point algorithm can be
computationally infeasible for large problems. \cite{hotzMiller1993} propose
an estimator that is much easier to compute. Suppose that the payoff
function is linear in parameters,
\[ u(a,x;\theta_u) = z(a,x)'\theta_u, \]
where $z(a,x)$ is known. Then the choice specific value functions are
given by
\begin{align*}
\tilde{v}(a,x) = & z(a,x)' \theta_u + \beta \Er\left[ \max_{a' \in
A} \tilde{v}(a',x') + \epsilon(a')
| a, x \right] \\
= & z(a,x)' \theta_u + \beta \Er\left[\sum_{a' \in A} \tilde{v}(a',x')
\Pr(a'|x') | a, x \right] + \beta \Er\left[\sum_{a' \in A}
\Er[\epsilon(a')|\alpha(x',\epsilon') = a'] \Pr(a'|x') \Big| a,x\right] \\
= & z(a,x)' \theta_u + \beta \Er\left[\sum_{a' \in A} \tilde{v}(a',x')
\Pr(a'|x') | a, x \right] + \beta \Er[\sum_{a' \in A}
\Er[\epsilon(a')|\tilde{v}(a',x') + \epsilon(a') \geq
\tilde{v}(a'',x')+\epsilon(a'') \, \forall a'' \in A]\Pr(a'|x') |a,x]
\end{align*}
Let
\[ e(a,x) = \beta \Er[\sum_{a' \in A}
\Er[\epsilon(a')|\tilde{v}(a',x') + \epsilon(a') \geq
\tilde{v}(a'',x')+\epsilon(a'') \, \forall a'' \in A] \Pr(a'|x') |a,x]. \]
Then it is clear that we can write
\[ \tilde{v}(a,x) = \tilde{z}(a,x)' \theta_u + \tilde{e}(a,x) \]
where
\[ \tilde{z}(a,x) = z(a,x) + \beta \Er[\sum_{a' \in A}
\tilde{z}(a',x') \Pr(a'|x') | a,x] \]
and
\[ \tilde{e}(a,x) = e(a,x) + \beta \Er[\sum_{a' \in A}
\tilde{e}(a',x') \Pr(a'|x') | a,x]. \]
Note that $\Er[\epsilon(a')|\tilde{v}(a',x') + \epsilon(a') \geq
\tilde{v}(a'',x')+\epsilon(a'') \, \forall a'' \in A]$ only depends
on the distribution of $\epsilon$, $G$, and $\tilde{v}(a',x') -
\tilde{v}(a'',x')$. As in the identification section,
\cite{hotzMiller1993} show that
\[ \tilde{v}(a',x') -
\tilde{v}(a'',x') = q(a',a'',\Pr(\cdot|x),G) \]
for some known function $q$. $\Pr(a|x)$ can be estimated from the
observed $a$ and $x$. $\Er[ \cdot | a, x]$ can also be estimated from
the observed $a$ and $x$. Therefore, estimates of $\tilde{z}$ and
$\tilde{e}$ can be formed before estimating $\theta$. Denote these by
$\hat{z}$ and $\hat{e}$. Once we have $\hat{z}$ and $\hat{e}$, we can
form estimates of the probability of any action conditional on $x$
given any $\theta$.
\[ \hat{\Pr}(a|x;\theta) = \Pr\left(a \in \argmax_{a' \in A} \hat{z}(a',x)'
\theta + \hat{e}(a',x) \right) \]
\cite{hotzMiller1993} propose estimating $\theta$ by GMM using moment
conditions of the form
\[ \sum_{i=1}^N \sum_{t=1}^{T_i} f(x_{it}) \left(1\{a_{it}=a\} -
\hat{\Pr}(a|x_{it};\theta) \right). \]
\cite{am2002} show that if you use the pseudo maximum likelihood to
estimate $\theta$ with pseudo-likelihood function,
\[ \tilde{\mathcal{L}}(\theta) = \sum_{i=1}^N \sum_{t=1}^{T_i} \log
\hat{\Pr}(a_{it}|x_{it};\theta), \]
then $\hat{\theta}$ has the same asymptotic distribution as when you
use the two-step nested fixed point estimator. However,
\cite{am2010} describe Monte Carlo evidence that in finite samples,
this pseudo maximum likelihood estimator can have large bias.
\begin{itemize}
\item Nested pseudo likelihood.
\item Using simulation.
\end{itemize}
\section{Dynamic games}
The above methods can be applied to dynamic games as well as dynamic
decision problems. As above, let's restrict our attention to games
with discrete states and actions. Suppose there are $N$ players
indexed by $i$. Each player chooses a discrete action $a_{it} \in A$
given the current observed state $x_t = (x_{1t}, ..., x_{Nt})$ and a
private shock $\epsilon_{it}$. $\epsilon_{it}$ is only known to player
$i$. $x_t$ is common knowledge among all players. The payoff of player
$i$ depends on the actions of all players, $a_t = (a_{1t}, ...,
a_{Nt})$, the state, $x_t$, and the private shock,
$\epsilon_{it}$. Assume that payoffs are additively separable in
$\epsilon$,
\[ U_i(a_t,x_t,\epsilon_{it}) = u_i(a_t,x_t) + \epsilon_{it}(a_{it}). \]
We will assume that $\epsilon$ is iid across time and players with CDF
$G$. We also assume that the evolution of $x_t$ does not depend on
$\epsilon$, $F(x_{t+1} | a_t,x_t,\epsilon_t) = F(x_{t+1}|a_t,x_t)$.
We restrict our attention to Markov perfect equilibria, so strategies
only depend on the current state. Let $\alpha:(X \times \R)^N \to
A^N$ denote a vector of strategies. $\alpha_i$ is the strategy of
player $i$, and $\alpha_{-i}$ is the strategy of the other
players. Let $V_i^\alpha(x_t,\epsilon_{it})$ be the value function for
player $i$. The associated integrated value function is
\begin{align*}
\bar{V}_i^\alpha(x_t) = & \int V_i^\alpha(x_t,\epsilon_{it})
dG(\epsilon_{it}) \\
= & \int \max_{a_{it} \in A}\left(v_i^\alpha(a_{it},x_t) +
\epsilon_{it}(a_{it}) \right) dG(\epsilon_{it})
\end{align*}
where $v_i^\alpha$ is the choice specific integrated value function,
which solves
\begin{align}
v_i^\alpha(a_{it},x_t) = & \Er_{\epsilon_{-i}}\left[
u_i(a_{it},\alpha_{-i}(x_t,\epsilon_{-it}),x_t) + \beta
\Er_x[\bar{V}_i^\alpha(x_{t+1}) | a_{it},
\alpha_{-i}(x_t,\epsilon_{-it}),x_t ] \right] \label{e23}
\end{align}
where the outer expectation is over $\epsilon_{-it}$ and the inner one
is over $x_{t+1}$. As in the single agent case, we can define
conditional choice probabilities,
\begin{align*}
\Pr^\alpha_i(a_i|x) = & \Pr\left( a_i = \argmax_{j \in A}
v_i^\alpha(j,x) + \epsilon_{it}(j) | x \right) \\
= & \int 1\left\lbrace a_i = \argmax_{j \in A}
v_i^\alpha(j,x) + \epsilon_{it}(j) \right\rbrace
dG(\epsilon_{it}).
\end{align*}
In the single agent case, the restrictions of the model end here. The
conditional choice probabilities must be consistent with maximizing
the value function. In a dynamic game, we have an additional
restriction: the conditional choice probabilities should form an
equilibrium. To add this constraint, rewrite the expectation over
$\epsilon_{-i}$ in (\ref{e23}) as
\begin{align}
v_i^\alpha(a_{it},x_t) \equiv v_i^P(a_{it},x_t) = & \sum_{a_{-i} \in
A^{N-1}} \Pr_{-i}(a_{-i} | x_t) \left( u_i(a_{it},a_{-i},x_t) +
\beta \Er_x[\bar{V}_i^\alpha(x_{t+1}) | a_{it},a_{-i},x_t ]
\right) \label{eqv}
\end{align}
where
\[ \Pr_{-i}(a_{-i}|x) = \prod_{j \neq i}^N \Pr(a_j|x). \]
Let
\[ \Lambda(a|v_i^P(\cdot,x)) = \int 1\left\lbrace a_i = \argmax_{j
\in A}
v_i^P(j,x) + \epsilon_{it}(j) \right\rbrace
dG(\epsilon_{it}). \]
Then the equilibrium condition is that
\[ \Pr_i(a|x) = \Lambda(a|v_i^P(\cdot,x)) \]
\newcommand{\Pb}{\mathbf{\mathrm{P}}}
\newcommand{\Lb}{\mathbf{\Lambda}}
or in vector form $\Pb = \Lb(v^P)$ where $\Pb$ is the vector of
conditional choice probability functions for all players, and $\Lb$ is
similarly defined.
Viewed as a function of $\Pb$, $\Lb$ is a mapping from $[0,1]^{N|X|}$
to $[0,1]^{N|X|}$. Moreover, $\Lb$ is continuous, and the unit cube in
$\R^{N|X|}$ is a convex, compact set, so by Brouwer's fixed point
theorem, there exists at least one equilibrium. There are often
multiple equilibria.
\subsection{Identification}
The identification argument for single-agent dynamic decision problems
shows that given $G$, $\beta$, and
$\Er_\epsilon[u(0,\alpha_{-i}(x,\epsilon_{-i}),x) ] = 0$, we can
identify the expectation over other player's actions of the payoff
function,
\begin{align*}
\Er_\epsilon[u(a_i,\alpha_{-i}(x,\epsilon_{-i}),x) ] =
\sum_{a_{-i}} \Pr(a_{-i}|x) u(a_i,a_{-i},x)
\end{align*}
When $x$ and $a$ are discrete, the left hand side of this equation
takes $|A| |X|$ identified values. On the right side,
$\Pr(a_{-i} | x)$ is known, but $u(a_i,a_{-i},x)$ is not, and it can
take $|A|^N |X|$ values. Therefore, we need some restriction if we want
to identify the payoff function. The usual approach is to assume that
there are some state variables that enter the payoff of the other
players, but not player $i$'s payoff directly. Then $\Pr(a_{-i}|x)$
depends on all state variables, but $u(a_i,a_{-i},x_i)$ does not. Then
$u$ is identified if the system of equations
\begin{align*}
\Er_\epsilon[u(a_i,\alpha_{-i}(x,\epsilon_{-i}),x) ] =
\sum_{a_{-i}} \Pr(a_{-i}|x) u(a_i,a_{-i},x_i)
\end{align*}
has a unique solution for $u(a_i,a_{-i},x_i)$.
\subsection{Estimation}
Each of the estimation approaches described for single agent problems
above can also be used for dynamic games. As before, let $\theta =
(\theta_u,\theta_p)$ be the parameters of the payoff function and
transition distribution. Let's suppose we have data on $M$ markets
each with $N$ players and $T_m$ time periods. We will assume that all
of the data is generated by a single equilibrium. There may be
multiple equilibria given the parameters of the model, but we only
observe one.
\subsubsection{Maximum likelihood}
The likelihood can be written
\begin{align*}
\mathcal{L}(\theta,\Pb) = \sum_{m=1}^M \sum_{t=1}^{T_m} \sum_{i=1}^N
\log \Lambda(a_{imt} | v_i^P(\cdot, x_{mt};\theta) ).
\end{align*}
The maximum likelihood estimator can be written
\begin{align*}
\hat{\theta}_{MLE} = \argmax_{\theta \in \Theta} \sup_{\Pb \in
(0,1)^{N|X|}} \mathcal{L}(\theta,\Pb) \text{ s.t. } \Pb = \Lb(v^P(\theta))
\end{align*}
This estimator is often very difficult to compute. In addition to the
difficulty of repeatedly solving the dynamic programming problem for
each player, we must find all equilibria for each $\theta$ at which we
evaluate the constraint. In general, multiple equilibria cannot
be ruled out theoretically. There is also no easy method to
compute all equilibria. As in the single agent case, we could perform
two or three step maximum likelihood if there are some parameters that
can be estimated in a first step without solving the full model.
\subsubsection{Pseudo likelihood}
As in the single agent case, we can also estimate $\theta$ using
(nested) pseudo maximum likelihood. Given an initial consistent
estimate of $\Pb$, say $\hat{\Pb}$, the pseudo maximum likelihood
estimate is
\begin{align*}
\hat{\theta}^{PMLE}_{(0)} = \argmax_{\theta \in \Theta} \mathcal{L}(\theta,\hat{\Pb})
\end{align*}
As above, we can repeat this process to get an iterated pseudo
likelihood estimator. Let
\[ \hat{\Pb}_{(1)} = \Lb\left(v^{\hat{\Pb}}(\hat{\theta}^{PMLE}_{(0)})
\right). \]
Given that $\hat{\Pb}$ is consistent, $\hat{\theta}^{PMLE}_{(0)}$ will
be as well. Then $\hat{\Pb}_{(1)}$ is also consistent. We can then define
\begin{align*}
\hat{\theta}^{PMLE}_{(1)} = \argmax_{\theta \in \Theta} \mathcal{L}(\theta,\hat{\Pb}_{(1)}).
\end{align*}
Repeating this $k$ times, we get the $k$-step pseudo maximum likelihood
estimate, $\hat{\theta}^{PMLE}_{(k)}$. We can repeat this process to
convergence. Unfortunately, this limit need not be the full maximum
likelihood estimate. It only needs to solve
\begin{align*}
\hat{\theta}^{PMLE}_{(\infty)} = \argmax_{\theta \in \Theta}
\mathcal{L}(\theta,\hat{\Pb}_{(\infty)}) \text{ s.t. }
\hat{\Pb}_{(\infty)} =
\Lb(v^{\hat{\Pb}_{(\infty)}}(\hat{\theta}^{PMLE}_{(\infty)} ) ).
\end{align*}
To get the maximum likelihood estimate, we must take the maximum of
all the limit points of iterated pseudo maximum likelihood
estimates. \cite{am2007} call this the nested pseudo likelihood
estimator.
\subsubsection{Asymptotic distribution of likelihood estimators}
Unlike in the single agent case, two-step maximum likelihood and
pseudo maximum likelihood do not have the same asymptotic
distribution. Pseudo maximum likelihood, iterated pseudo likelihood,
and nested pseudo likelihood also have different distributions. To
simplify the asymptotics, we will look at the case where $T$ and $N$
are fixed, $M \to \infty$, and observations across markets are
independent. The analysis can easily be adapted to other cases.
Let's begin by looking at the pseudo maximum likelihood estimator,
$\hat{\theta}^{PLE}_{(0)}$. Taking the usual mean value expansion of
the first order condition, we have
\begin{align*}
0 = & \nabla_\theta \mathcal{L}(\hat{\theta}^{PLE}_{(0)},\hat{\Pb}) \\
= & \nabla_\theta \mathcal{L}(\theta_0,\Pb_0) +
\nabla^2_{\theta,\theta} \mathcal{L} (\hat{\theta}^{PLE}_{(0)} - \theta_0) +
\nabla^2_{\theta,P } \mathcal{L} (\hat{\Pb}-\Pb_0) + o_p(M^{-1/2}) \\
\sqrt{M} (\hat{\theta}^{PLE}_{(0)} - \theta_0) = & - \Omega_{\theta,\theta}^{-1} \left(
\sqrt{M} \nabla_\theta \mathcal{L} + \Omega_{\theta,P} \sqrt{M}
(\hat{\Pb}-\Pb_0) \right)+ o_P(1).
\end{align*}
Note that if we knew $\Pb_0$, we could maximize the likelihood with
respect to $\theta$ given the known $\Pb$ to obtain an infeasible
maximum likelihood estimate, $\hat{\theta}^{IMLE}$.
\[ \hat{\theta}^{IMLE} = \argmax_{\theta \in \Theta}
\mathcal{L}(\theta,\Pb_0) \]
This infeasible estimate would have a similar asymptotic expansion as $\hat{\theta}^{PLE}_{(0)}$, but without the
second term,
\[ \sqrt{M} (\hat{\theta}^{IMLE} - \theta_0) = -
\Omega_{\theta,\theta}^{-1} \sqrt{M} \nabla_\theta \mathcal{L} + o_P(1). \]
Let's assume that
\[ \sqrt{M} \nabla_\theta \mathcal{L} \indist
N(0,\Omega_{\theta,\theta}) \] Since $\hat{\theta}^{IMLE}$ is the
maximum likelihood estimate of $\theta$ when $\Pb$ is known (and the
constraint that $\Pb$ must correspond to equilibrium choice
probabilities given $\theta$ has not been imposed), it is efficient
(among estimates that do not impose the constraint).
$\hat{\theta}^{PLE}_{(0)}$ is another consistent estimator. As usual,
the difference between an efficient estimate and another estimate must
be uncorrelated with the efficient estimate (if not, we could construct
a more efficient estimate). Therefore, if we assume that
\[ \sqrt{M} (\hat{\Pb} - \Pb_0) \indist N(0,\Sigma_0) \]
then
\[ \sqrt{M} (\hat{\theta}^{PLE}_{(0)} - \theta_0) \indist N\left(0,
\Omega_{\theta,\theta}^{-1} + \Omega_{\theta,\theta}^{-1}
\Omega_{\theta,P} \Sigma_0 \Omega_{\theta,P}'
\Omega_{\theta,\theta}^{-1} \right)
\]
From this, we see that $\hat{\theta}^{PLE}_{(0)}$ has variance equal
to that of the infeasible MLE , $\Omega_{\theta,\theta}^{-1}$, plus an
additional term due to the estimation of $\Pb$.
From the above, we can easily get an expression for the asymptotic
distribution of iterated pseudo likelihood. We instantly get that
\begin{align}
\sqrt{M}(\hat{\theta}^{PLE}_{(K)} - \theta_0) \indist N\left(0,
\Omega_{\theta,\theta}^{-1} + \Omega_{\theta,\theta}^{-1}
\Omega_{\theta,P} \Sigma_k \Omega_{\theta,P}'
\Omega_{\theta,\theta}^{-1} \right) \label{e:vk}
\end{align}
where $\Sigma_k$ is the asymptotic variance of $\hat{\Pb}_k$. We can
compute $\Sigma_k$ using the delta method. For $k=1$ we have
\begin{align*}
\sqrt{M} (\hat{\Pb}_{(1)} - \Pb_0) = & \sqrt{M} \left(
\Lb(\hat{\theta}^{PLE}_{(0)},
\hat{\Pb}) - \Lb(\theta_0, \Pb_0) \right) \\
= & \sqrt{M} \left( \Lb_\theta (\hat{\theta}^{PLE}_{(0)} - \theta_0) + \Lb_P
(\hat{\Pb} - \Pb_0) \right) +
o_P(1) \\
\indist & N\left( 0, \Lb_\theta \Omega_{\theta,\theta}^{-1}
\Lb_{\theta}' + \left( \Lb_\theta \Omega_{\theta,\theta}^{-1}
\Omega_{\theta,P} + \Lb_P\right) \Sigma_0 \left( \Lb_\theta
\Omega_{\theta,\theta}^{-1} \Omega_{\theta,P} + \Lb_P\right)'
\right)
\end{align*}
We can plug this result into (\ref{e:vk}) to get the asymptotic variance
of $\hat{\theta}^{PLE}_{(k)}$. However, there does not seem to be any
nice way to simplify this expression. It is generally not even
possible to show whether iterating decreases or increases the
variance.
The nested pseudo likelihood estimate satisfies both
$\Lb(v^{\hat{\Pb}^{NPL}}(\hat{\theta}^{NPL})) = \hat{\Pb}^{NPL}$ and the
first order condition for maximizing the likelihood. Combining these
two conditions and rearranging, we can show that
\begin{align*}
\sqrt{M} (\hat{\theta}^{NPL} - \theta_0) \indist & N\left( 0, \left(
\Omega_{\theta,\theta} + \Omega_{\theta,P} (I - \Lb_P)^{-1}
\Lb_\theta \right)^{-1} \Omega_{\theta,\theta} {\left(
\Omega_{\theta,\theta} + \Omega_{\theta,P} (I - \Lb_P)^{-1}
\Lb_\theta \right)^{-1}}' \right)
\end{align*}
\cite{am2007} show that if the eigenvalues of $\Lb_P$ are between $0$
and $1$, then this variance is smaller than
$\Omega_{\theta,\theta}^{-1}$. However, there is no particular reason
for this condition to hold. \cite{ks2009} give some examples where the
eigenvalues of $\Lb_P$ are outside of this range.
\paragraph{Comparison with single-agent model.} In the single agent
case, the pseudo likelihood estimate and iterated versions of it all
have the same asymptotic distribution. The reason is that
$\Omega_{\theta,P} = 0$ in single agent models. The reason this
derivative is zero is that it depends on the derivative of $v$ with
respect to $P$. Since $P$ maximizes $v$, this derivative is zero. In a
game, $v_i^P$ still has zero derivative with respect to $P_i$, but it
does not have zero derivative with respect to
$P_{-i}$. Therefore, $\Omega_{\theta, P} \neq 0$ in dynamic games.
% \cite{rust1994} nested fixed point: definition, asymptotic distribution.
% \cite{hotzMiller1993}
% \section{Applications}
% \cite{rust1986}: Harold Zurcher bus engine replacement.
% \section{}
% Use \cite{am2010} notation and structure?
% \cite{pakes1994}: continuous controls and when Euler equations are
% possible.
\newpage
\bibliographystyle{econometrica}
\bibliography{../628}
\end{document}