\documentclass[a4paper,10pt]{article}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{amsthm}
\usepackage{setspace}
\usepackage{harvard}
\usepackage{aer}
\usepackage{fullpage}
\usepackage{hyperref}
\usepackage{graphicx}
\newcommand{\indep}{\perp\!\!\!\perp}
\newcommand{\argmax}{\operatornamewithlimits{arg\,max}}
\newcommand{\argmin}{\operatornamewithlimits{arg\,min}}
\newcommand{\plim}{\operatornamewithlimits{plim}}
\newcommand{\citefull}[1]{\citename{#1} \citeyear{#1}}
\newcommand{\citeparagraph}[1]{\medskip\noindent\textbf{{\citename{#1} \citeyear{#1}}}}
\newcommand{\cov}{\text{Cov}}
\newcommand{\var}{\text{Var}}
\newcommand{\rank}{\text{rank}}
%\newcommand{\det}{\text{det}}
\def\inprobLOW{\rightarrow_p}
\def\inprobHIGH{\,{\buildrel p \over \rightarrow}\,}
\def\as{\,{\buildrel a.s. \over \rightarrow}\,}
\def\asu{\,{\buildrel a.s.u. \over \rightarrow}\,}
\def\inprob{\,{\inprobHIGH}\,}
\def\indist{\,{\buildrel d \over \rightarrow}\,}
% defined environments
\newtheorem{thm}{Theorem} %[section]
\newtheorem{cor}[thm]{Corollary}
\newtheorem{lem}[thm]{Lemma}
\newtheorem{prop}[thm]{Proposition}
\theoremstyle{remark}
\newtheorem{rem}[thm]{Remark}
\newtheorem{ex}[thm]{Example}
\theoremstyle{definition}
\newtheorem{defn}[thm]{Definition}
\title{14.385 Recitation 4}
\author{Paul Schrimpf}
\begin{document}
\maketitle
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{MLE as efficient GMM}
See lecture 5 notes and Newey and McFadden page 2163+.
%% \begin{defn}
%% $\hat{\theta}$ is \emph{asymptotically efficient} relative to
%% $\tilde{\theta}$ if $P(|\hat{\theta}-\theta_0| \geq K/\sqrt{n}) >
%% P(|\tilde{\theta} - \theta_0| \geq K/\sqrt{n})$ for all $n$ large
%% enough.
%% \end{defn}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Bootstrap}
See MacKinnon's slides for some comparison of bootstrap methods.
See website for bootstrap code in R and Stata.
\subsection{Examples of Failure}
Recall the theorem for the consistency of the bootstrap:
\begin{thm}
$G_n(\cdot,F_n)$ is consistent if for any $\epsilon>0$ and $F_0 \in \mathcal{F}$:
\begin{enumerate}
\item[(i)] $\lim P(\rho(F_n,F_0) > \epsilon) = 0$
\item[(ii)] $G_\infty(\tau,F)$ is continuous in $\tau$ for each $F$
\item[(iii)] For any $\tau$ and $\{H_n\}$ such that $\lim
\rho(H_n,F_0) = 0$, we have $G_n(\tau,H_n) \rightarrow
G_\infty(\tau,F_0)$
\end{enumerate}
\end{thm}
Interesting examples where the bootstrap fails usually involve
condition (iii) being violated. Horowitz gives two simple examples
where the bootstrap is inconsistent:\footnote{Note that in both of
these examples, we only show that the above theorem is inapplicable.
We would have to do more to show that the bootstrap is inconsistent.
}
\begin{itemize}
\item \emph{Maximum of a sample:} we talked about this in class.
Horowitz shows directly why it fails. It is also easy to see that
it does not meet condition (iii). Take $H_n$ to be the same as
$F_0$, but truncated to have support $[0,\theta_0 -1/n]$. Then
$\rho(H_n,F_0) \leq F_0(\theta_0) - F_0(\theta_0 - 1/n) \rightarrow
0$, where $\rho$ is the $L^\infty$ metric. However, $T_n(H_n) \leq
-1$, so $G_n(\tau,H_n)$ cannot converge to $G_\infty(\tau,F_0)$ for
$\tau \in (-1,0)$.
\item \emph{Parameter on the Boundary:} suppose we know the population
mean is $\mu \geq 0$. Our estimator is $m_n = \bar{X}
\mathbf{1}(\bar{X}>0)$. The statistic is $T_n = \sqrt{n}(m_n -
\mu)$. Suppose $F_0$ has $\mu=0$. Then $G_\infty(\cdot,F_0)$ is a
normal censored at 0. Take $H_n$ equal to $F_0$ with the mean shifted by
$1/\sqrt{n}$. Then $G_n(\cdot,H_n)$ converges to a normal censored
at $-1$.
\end{itemize}
In both of these examples, the limiting distribution depends on how
$F_n$ approaches $F_0$. Two other common situations in econometrics
with this feature are:
\begin{itemize}
\item Weak instruments, where if $\pi_n = C/\sqrt{n}$, $\hat{\beta} -
\beta_0$ converges to a nonnormal limit distribution that depends on
$C$
\item Unit roots: $y_t = \rho y_{t-1} + e_t$. If $|\rho| < 1$, then
$\hat{\rho}$ is asymptotically normal. If $\rho=1$, $\hat{\rho}$
has a nonstandard distribution. If $\rho_T = 1 - a/T$,
$\hat{\rho}$ has another nonstandard distribution that depends on
$a$.
\end{itemize}
\subsection{Subsampling}
Subsampling works in some of the situations where the bootstrap
fails. Let $\{X_i\}_{i=1}^n$ be our data with true distribution
$F_0$. We are interested in a statistic $T_n$ for which $\tau_n(T_n -
\theta_0)$ has distribution function $G_n(\cdot,F_0)$. Let
$\{Y_k\}_{k=1}^{N_n}$ be the $N_n = \binom{n}{b}$ subsets of $\{X_i\}$
of size $b$. Let $S_{n,k}$ be the value of $T$ computed using subset
$Y_k$. The subsample approximation to $G_n(\cdot,F_0)$ is:
\[ L_n(\tau) = N_n^{-1} \sum_{k=1}^{N_n} \mathbf{1}[\tau_b(S_{n,k} -
T_n) \leq \tau] \]
Politis and Romano (1994) give conditions for $L_n$ to be consistent:
\begin{thm}
Assume $G_n(\cdot,F_0) \leadsto G_\infty(\cdot,F_0)$. Also assume
$\frac{\tau_b}{\tau_n} \rightarrow 0$, $b \rightarrow \infty$ and
$\frac{b}{n} \rightarrow 0$. Then
\begin{enumerate}
\item Wherever $G_\infty(\cdot,F_0)$ is continuous, $L_n(x) \inprob
G_\infty(x,F_0)$
\item If $G_\infty(\cdot,F_0)$ is continuous, then $\sup_x |L_n(x) -
G_\infty(x,F_0)| \inprob 0$
\end{enumerate}
\end{thm}
\begin{proof}
Rewrite
\[ L_n(\tau) = N_n^{-1} \sum_{k=1}^{N_n} \mathbf{1}[\tau_b(S_{n,k} -
\theta_0) + \tau_b(\theta_0 - T_n) \leq \tau] \]
Convince yourself that if
\[ U_n(\tau) = N_n^{-1} \sum_{k=1}^{N_n} \mathbf{1}[\tau_b(S_{n,k} -
\theta_0) \leq \tau] \]
converges to $G_\infty(\cdot,F_0)$ and $G_\infty(\cdot,F_0)$ is
continuous, then $L_n \inprob G_\infty$ too.
$U_n$ is a U-statistic of degree $b$. Hoeffding showed that
U-statistics of degree $r$ satisfy $P(U - EU \geq t) \leq
e^{-2\frac{n}{r} t^2}$. Here $EU_n = G_b(\cdot,F_0)$ since
$G_b(\cdot,F_0)$ is the distribution of $\tau_b(S_{n,k} -
\theta_0)$. Thus we have
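\[ P(U_n(x) - G_b(x,F_0)\geq t) \leq e^{-2 \frac{n}{b} t^2}
\rightarrow 0 \]
since $b/n \rightarrow 0$. The same bound applies to $G_b(x,F_0) -
U_n(x)$, and $G_b(x,F_0) \rightarrow G_\infty(x,F_0)$ at continuity
points of $G_\infty(\cdot,F_0)$ because $b \rightarrow \infty$, so
$U_n(x) \inprob G_\infty(x,F_0)$.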
The second part of the theorem follows from Polya's lemma, just like
in the proof of the bootstrap.
\end{proof}
Politis and Romano prove a similar theorem for dependent data. The
only difference is that the subsamples must be contiguous blocks of
length $b$.
Remarks:
\begin{itemize}
\item The conditions for subsampling to be consistent are very weak.
They are definitely met in the examples of the maximum, parameter
on a boundary, and unit root above. They may be met in the weak
instruments case.
\item Subsampling converges at best at an $n^{-1/3}$ rate
\item The subsampling theorem above is about pointwise convergence.
Subsampling often does not converge uniformly, i.e.\
\[ \lim_{n \rightarrow \infty} \sup_{F \in \mathcal{F}} P(|L_n -
G_\infty(\cdot,F)|>\epsilon) \neq 0 \]
In practice, this means that subsampling can give poor results in
finite samples for some values of the parameters. Mikusheva (2007)
talks about how subsampling is not uniformly valid in AR models.
Andrews and coauthors have recent papers about how subsampling is
not uniformly consistent for parameters on the boundary and weak
instruments.
\end{itemize}
\end{document}