[proofplan]
The proof rewrites the Dickey-Fuller statistic as a ratio of two normalized functionals of the random walk. The numerator is handled by the identity $X_t^2 - X_{t-1}^2 = 2X_{t-1}Z_t + Z_t^2$, which reduces it to the endpoint of the random walk and the sample second moment of the innovations. The denominator follows from the functional central limit theorem and a Riemann-sum continuous mapping argument, while the residual variance estimator is shown to converge to $\sigma^2$. Slutsky's theorem then gives the asserted ratio.
[/proofplan]
[step:Represent the random walk by its normalized partial-sum process]
Let $D[0,1]$ denote the space of real-valued càdlàg functions on $[0,1]$, equipped with the Skorokhod topology. For each $T \in \mathbb{N}$, define the normalized càdlàg process
\begin{align*}
Y_T : [0,1] &\to \mathbb{R} \\
r &\mapsto \frac{X_{\lfloor Tr \rfloor}}{\sigma\sqrt{T}}.
\end{align*}
Also define the innovation partial-sum process
\begin{align*}
S_T : [0,1] &\to \mathbb{R} \\
r &\mapsto \frac{1}{\sigma\sqrt{T}}\sum_{t=1}^{\lfloor Tr \rfloor} Z_t.
\end{align*}
Since $X_{\lfloor Tr \rfloor}=X_0+\sum_{t=1}^{\lfloor Tr \rfloor}Z_t$, we have
\begin{align*}
Y_T(r) = \frac{X_0}{\sigma\sqrt{T}} + S_T(r), \qquad 0 \leq r \leq 1.
\end{align*}
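By the functional central limit theorem (citing a result not yet in the wiki: Donsker's invariance principle), $S_T \xrightarrow{d} W$ in $D[0,1]$ with the Skorokhod topology. The perturbation $X_0/(\sigma\sqrt{T})$ is constant in $r$, so $\|Y_T-S_T\|_\infty = |X_0|/(\sigma\sqrt{T})$, and the assumption $X_0/\sqrt{T}\xrightarrow{\mathbb{P}}0$ shows that $Y_T-S_T$ converges to $0$ in probability in the uniform norm. Because the Skorokhod distance is bounded above by the uniform distance, the Skorokhod distance between $Y_T$ and $S_T$ also converges to $0$ in probability. Hence, by Slutsky's theorem in $D[0,1]$ (citing a result not yet in the wiki: Slutsky theorem in metric spaces),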
\begin{align*}
Y_T \xrightarrow{d} W
\end{align*}
in $D[0,1]$.
[/step]
[step:Convert the denominator into the Brownian quadratic functional]
Define the normalized denominator, that is, the scaled sum of squared lagged levels,
\begin{align*}
D_T := \frac{1}{T^2}\sum_{t=1}^{T}X_{t-1}^2.
\end{align*}
Using $X_{t-1}=\sigma\sqrt{T}\,Y_T((t-1)/T)$, we get
\begin{align*}
D_T
= \sigma^2 \frac{1}{T}\sum_{t=1}^{T}Y_T\left(\frac{t-1}{T}\right)^2.
\end{align*}
For each $T \in \mathbb{N}$, define the Riemann-sum functional
\begin{align*}
F_T : D[0,1] &\to \mathbb{R} \\
f &\mapsto \frac{1}{T}\sum_{t=1}^{T} f\left(\frac{t-1}{T}\right)^2.
\end{align*}
Also define
\begin{align*}
\Phi : C[0,1] &\to \mathbb{R} \\
f &\mapsto \int_0^1 f(r)^2\, d\mathcal{L}^1(r).
\end{align*}
If $f_T \in D[0,1]$ and $f \in C[0,1]$ satisfy $\|f_T-f\|_\infty \to 0$, then
\begin{align*}
\left|F_T(f_T)-F_T(f)\right|
&\leq \|f_T-f\|_\infty\left(\|f_T\|_\infty+\|f\|_\infty\right) \to 0,
\end{align*}
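where the inequality uses $|a^2-b^2|=|a-b|\,|a+b|$ termwise, and the convergence holds because each $f_T$ is bounded (being càdlàg on $[0,1]$) and $\|f_T\|_\infty \leq \|f\|_\infty+\|f_T-f\|_\infty$, so that $\sup_T \|f_T\|_\infty < \infty$. Since $f^2$ is continuous, hence Riemann integrable, on the compact interval $[0,1]$, the left-endpoint Riemann sums satisfy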
\begin{align*}
F_T(f) \to \int_0^1 f(r)^2\, d\mathcal{L}^1(r)=\Phi(f).
\end{align*}
Thus $F_T(f_T) \to \Phi(f)$ whenever $f_T \to f$ uniformly and $f$ is continuous. Convergence in the Skorokhod topology to a continuous limit is equivalent to uniform convergence to that limit, so in fact $F_T(f_T)\to\Phi(f)$ whenever $f_T\to f$ in $D[0,1]$ and $f\in C[0,1]$. Brownian paths are continuous almost surely, that is, $\mathbb{P}(W\in C[0,1])=1$. Therefore, applying the extended continuous mapping theorem to $Y_T \xrightarrow{d} W$ gives
\begin{align*}
D_T=\sigma^2 F_T(Y_T) \xrightarrow{d} \sigma^2\int_0^1 W(r)^2\, d\mathcal{L}^1(r).
\end{align*}
Almost surely, the path of $W$ is continuous and not identically zero on $[0,1]$, so $W(r)^2>0$ on some nonempty open subinterval of $[0,1]$; hence the limiting integral is positive almost surely.
[/step]
[step:Rewrite the numerator using the square identity]
Define the normalized numerator
\begin{align*}
N_T := \frac{1}{T}\sum_{t=1}^{T}X_{t-1}\Delta X_t.
\end{align*}
Since $\Delta X_t=Z_t$, we have
\begin{align*}
X_t^2-X_{t-1}^2
&=(X_{t-1}+Z_t)^2-X_{t-1}^2 \\
&=2X_{t-1}Z_t+Z_t^2.
\end{align*}
Summing this identity from $t=1$ to $T$ gives
\begin{align*}
2\sum_{t=1}^{T}X_{t-1}Z_t
= X_T^2-X_0^2-\sum_{t=1}^{T}Z_t^2.
\end{align*}
Dividing by $T$ yields
\begin{align*}
N_T
= \frac{1}{2}\left(\frac{X_T^2}{T}-\frac{X_0^2}{T}-\frac{1}{T}\sum_{t=1}^{T}Z_t^2\right).
\end{align*}
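The evaluation map $f\mapsto f(1)$ is continuous on $D[0,1]$ in the Skorokhod topology, so $Y_T \xrightarrow{d} W$ and the continuous mapping theorem give $Y_T(1)\xrightarrow{d}W(1)$. Since $Y_T(1)=X_T/(\sigma\sqrt{T})$, applying the continuous mapping theorem once more to $x\mapsto \sigma^2x^2$, we obtain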
\begin{align*}
\frac{X_T^2}{T}\xrightarrow{d}\sigma^2 W(1)^2.
\end{align*}
The assumption $X_0/\sqrt{T}\xrightarrow{\mathbb{P}}0$ gives $X_0^2/T\xrightarrow{\mathbb{P}}0$. By the weak law of large numbers applied to the integrable random variables $Z_t^2$,
\begin{align*}
\frac{1}{T}\sum_{t=1}^{T}Z_t^2 \xrightarrow{\mathbb{P}} \mathbb{E}[Z_1^2]=\sigma^2.
\end{align*}
Applying Slutsky's theorem,
\begin{align*}
N_T \xrightarrow{d} \frac{\sigma^2}{2}\left(W(1)^2-1\right).
\end{align*}
[/step]
[step:Show that the residual variance estimates the innovation variance]
The ordinary least-squares residuals satisfy
\begin{align*}
\widehat{u}_{T,t}=Z_t-\widehat{\gamma}_T X_{t-1}, \qquad 1 \leq t \leq T.
\end{align*}
Therefore
\begin{align*}
\widehat{\sigma}_T^2
&= \frac{1}{T}\sum_{t=1}^{T}Z_t^2
-2\widehat{\gamma}_T\frac{1}{T}\sum_{t=1}^{T}X_{t-1}Z_t
+\widehat{\gamma}_T^2\frac{1}{T}\sum_{t=1}^{T}X_{t-1}^2.
\end{align*}
We use the notation $A_T=O_{\mathbb{P}}(a_T)$ for a sequence of real-valued random variables $(A_T)_{T\in\mathbb{N}}$ and positive deterministic numbers $(a_T)_{T\in\mathbb{N}}$ to mean that $(A_T/a_T)_{T\in\mathbb{N}}$ is tight: for every $\varepsilon>0$ there exist $M>0$ and $T_0\in\mathbb{N}$ such that $\mathbb{P}(|A_T/a_T|>M)<\varepsilon$ for all $T\geq T_0$.
Since
\begin{align*}
\widehat{\gamma}_T
= \frac{\sum_{t=1}^{T}X_{t-1}Z_t}{\sum_{t=1}^{T}X_{t-1}^2}
= \frac{T N_T}{T^2 D_T}
= \frac{N_T}{T D_T},
\end{align*}
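and since $N_T=O_{\mathbb{P}}(1)$ (because $N_T$ converges in distribution) while $D_T$ converges in distribution to an almost surely positive random variable, so that $1/D_T=O_{\mathbb{P}}(1)$ by the continuous mapping theorem, we have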
\begin{align*}
\widehat{\gamma}_T = O_{\mathbb{P}}\left(\frac{1}{T}\right).
\end{align*}
Also,
\begin{align*}
\frac{1}{T}\sum_{t=1}^{T}X_{t-1}Z_t=N_T=O_{\mathbb{P}}(1),
\end{align*}
and
\begin{align*}
\frac{1}{T}\sum_{t=1}^{T}X_{t-1}^2 = T D_T = O_{\mathbb{P}}(T).
\end{align*}
Consequently,
\begin{align*}
2\widehat{\gamma}_T\frac{1}{T}\sum_{t=1}^{T}X_{t-1}Z_t \xrightarrow{\mathbb{P}}0,
\qquad
\widehat{\gamma}_T^2\frac{1}{T}\sum_{t=1}^{T}X_{t-1}^2 \xrightarrow{\mathbb{P}}0.
\end{align*}
Combining these estimates with the weak law of large numbers for $T^{-1}\sum_{t=1}^{T}Z_t^2$ gives
\begin{align*}
\widehat{\sigma}_T^2 \xrightarrow{\mathbb{P}} \sigma^2.
\end{align*}
Since $\sigma>0$, the continuous mapping theorem gives $\widehat{\sigma}_T\xrightarrow{\mathbb{P}}\sigma$.
[/step]
[step:Assemble the normalized ratio]
By the definition of $\tau_T$,
\begin{align*}
\tau_T
&= \frac{\widehat{\gamma}_T\left(\sum_{t=1}^{T}X_{t-1}^2\right)^{1/2}}{\widehat{\sigma}_T} \\
&= \frac{\sum_{t=1}^{T}X_{t-1}Z_t}{\widehat{\sigma}_T\left(\sum_{t=1}^{T}X_{t-1}^2\right)^{1/2}} \\
&= \frac{T^{-1}\sum_{t=1}^{T}X_{t-1}Z_t}
{\widehat{\sigma}_T\left(T^{-2}\sum_{t=1}^{T}X_{t-1}^2\right)^{1/2}} \\
&= \frac{N_T}{\widehat{\sigma}_T D_T^{1/2}}.
\end{align*}
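The preceding arguments give joint convergence, not only marginal convergence. Indeed, $Y_T(1)$ and $F_T(Y_T)$ are functionals of the same process $Y_T$, and $Y_T \xrightarrow{d} W$ in $D[0,1]$. Whenever $f_T\to f$ in $D[0,1]$ with $f$ continuous, we have $\left(f_T(1),F_T(f_T)\right)\to\left(f(1),\Phi(f)\right)$, so the extended continuous mapping theorem gives the joint convergence of $\left(Y_T(1),F_T(Y_T)\right)$ to $\left(W(1),\int_0^1 W(r)^2\, d\mathcal{L}^1(r)\right)$. The remaining terms converge in probability to constants: $X_0^2/T\xrightarrow{\mathbb{P}}0$, $\widehat{\sigma}_T\xrightarrow{\mathbb{P}}\sigma$, and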
\begin{align*}
\frac{1}{T}\sum_{t=1}^{T}Z_t^2 \xrightarrow{\mathbb{P}} \sigma^2.
\end{align*}
Therefore the vector version of the continuous mapping theorem and Slutsky's theorem give
\begin{align*}
\left(N_T,D_T,\widehat{\sigma}_T\right)
\xrightarrow{d}
\left(
\frac{\sigma^2}{2}\left(W(1)^2-1\right),
\sigma^2\int_0^1 W(r)^2\, d\mathcal{L}^1(r),
\sigma
\right).
\end{align*}
The map $(a,b,c)\mapsto a/(c\sqrt{b})$ is continuous on $\{(a,b,c)\in\mathbb{R}^3:b>0,c>0\}$, and the limit vector lies in this set almost surely because $\sigma>0$ and the limiting integral is positive almost surely. Applying the continuous mapping theorem to this map therefore gives
\begin{align*}
\tau_T
\xrightarrow{d}
\frac{\frac{\sigma^2}{2}\left(W(1)^2-1\right)}
{\sigma\left(\sigma^2\int_0^1 W(r)^2\, d\mathcal{L}^1(r)\right)^{1/2}}
=
\frac{\frac{1}{2}\left(W(1)^2-1\right)}
{\left(\int_0^1 W(r)^2\, d\mathcal{L}^1(r)\right)^{1/2}}.
\end{align*}
This is the Dickey-Fuller limiting distribution for the no-intercept, no-trend unit-root regression.
[/step]