[proofplan]
We prove the result by comparing second-order Taylor expansions at the relevant points. The expansion of $g$ at $a$ gives a linear part $Dg_a(u)$ and a quadratic part
\begin{align*}
\frac{1}{2}D^2g_a(u,u)
\end{align*}
; substituting this into the expansion of $f$ at $g(a)$ produces the quadratic term for $f \circ g$. The mixed bilinear formula follows by identifying the [bilinear form](/page/Bilinear%20Form) whose diagonal part gives the resulting quadratic approximation.
[/proofplan]
[step:Write the second-order expansions for $g$ and $f$]
Set $b := g(a)$. Define the linear maps
\begin{align*}
L := Dg_a: \mathbb{R}^m \to \mathbb{R}^n
\end{align*}
and
\begin{align*}
M := Df_b: \mathbb{R}^n \to \mathbb{R}^p.
\end{align*}
Define the bilinear maps
\begin{align*}
A := D^2g_a: \mathbb{R}^m \times \mathbb{R}^m \to \mathbb{R}^n
\end{align*}
and
\begin{align*}
B := D^2f_b: \mathbb{R}^n \times \mathbb{R}^n \to \mathbb{R}^p.
\end{align*}
Since $U$ is open and $a \in U$, choose $\rho > 0$ such that the open ball
\begin{align*}
B(x,r) := \{y \in \mathbb{R}^m : |y-x| < r\}
\end{align*}
satisfies $B(a,\rho) \subset U$. By second differentiability of $g$ at $a$, there is a remainder map $r_g: B(0,\rho) \to \mathbb{R}^n$ such that
\begin{align*}
g(a+u)=b+L(u)+\frac{1}{2}A(u,u)+r_g(u)
\end{align*}
for all $u \in B(0,\rho)$, and
\begin{align*}
\frac{|r_g(u)|}{|u|^2}\to 0
\end{align*}
as $u \to 0$.
Since $V$ is open and $b \in V$, define $W := \{y \in \mathbb{R}^n: b+y \in V\}$. By second differentiability of $f$ at $b$, there is a remainder map $r_f: W \to \mathbb{R}^p$ such that
\begin{align*}
f(b+y)=f(b)+M(y)+\frac{1}{2}B(y,y)+r_f(y)
\end{align*}
for all $y \in W$, and
\begin{align*}
\frac{|r_f(y)|}{|y|^2}\to 0
\end{align*}
as $y \to 0$.
[/step]
[step:Substitute the expansion of $g$ into the expansion of $f$]
For $u \in B(0,\rho)$, define
\begin{align*}
y(u):=g(a+u)-b.
\end{align*}
Then $y: B(0,\rho) \to \mathbb{R}^n$ satisfies
\begin{align*}
y(u)=L(u)+\frac{1}{2}A(u,u)+r_g(u).
\end{align*}
Since $L$ is linear and $A$ is bilinear, there is $\delta \in (0,\rho)$ and a constant
\begin{align*}
C_y := \|L\|_{\mathrm{op}}+\frac{1}{2}\|A\|_{\mathrm{op}}+1
\end{align*}
such that
\begin{align*}
|y(u)| \le C_y |u|
\end{align*}
whenever $0<|u|<\delta$. Indeed, after reducing $\delta$ if necessary, $|r_g(u)| \le |u|^2$ for $0<|u|<\delta$, and also $|u|<1$.
Applying the expansion of $f$ to $y(u)$ gives
\begin{align*}
(f\circ g)(a+u)=f(b)+M(y(u))+\frac{1}{2}B(y(u),y(u))+r_f(y(u)).
\end{align*}
[guided]
The goal is to isolate the terms of order $|u|$ and $|u|^2$ in $(f\circ g)(a+u)$. We first name the displacement of $g(a+u)$ from $g(a)$:
\begin{align*}
y(u):=g(a+u)-b.
\end{align*}
The second-order expansion of $g$ says
\begin{align*}
y(u)=L(u)+\frac{1}{2}A(u,u)+r_g(u),
\end{align*}
where $r_g(u)=o(|u|^2)$ as $u \to 0$.
Before substituting into the expansion of $f$, we need one size estimate: $y(u)=O(|u|)$. Since $L$ is linear, $|L(u)|\le \|L\|_{\mathrm{op}}|u|$. Since $A$ is bilinear, $|A(u,u)|\le \|A\|_{\mathrm{op}}|u|^2$. Since $r_g(u)=o(|u|^2)$, after restricting to $0<|u|<\delta$ we have $|r_g(u)|\le |u|^2$. Taking $\delta \le 1$, we obtain
\begin{align*}
|y(u)| \le \|L\|_{\mathrm{op}}|u|+\frac{1}{2}\|A\|_{\mathrm{op}}|u|^2+|u|^2 \le C_y|u|,
\end{align*}
where
\begin{align*}
C_y := \|L\|_{\mathrm{op}}+\frac{1}{2}\|A\|_{\mathrm{op}}+1.
\end{align*}
Now $b+y(u)=g(a+u)\in V$, so the expansion of $f$ at $b$ applies to this $y(u)$. Therefore
\begin{align*}
(f\circ g)(a+u)=f(b)+M(y(u))+\frac{1}{2}B(y(u),y(u))+r_f(y(u)).
\end{align*}
This is the point of the proof: every term on the right can now be sorted by its order in $u$.
[/guided]
[/step]
[step:Identify the first-order and second-order terms]
Define the error map $z: B(0,\delta) \to \mathbb{R}^n$ by
\begin{align*}
z(u):=\frac{1}{2}A(u,u)+r_g(u).
\end{align*}
Then $y(u)=L(u)+z(u)$ and $z(u)=O(|u|^2)$ as $u \to 0$.
By linearity of $M$,
\begin{align*}
M(y(u))=M(L(u))+\frac{1}{2}M(A(u,u))+M(r_g(u)).
\end{align*}
Since $M$ is bounded and $r_g(u)=o(|u|^2)$, we have
\begin{align*}
M(r_g(u))=o(|u|^2).
\end{align*}
By bilinearity of $B$,
\begin{align*}
B(y(u),y(u))=B(L(u),L(u))+B(L(u),z(u))+B(z(u),L(u))+B(z(u),z(u)).
\end{align*}
Since $L(u)=O(|u|)$ and $z(u)=O(|u|^2)$, the last three terms are $o(|u|^2)$. Hence
\begin{align*}
B(y(u),y(u))=B(L(u),L(u))+o(|u|^2).
\end{align*}
Finally, because $r_f(y)=o(|y|^2)$ as $y \to 0$ and $|y(u)| \le C_y|u|$, we have
\begin{align*}
r_f(y(u))=o(|u|^2).
\end{align*}
Combining these estimates yields
\begin{align*}
(f\circ g)(a+u)=f(b)+M(L(u))+\frac{1}{2}\Bigl(B(L(u),L(u))+M(A(u,u))\Bigr)+o(|u|^2).
\end{align*}
[/step]
[step:Read off the second derivative of the composition]
Define the [linear map](/page/Linear%20Map)
\begin{align*}
T: \mathbb{R}^m \to \mathbb{R}^p
\end{align*}
by
\begin{align*}
T(h):=M(L(h)).
\end{align*}
Define the bilinear map
\begin{align*}
Q: \mathbb{R}^m \times \mathbb{R}^m \to \mathbb{R}^p
\end{align*}
by
\begin{align*}
Q(h,k):=B(L(h),L(k))+M(A(h,k)).
\end{align*}
The expansion from the previous step becomes
\begin{align*}
(f\circ g)(a+u)=(f\circ g)(a)+T(u)+\frac{1}{2}Q(u,u)+o(|u|^2).
\end{align*}
By the defining second-order Fréchet expansion and uniqueness of the linear and bilinear terms in that expansion, $f\circ g$ is twice differentiable at $a$, with
\begin{align*}
D(f\circ g)_a=T
\end{align*}
and
\begin{align*}
D^2(f\circ g)_a=Q.
\end{align*}
Therefore, for every $h,k \in \mathbb{R}^m$,
\begin{align*}
D^2(f\circ g)_a(h,k)=D^2 f_{g(a)}(Dg_a(h),Dg_a(k))+Df_{g(a)}(D^2g_a(h,k)).
\end{align*}
This is the desired second-order chain rule.
[/step]