[guided]Fix $t \in \{1,\dots,n-1\}$ and define the observation sigma-algebras
\begin{align*}
\mathcal{F}_t &:= \sigma(Y_1,\dots,Y_t), \\
\mathcal{F}_n &:= \sigma(Y_1,\dots,Y_n).
\end{align*}
The forward Kalman filter gives a conditional Gaussian law for $X_t$ given $\mathcal{F}_t$ and for the one-step prediction $X_{t+1}$ given $\mathcal{F}_t$. Because the transition from $X_t$ to $X_{t+1}$ is linear Gaussian, the pair
\begin{align*}
Z_t :=
\begin{pmatrix}
X_t \\
X_{t+1}
\end{pmatrix}
\end{align*}
is also conditionally Gaussian given $\mathcal{F}_t$. Its conditional mean is
\begin{align*}
m_t :=
\begin{pmatrix}
a_{t|t} \\
a_{t+1|t}
\end{pmatrix},
\end{align*}
and its conditional covariance is
\begin{align*}
\Sigma_t :=
\begin{pmatrix}
P_{t|t} & \operatorname{Cov}(X_t,X_{t+1}\mid \mathcal{F}_t) \\
\operatorname{Cov}(X_{t+1},X_t\mid \mathcal{F}_t) & P_{t+1|t}
\end{pmatrix}.
\end{align*}
By the hypothesis on the linear part of the state transition,
\begin{align*}
\operatorname{Cov}(X_t,X_{t+1}\mid \mathcal{F}_t)=P_{t|t}T_t^\top,
\end{align*}
and therefore
\begin{align*}
\Sigma_t =
\begin{pmatrix}
P_{t|t} & P_{t|t}T_t^\top \\
T_tP_{t|t} & P_{t+1|t}
\end{pmatrix}.
\end{align*}
Now apply the conditional Gaussian regression formula to the block vector $(X_t,X_{t+1})$ conditional on $\mathcal{F}_t$. The required invertibility hypothesis is precisely the assumption that $P_{t+1|t}$ is invertible. Hence
\begin{align*}
\mathbb{E}[X_t \mid X_{t+1},\mathcal{F}_t]
&= a_{t|t}+\operatorname{Cov}(X_t,X_{t+1}\mid \mathcal{F}_t)P_{t+1|t}^{-1}(X_{t+1}-a_{t+1|t}) \\
&= a_{t|t}+P_{t|t}T_t^\top P_{t+1|t}^{-1}(X_{t+1}-a_{t+1|t}).
\end{align*}
Since the smoothing gain is defined by
\begin{align*}
J_t := P_{t|t}T_t^\top P_{t+1|t}^{-1},
\end{align*}
this becomes
\begin{align*}
\mathbb{E}[X_t \mid X_{t+1},\mathcal{F}_t]
= a_{t|t}+J_t(X_{t+1}-a_{t+1|t}).
\end{align*}
The same Gaussian regression formula gives the conditional covariance
\begin{align*}
\operatorname{Cov}(X_t \mid X_{t+1},\mathcal{F}_t)
&= P_{t|t}
-\operatorname{Cov}(X_t,X_{t+1}\mid \mathcal{F}_t)P_{t+1|t}^{-1}
\operatorname{Cov}(X_{t+1},X_t\mid \mathcal{F}_t) \\
&= P_{t|t}-P_{t|t}T_t^\top P_{t+1|t}^{-1}T_tP_{t|t}.
\end{align*}
Using $J_t=P_{t|t}T_t^\top P_{t+1|t}^{-1}$ and the symmetry of $P_{t+1|t}$, this is
\begin{align*}
\operatorname{Cov}(X_t \mid X_{t+1},\mathcal{F}_t)
= P_{t|t}-J_tP_{t+1|t}J_t^\top.
\end{align*}[/guided]