[proofplan]
We center the observations by subtracting the common mean and rewrite the deviations from the sample mean in terms of these centered variables. Expanding the sum of outer products gives an exact algebraic identity: the sample covariance numerator equals the centered second-moment sum minus $n$ times the outer product of the centered sample mean. We then compute expectations entrywise. Independence makes the covariance of the sample mean equal to $\Sigma/n$, so the expected numerator is $(n-1)\Sigma$, and division by $n-1$ gives the result.
[/proofplan]
[step:Center the observations and expand the covariance numerator]
For each $i \in \{1,\dots,n\}$, define the centered random vector $Y_i : \Omega \to \mathbb R^p$ by
\begin{align*}
Y_i &= X_i-\mu.
\end{align*}
Define the centered sample mean random vector $\bar Y : \Omega \to \mathbb R^p$ by
\begin{align*}
\bar Y &= \frac{1}{n}\sum_{i=1}^n Y_i.
\end{align*}
Then $\bar Y=\bar X-\mu$, and therefore $X_i-\bar X=Y_i-\bar Y$ for every $i$.
Using bilinearity of matrix multiplication and the identity $\sum_{i=1}^n Y_i=n\bar Y$, we compute
\begin{align*}
\sum_{i=1}^n (X_i-\bar X)(X_i-\bar X)^\top
&= \sum_{i=1}^n (Y_i-\bar Y)(Y_i-\bar Y)^\top \\
&= \sum_{i=1}^n Y_iY_i^\top
-\sum_{i=1}^n Y_i\bar Y^\top
-\sum_{i=1}^n \bar Y Y_i^\top
+\sum_{i=1}^n \bar Y\bar Y^\top \\
&= \sum_{i=1}^n Y_iY_i^\top
-(n\bar Y)\bar Y^\top
-\bar Y(n\bar Y)^\top
+n\bar Y\bar Y^\top \\
&= \sum_{i=1}^n Y_iY_i^\top
-n\bar Y\bar Y^\top.
\end{align*}
[guided]
The useful variables are the centered observations. For each $i \in \{1,\dots,n\}$, define
\begin{align*}
Y_i &= X_i-\mu,
\end{align*}
so $Y_i : \Omega \to \mathbb R^p$ is a random vector with mean $0$. Define the centered sample mean
\begin{align*}
\bar Y &= \frac{1}{n}\sum_{i=1}^n Y_i.
\end{align*}
Because $\bar X=\frac{1}{n}\sum_{i=1}^n X_i$, we have
\begin{align*}
\bar Y
&= \frac{1}{n}\sum_{i=1}^n (X_i-\mu) \\
&= \bar X-\mu.
\end{align*}
Thus each deviation from the sample mean satisfies
\begin{align*}
X_i-\bar X
&= (X_i-\mu)-(\bar X-\mu) \\
&= Y_i-\bar Y.
\end{align*}
Now expand the covariance numerator. The matrix identity $(a-b)(a-b)^\top=aa^\top-ab^\top-ba^\top+bb^\top$ applies to vectors $a,b \in \mathbb R^p$, so
\begin{align*}
\sum_{i=1}^n (X_i-\bar X)(X_i-\bar X)^\top
&= \sum_{i=1}^n (Y_i-\bar Y)(Y_i-\bar Y)^\top \\
&= \sum_{i=1}^n Y_iY_i^\top
-\sum_{i=1}^n Y_i\bar Y^\top
-\sum_{i=1}^n \bar Y Y_i^\top
+\sum_{i=1}^n \bar Y\bar Y^\top.
\end{align*}
Since $\bar Y=\frac{1}{n}\sum_{i=1}^n Y_i$, the identity $\sum_{i=1}^n Y_i=n\bar Y$ gives
\begin{align*}
\sum_{i=1}^n Y_i\bar Y^\top &= (n\bar Y)\bar Y^\top = n\bar Y\bar Y^\top,\\
\sum_{i=1}^n \bar Y Y_i^\top &= \bar Y(n\bar Y)^\top = n\bar Y\bar Y^\top,\\
\sum_{i=1}^n \bar Y\bar Y^\top &= n\bar Y\bar Y^\top.
\end{align*}
Combining these three terms leaves exactly one negative copy:
\begin{align*}
\sum_{i=1}^n (X_i-\bar X)(X_i-\bar X)^\top
&= \sum_{i=1}^n Y_iY_i^\top-n\bar Y\bar Y^\top.
\end{align*}
[/guided]
[/step]
[step:Compute the expected centered second-moment sum]
Since the $X_i$ are identically distributed and $Y_i=X_i-\mu$, each $Y_i$ has covariance matrix $\Sigma$:
\begin{align*}
\mathbb E[Y_iY_i^\top]=\Sigma.
\end{align*}
Linearity of expectation, applied entrywise to the matrix-valued random variables $Y_iY_i^\top$, gives
\begin{align*}
\mathbb E\left[\sum_{i=1}^n Y_iY_i^\top\right]
= \sum_{i=1}^n \mathbb E[Y_iY_i^\top]
= n\Sigma.
\end{align*}
[/step]
[step:Compute the covariance matrix of the sample mean]
We compute $\mathbb E[\bar Y\bar Y^\top]$ by expanding the double sum:
\begin{align*}
\mathbb E[\bar Y\bar Y^\top]
&= \mathbb E\left[
\left(\frac{1}{n}\sum_{i=1}^n Y_i\right)
\left(\frac{1}{n}\sum_{j=1}^n Y_j\right)^\top
\right] \\
&= \frac{1}{n^2}\sum_{i=1}^n\sum_{j=1}^n \mathbb E[Y_iY_j^\top].
\end{align*}
If $i=j$, then $\mathbb E[Y_iY_i^\top]=\Sigma$. If $i\ne j$, then independence of $Y_i$ and $Y_j$ and the identities $\mathbb E[Y_i]=\mathbb E[Y_j]=0$ imply, entrywise,
\begin{align*}
\mathbb E[(Y_i)_a(Y_j)_b]
= \mathbb E[(Y_i)_a]\mathbb E[(Y_j)_b]
= 0
\end{align*}
for all $a,b \in \{1,\dots,p\}$, hence $\mathbb E[Y_iY_j^\top]=0$. Therefore
\begin{align*}
\mathbb E[\bar Y\bar Y^\top]
&= \frac{1}{n^2}\sum_{i=1}^n \Sigma \\
&= \frac{1}{n}\Sigma.
\end{align*}
[guided]
The only non-mechanical point is where independence is used. Since
\begin{align*}
\bar Y=\frac{1}{n}\sum_{i=1}^n Y_i,
\end{align*}
we expand the outer product as a double sum:
\begin{align*}
\mathbb E[\bar Y\bar Y^\top]
&= \mathbb E\left[
\left(\frac{1}{n}\sum_{i=1}^n Y_i\right)
\left(\frac{1}{n}\sum_{j=1}^n Y_j\right)^\top
\right] \\
&= \frac{1}{n^2}\sum_{i=1}^n\sum_{j=1}^n \mathbb E[Y_iY_j^\top].
\end{align*}
This equality is entrywise: for each pair of coordinates $a,b \in \{1,\dots,p\}$, the $(a,b)$ entry is the expectation of
\begin{align*}
\frac{1}{n^2}\sum_{i=1}^n\sum_{j=1}^n (Y_i)_a(Y_j)_b.
\end{align*}
When $i=j$, the definition of $\Sigma$ gives
\begin{align*}
\mathbb E[Y_iY_i^\top]=\Sigma.
\end{align*}
When $i\ne j$, the random vectors $Y_i=X_i-\mu$ and $Y_j=X_j-\mu$ are independent because they are [measurable functions](/page/Measurable%20Functions) of the independent random vectors $X_i$ and $X_j$. Thus their scalar coordinate random variables $(Y_i)_a$ and $(Y_j)_b$ are independent. Since each $Y_i$ is centered, $\mathbb E[(Y_i)_a]=0$ and $\mathbb E[(Y_j)_b]=0$. Hence
\begin{align*}
\mathbb E[(Y_i)_a(Y_j)_b]
= \mathbb E[(Y_i)_a]\mathbb E[(Y_j)_b]
= 0.
\end{align*}
Because this holds for every coordinate pair $(a,b)$, we have
\begin{align*}
\mathbb E[Y_iY_j^\top]=0
\end{align*}
whenever $i\ne j$.
Therefore only the $n$ diagonal terms in the double sum remain:
\begin{align*}
\mathbb E[\bar Y\bar Y^\top]
&= \frac{1}{n^2}\sum_{i=1}^n \Sigma \\
&= \frac{1}{n}\Sigma.
\end{align*}
[/guided]
[/step]
[step:Divide the expected numerator by $n-1$]
Taking expectations in the identity from the first step and using the two expectation computations above gives
\begin{align*}
\mathbb E\left[\sum_{i=1}^n (X_i-\bar X)(X_i-\bar X)^\top\right]
&= \mathbb E\left[\sum_{i=1}^n Y_iY_i^\top\right]
-n\mathbb E[\bar Y\bar Y^\top] \\
&= n\Sigma-n\left(\frac{1}{n}\Sigma\right) \\
&= (n-1)\Sigma.
\end{align*}
Since
\begin{align*}
S=\frac{1}{n-1}\sum_{i=1}^n (X_i-\bar X)(X_i-\bar X)^\top
\end{align*}
and $n\ge 2$, division by $n-1$ is valid. Therefore
\begin{align*}
\mathbb E[S]
&= \frac{1}{n-1}
\mathbb E\left[\sum_{i=1}^n (X_i-\bar X)(X_i-\bar X)^\top\right] \\
&= \frac{1}{n-1}(n-1)\Sigma \\
&= \Sigma.
\end{align*}
This proves that the sample covariance matrix $S$ is an unbiased estimator of $\Sigma$.
[/step]