diff --git a/exercise.ipynb b/exercise.ipynb index 60b0111..3b23f51 100644 --- a/exercise.ipynb +++ b/exercise.ipynb @@ -663,15 +663,25 @@ "source": [ "#### Solution\n", "\\begin{align}\n", - " p(\\boldsymbol{x_i}) &= \\mathcal{N}\\left(\\boldsymbol{x_i} | \\boldsymbol{\\mu}, \\boldsymbol{\\Sigma} \\right)\\\\\n", + " p(\\boldsymbol{x_i}) \n", + " &= \\mathcal{N}\\left(\\boldsymbol{x_i} | \\boldsymbol{\\mu}, \\boldsymbol{\\Sigma} \\right)\\\\\n", " &= \\dfrac{1}{\\sqrt{\\det \\left(2 \\pi \\boldsymbol{\\Sigma}\\right)}} \\exp\\left( - \\dfrac{(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})}{2}\\right)\n", + " \\\\ &= \\dfrac{1}{\\sqrt{(2 \\pi)^{K} |\\boldsymbol{\\Sigma}|}} \\exp\\left( - \\dfrac{(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})}{2}\\right)\\\\\n", "\\end{align}\n", "\n", + "Here $K$ is the dimension of $\\boldsymbol{x_i}$, i.e. $\\boldsymbol{\\Sigma}$ is a square $K \\times K$ matrix.\n", + "\n", "##### calculate the likelihood\n", "\\begin{align}\n", - " \\text{lik}(\\boldsymbol{\\theta};D) &= \\prod_i p(x_i)\\\\\n", - " &= \\prod_i \\dfrac{1}{\\sqrt{\\det \\left(2 \\pi \\boldsymbol{\\Sigma}\\right)}} \\exp\\left( - \\dfrac{(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})}{2}\\right)\\\\\n", - " \\log\\text{lik}(\\boldsymbol{\\theta};D) &= \\sum_i \\log p(x_i)\\\\\n", + " \\text{lik}(\\boldsymbol{\\theta};D) \n", + " &= \\prod_i p(x_i)\\\\\n", + " &= \\prod_i^{N} \\dfrac{1}{\\sqrt{(2 \\pi)^{K} |\\boldsymbol{\\Sigma}|}} \\exp\\left( - \\dfrac{(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})}{2}\\right)\\\\\n", + " &= \\prod_i^{N} (2\\pi)^{-\\frac{K}{2}} |\\boldsymbol{\\Sigma}|^{-\\frac{1}{2}} \\exp\\left( - \\dfrac{(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})}{2}\\right)\\\\\n", + " &= (2\\pi)^{-\\frac{NK}{2}} 
|\\Sigma|^{-\\frac{N}{2}} \\exp\\left( - \\dfrac{\\sum_i^{N}(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})}{2}\\right)\\\\\\\\\n", + " \\log\\text{lik}(\\boldsymbol{\\theta};D) \n", + " &= \\sum_i \\log p(x_i)\\\\\n", + " &= -\\frac{NK}{2}\\log(2\\pi)-\\frac{N}{2}\\log(|\\Sigma|) - \\frac{1}{2} \\sum_i^{N}(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu}) \\\\\n", + " &= \\text{(TODO: delete everything after this line)}\\\\\n", " &= \\sum_i \\log(1) - \\log(\\sqrt{\\det(2\\pi\\Sigma)})- \\dfrac{(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})}{2}\\\\\n", " &= \\sum_i^N - \\frac{1}{2}\\log(\\det(2\\pi\\Sigma))- \\dfrac{(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})}{2}\\\\\n", " &= - \\frac{N}{2}\\log(\\det(2\\pi\\Sigma))- \\dfrac{1}{2}\\sum_i^N (\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})\\\\\n", @@ -685,6 +695,23 @@ "##### derive $\\mu$" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\\begin{align}\n", + " \\frac{\\partial \\log\\text{lik}(\\boldsymbol{\\theta};D)}{\\partial\\mu}\n", + " &= \\frac{\\partial}{\\partial\\mu}\\left(-\\frac{NK}{2}\\log(2\\pi)-\\frac{N}{2}\\log(|\\Sigma|) - \\frac{1}{2} \\sum_i^{N}(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})\\right)\\\\\n", + " &= \\text{(my solution)} \\\\\n", + " &= \\frac{\\partial}{\\partial\\mu}-\\dfrac{1}{2}\\sum_i^N (\\boldsymbol{x_i}^T-\\boldsymbol{\\mu}^T) \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})\\\\\n", + " &= \\frac{\\partial}{\\partial\\mu}-\\dfrac{1}{2}\\sum_i^N \\boldsymbol{x_i}^T\\boldsymbol{\\Sigma}^{-1}\\boldsymbol{x_i} - \\boldsymbol{x_i}^T\\boldsymbol{\\Sigma}^{-1}\\boldsymbol{\\mu} - \\boldsymbol{\\mu}^T\\boldsymbol{\\Sigma}^{-1}\\boldsymbol{x_i} + 
\\boldsymbol{\\mu}^T\\boldsymbol{\\Sigma}^{-1}\\boldsymbol{\\mu}\\\\\n", + " &= \\frac{\\partial}{\\partial\\mu}-\\dfrac{1}{2}\\sum_i^N \\left(\\boldsymbol{x_i}^T\\boldsymbol{\\Sigma}^{-1}\\boldsymbol{x_i} -2\\boldsymbol{x_i}^T\\boldsymbol{\\Sigma}^{-1}\\boldsymbol{\\mu}+ \\boldsymbol{\\mu}^T\\boldsymbol{\\Sigma}^{-1}\\boldsymbol{\\mu}\\right)\\\\\n", + " &= -\\dfrac{1}{2}\\sum_i^N \\left(-2\\boldsymbol{\\Sigma}^{-1}\\boldsymbol{x_i} + 2\\boldsymbol{\\Sigma}^{-1}\\boldsymbol{\\mu}\\right)\\\\\n", + " &= -\\sum_i^N \\left(-\\boldsymbol{\\Sigma}^{-1}\\boldsymbol{x_i} + \\boldsymbol{\\Sigma}^{-1}\\boldsymbol{\\mu}\\right)\\\\\n", + " &= -\\boldsymbol{\\Sigma}^{-1}\\sum_i^N \\left(-\\boldsymbol{x_i} + \\boldsymbol{\\mu}\\right)\\\\\n", + "\\end{align}" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -726,8 +753,34 @@ "metadata": {}, "source": [ "\\begin{align}\n", - " \\frac{\\partial \\log\\text{lik}(\\boldsymbol{\\theta};D)}{\\partial \\boldsymbol{\\Sigma}} &=\n", - " \\frac{\\partial}{\\partial \\boldsymbol{\\Sigma}} \\left(-\\frac{N}{2}\\log(\\det(2\\pi\\Sigma))- \\dfrac{1}{2}\\sum_i^N (\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})\\right)\\\\\n", + "\\frac{\\partial \\log\\text{lik}(\\boldsymbol{\\theta};D)}{\\partial \\boldsymbol{\\Sigma}} &= \\frac{\\partial}{\\partial \\boldsymbol{\\Sigma}} \\left(-\\frac{NK}{2}\\log(2\\pi)-\\frac{N}{2}\\log(|\\Sigma|) - \\frac{1}{2} \\sum_i^{N}(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})\\right) \\\\\n", + "&= \\text{(my solution)} \\\\\n", + "&= -\\frac{N}{2}\\frac{\\partial}{\\partial \\boldsymbol{\\Sigma}}\\log(|\\Sigma|) - \\dfrac{1}{2}\\sum_i^N \\frac{\\partial}{\\partial \\boldsymbol{\\Sigma}}(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})\\\\\n", + "\\end{align}\n", + "Because $(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})$ is a scalar, we 
can take its trace; together with $\\frac{\\partial}{\\partial \\Sigma}\\log(|\\Sigma|) = \\Sigma^{-1}$ (for symmetric $\\Sigma$) this gives the following form\n", + "\\begin{align}\n", + " &= -\\frac{N}{2}\\Sigma^{-1} - \\dfrac{1}{2}\\sum_i^N \\frac{\\partial}{\\partial \\boldsymbol{\\Sigma}} \\text{Tr}\\left((\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})\\right)\\\\\n", + "\\end{align}\n", + "Formula 63 (page 10) of the Matrix Cookbook, $\\frac{\\partial}{\\partial X}\\text{Tr}(A X^{-1} B) = -(X^{-1} B A X^{-1})^T$, allows us to differentiate the trace\n", + "\\begin{align}\n", + " &= -\\frac{N}{2}\\Sigma^{-1} - \\dfrac{1}{2}\\sum_i^N -\\left(\\boldsymbol{\\Sigma}^{-1}(\\boldsymbol{x_i}-\\boldsymbol{\\mu})(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1}\\right)^T\\\\\n", + " &= -\\frac{N}{2}\\Sigma^{-1} + \\dfrac{1}{2}\\sum_i^N \\boldsymbol{\\Sigma}^{-1}(\\boldsymbol{x_i}-\\boldsymbol{\\mu})(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} = 0 \\\\\n", + " \\Rightarrow\\quad 0 &= -N \\Sigma^{-1} + \\Sigma^{-1}\\left(\\sum_i^{N}(\\boldsymbol{x_i}-\\boldsymbol{\\mu})(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T\\right) \\Sigma^{-1}\\\\\n", + "\\end{align}\n", + "Multiply by $\\Sigma$ from the left and from the right\n", + "\\begin{align}\n", + "-N \\Sigma + \\sum_i^{N}(\\boldsymbol{x_i}-\\boldsymbol{\\mu})(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T &= 0 \\\\\n", + "\\Sigma &= \\frac{1}{N} \\sum_i^{N}(\\boldsymbol{x_i}-\\boldsymbol{\\mu})(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T\n", + "\\end{align}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\\begin{align}\n", + " \\frac{\\partial \\log\\text{lik}(\\boldsymbol{\\theta};D)}{\\partial \\boldsymbol{\\Sigma}} \n", + " &= \\frac{\\partial}{\\partial \\boldsymbol{\\Sigma}} \\left(-\\frac{N}{2}\\log(\\det(2\\pi\\Sigma))- \\dfrac{1}{2}\\sum_i^N (\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})\\right)\\\\\n", " &= -\\frac{\\partial}{\\partial \\boldsymbol{\\Sigma}}\\frac{N}{2}\\log(\\det(2\\pi\\Sigma)) - \\frac{\\partial}{\\partial 
\\boldsymbol{\\Sigma}}\\dfrac{1}{2}\\sum_i^N (\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})\\\\\n", " &= -\\frac{N}{2}(2\\pi\\Sigma)^{-1} - \\frac{\\partial}{\\partial \\boldsymbol{\\Sigma}}\\dfrac{1}{2}\\sum_i^N (\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})\\\\\n", " &= -\\frac{N}{2}(2\\pi\\Sigma)^{-1} - \\dfrac{1}{2}\\sum_i^N \\frac{\\partial}{\\partial \\boldsymbol{\\Sigma}}(\\boldsymbol{x_i}-\\boldsymbol{\\mu})^T \\boldsymbol{\\Sigma}^{-1} (\\boldsymbol{x_i}-\\boldsymbol{\\mu})\\\\\n", @@ -1202,7 +1255,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.7.11" } }, "nbformat": 4,