forked from TH_General/Template_Summary
Lecture 2 completed
This commit is contained in:
parent
33ca9c486e
commit
61ea298cd7
@@ -114,6 +114,9 @@ rightsub = \grq%
\usepackage{amsmath}
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}
\DeclareMathOperator*{\lik}{lik}
\DeclareMathOperator*{\loss}{loss}
\DeclareMathOperator*{\loglik}{loglik}
%special symbols
\usepackage{fontawesome}
\usepackage{amssymb}
@@ -105,6 +105,12 @@ Based on this function, a loss function can be defined:
\begin{equation} \label{eq:sigmoid_loss_function}
L(\bm{w}) = \sum_i (\nomeq{sigmoid}(f(\bm{x}_i)) - c_i)^2 = \sum_i (\nomeq{sigmoid}(\bm{w}^T\bm{x}_i + b) - c_i)^2
\end{equation}
The \noms{sigmoid} additionally has the following properties:
\begin{itemize}
\item \nomsym{sigmoid} is bounded: $\nomeq{sigmoid}(a)\in(0,1)$
\item \nomsym{sigmoid} is symmetric: $1- \nomeq{sigmoid}(a) = \nomeq{sigmoid}(-a)$
\item the gradient is easy to compute (see the sketch after this list): $\nomeq{sigmoid}'(a) = \frac{\exp(-a)}{(1+\exp(-a))^2} = \nomeq{sigmoid}(a)(1-\nomeq{sigmoid}(a))$
\end{itemize}
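A short derivation sketch of the last property, using only the standard definition $\nomeq{sigmoid}(a) = \frac{1}{1+\exp(-a)}$:
\begin{align*}
\nomeq{sigmoid}'(a) &= \frac{\mathrm{d}}{\mathrm{d}a}\left(1+\exp(-a)\right)^{-1} = \frac{\exp(-a)}{(1+\exp(-a))^2}\\
&= \frac{1}{1+\exp(-a)}\cdot\frac{\exp(-a)}{1+\exp(-a)} = \nomeq{sigmoid}(a)\left(1-\nomeq{sigmoid}(a)\right)
\end{align*}
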
\subsubsection{Log-Likelihood}%
\label{ssub:Log-Likelihood}
@@ -231,4 +237,59 @@ but also more targeted than with \nameref{sub:SDG}.
\frac{1}{b}\sum_i l(\bm{x}_i;\bm{\theta})\qquad \bm{\theta}_{t+1} = \bm{\theta}_t - \dfrac{\eta}{b}\sum_i \nabla_{\bm{\theta}} l(\bm{x}_i;\bm{\theta}_t)
\end{equation}

CONTINUE ON PAGE 166
\subsection{Gradient for Logistic Regression}%
\label{sub:Gradient for Logisitic Regression}
Thanks to the properties of the \noms{sigmoid} (\cref{ssub:Logistic sigmoid function}), the gradient of the loss function for logistic regression can be derived easily:
\begin{align} \label{eq:gradient_for_logistic_regression}
\dfrac{\partial\loss_i}{\partial\bm{w}}
&= \dfrac{\partial}{\partial\bm{w}}\left(c_i\log\nomeq{sigmoid}(\bm{w}^T\bm{\phi}(\bm{x}_i)) + (1 - c_i) \log(1-\nomeq{sigmoid}(\bm{w}^T\bm{\phi}(\bm{x}_i)))\right)\\
&= \dots\text{\color{red}see Lecture 02, slide 65}\\
&= (c_i - \nomeq{sigmoid}(\bm{w}^T\bm{\phi}(\bm{x}_i)))\bm{\phi}(\bm{x}_i)
\end{align}
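A sketch of the omitted step (the full derivation is on Lecture 02, slide 65), where the shorthand $\sigma_i := \nomeq{sigmoid}(\bm{w}^T\bm{\phi}(\bm{x}_i))$ is introduced here only for readability and the gradient property $\nomeq{sigmoid}'(a) = \nomeq{sigmoid}(a)(1-\nomeq{sigmoid}(a))$ is used:
\begin{align*}
\dfrac{\partial}{\partial\bm{w}}\Bigl(c_i\log\sigma_i + (1-c_i)\log(1-\sigma_i)\Bigr)
&= \left(\frac{c_i}{\sigma_i} - \frac{1-c_i}{1-\sigma_i}\right)\sigma_i(1-\sigma_i)\,\bm{\phi}(\bm{x}_i)\\
&= \bigl(c_i(1-\sigma_i) - (1-c_i)\sigma_i\bigr)\bm{\phi}(\bm{x}_i) = (c_i - \sigma_i)\,\bm{\phi}(\bm{x}_i)
\end{align*}
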
\section{Multiclass Classification}%
\label{sec:Multiclass Classification}
\begin{wrapfigure}{r}{.4\textwidth}
\vspace*{-10mm}
\centering
\includegraphics[width=0.8\linewidth]{multiclass_linear_classification.png}
\caption{Multiclass Linear Classification}
\label{fig:multiclass_linear_classification}
\vspace*{-10mm}
\end{wrapfigure}
Linear classification can also be used to classify more than two classes.
For this, the softmax likelihood function is used:
\begin{equation} \label{eq:softmax_likelihood_function}
p(c=c_i|\bm{x}) = \frac{\exp(\bm{w}_i^T\bm{\phi}(\bm{x}))}{\sum_{k=1}^K \exp(\bm{w}_k^T\bm{\phi}(\bm{x}))}
\end{equation}
Here, each class gets its own weight vector.
Classification is then done by determining for which class $c_i$ the score $\bm{w}_i^T\bm{\phi}(\bm{x})$ is highest.

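Since $\exp$ is strictly increasing and the softmax denominator is the same for every class, this rule is equivalent to picking the class with the highest posterior; as a short sketch (with $c^*$ denoting the predicted class, notation introduced here):
\begin{equation*}
c^* = \argmax_{k} \; p(c=k|\bm{x}) = \argmax_{k} \; \bm{w}_k^T\bm{\phi}(\bm{x})
\end{equation*}
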
Using one-hot encoding, the problem can be formulated as a conditional multinomial distribution (\cref{sub:Multinomial/Categorical Dristribution}):
\begin{align} \label{eq:multiclass_classification_distribution}
p(c|\bm{x}) &= \prod_{k=1}^K p(c=k|\bm{x})^{\bm{h}_{c,k}}\\
&= \prod_{k=1}^K \left(\frac{\exp(\bm{w}_k^T\bm{\phi}(\bm{x}))}{\sum_{k'=1}^K \exp(\bm{w}_{k'}^T\bm{\phi}(\bm{x}))} \right)^{\bm{h}_{c,k}}
\end{align}

\paragraph{Data log-likelihood}%
\label{par:multiclass_classification:Data log-likelihood}
\begin{align} \label{eq:multiclass_classification:Data log-likelihood}
\loglik(\mathcal{D},\bm{w}_{1:K}) &= \sum_{i=1}^N\log p(c_i|\bm{x}_i)\\
&= \dots \text{ \color{red}see Lecture 02, slide 69}\\
&= \sum_{i=1}^N\sum_{k=1}^K\bm{h}_{c_i,k}\bm{w}_k^T\bm{\phi}(\bm{x}_i)
- \underbrace{\log\left( \sum_{j=1}^K\exp(\bm{w}_j^T\bm{\phi}(\bm{x}_i))\right)}_\text{independent of $k$} \underbrace{\sum_k\bm{h}_{c_i,k}}_{=1}
\end{align}

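A sketch of the omitted step (the full derivation is on Lecture 02, slide 69): taking the logarithm of the product in \cref{eq:multiclass_classification_distribution} turns it into a sum, and the one-hot exponents $\bm{h}_{c_i,k}$ become factors:
\begin{align*}
\log p(c_i|\bm{x}_i)
&= \sum_{k=1}^K \bm{h}_{c_i,k}\log\frac{\exp(\bm{w}_k^T\bm{\phi}(\bm{x}_i))}{\sum_{j=1}^K \exp(\bm{w}_j^T\bm{\phi}(\bm{x}_i))}\\
&= \sum_{k=1}^K \bm{h}_{c_i,k}\left(\bm{w}_k^T\bm{\phi}(\bm{x}_i) - \log\sum_{j=1}^K \exp(\bm{w}_j^T\bm{\phi}(\bm{x}_i))\right)
\end{align*}
Summing over all $N$ data points and pulling the class-independent log-term out of the sum over $k$ gives the last line above.
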
\paragraph{Gradient}%
\label{par:multiclass_classification:Gradient}
\begin{alignat*}{2} \label{eq:multiclass_classification:Gradient}
&\frac{\partial\loss_i}{\partial\bm{w}_k}
&&= \frac{\partial}{\partial \bm{w}_k}\left(\sum_{k=1}^K\bm{h}_{c_i,k}\bm{w}_k^T\bm{\phi}(\bm{x}_i) - \log\left( \sum_{j=1}^K\exp(\bm{w}_j^T\bm{\phi}(\bm{x}_i))\right) \right)\\
& &&= \dots\text{ \color{red} see Exercise 2, Problem 1.1 }\\
& &&= \bm h_{c_i,k} \bm\phi(\bm x_i) - \dfrac{\exp(\bm w_k^T\bm \phi(\bm x_i))}{\sum_{j=1}^K \exp(\bm w^T_j \bm \phi(\bm x_i))} \bm \phi(\bm x_i)\\
\Rightarrow&\frac{\partial\loss}{\partial\bm{w}_k}
&&= - \sum_{i=1}^N \left( \bm h_{c_i,k} \bm \phi(\bm x_i) - \dfrac{\exp(\bm w_k^T\bm \phi(\bm x_i))}{\sum_{j=1}^K \exp(\bm w^T_j \bm \phi(\bm x_i))} \bm \phi(\bm x_i) \right) \\
& &&= -\sum_{i=1}^N \bm \phi(\bm x_i) \left( \bm h_{c_i,k} - p_{i,k} \right)
\end{alignat*}
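
With this gradient, each class weight vector can be updated exactly as in the gradient-descent scheme above; a minimal sketch, assuming the same learning rate $\eta$ as before and with $p_{i,k}$ the softmax probability of class $k$ for sample $i$ (as in the last line):
\begin{equation*}
\bm{w}_k \leftarrow \bm{w}_k - \eta\,\frac{\partial\loss}{\partial\bm{w}_k} = \bm{w}_k + \eta \sum_{i=1}^N \bm{\phi}(\bm{x}_i)\left(\bm{h}_{c_i,k} - p_{i,k}\right)
\end{equation*}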
BIN
images/multiclass_linear_classification.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 78 KiB