diff --git a/Appendix.tex b/Appendix.tex
index 364fc47..68b8d39 100644
--- a/Appendix.tex
+++ b/Appendix.tex
@@ -5,203 +5,203 @@
 \section{Herleitung: Gradient for Logistic Regression}%
 \label{sec:Herleitung: Gradient for Logistic Regression}
-\includegraphics[page=64,width=\textwidth]{Vorlesungen/02_LinearClassification.pdf}
+\includegraphics[page=64,width=.8\textwidth]{Vorlesungen/02_LinearClassification.pdf}
 \section{Herleitung: Multiclass Classification: Data log-likelihood}%
 \label{sec:Herleitung: Multiclass Classification: Data log-likelihood}
-\includegraphics[page=68,width=\textwidth]{Vorlesungen/02_LinearClassification.pdf}
+\includegraphics[page=68,width=.8\textwidth]{Vorlesungen/02_LinearClassification.pdf}
 \section{Herleitung: CART: Classification Tree}%
 \label{sec:Herleitung: CART: Classification Tree}
-\includegraphics[page=32,width=\textwidth]{Vorlesungen/04_TreesAndForests.pdf}
-\includegraphics[page=33,width=\textwidth]{Vorlesungen/04_TreesAndForests.pdf}
-\includegraphics[page=34,width=\textwidth]{Vorlesungen/04_TreesAndForests.pdf}
-\includegraphics[page=35,width=\textwidth]{Vorlesungen/04_TreesAndForests.pdf}
-\includegraphics[page=36,width=\textwidth]{Vorlesungen/04_TreesAndForests.pdf}
+\includegraphics[page=32,width=.8\textwidth]{Vorlesungen/04_TreesAndForests.pdf}\\
+\includegraphics[page=33,width=.8\textwidth]{Vorlesungen/04_TreesAndForests.pdf}\\
+\includegraphics[page=34,width=.8\textwidth]{Vorlesungen/04_TreesAndForests.pdf}\\
+\includegraphics[page=35,width=.8\textwidth]{Vorlesungen/04_TreesAndForests.pdf}\\
+\includegraphics[page=36,width=.8\textwidth]{Vorlesungen/04_TreesAndForests.pdf}
 \section{Herleitung: CART: Regression Tree}%
 \label{sec:Herleitung: CART: Regression Tree}
-\includegraphics[page=24,width=\textwidth]{Vorlesungen/04_TreesAndForests.pdf}
-\includegraphics[page=25,width=\textwidth]{Vorlesungen/04_TreesAndForests.pdf}
-\includegraphics[page=26,width=\textwidth]{Vorlesungen/04_TreesAndForests.pdf}
-\includegraphics[page=27,width=\textwidth]{Vorlesungen/04_TreesAndForests.pdf}
-\includegraphics[page=28,width=\textwidth]{Vorlesungen/04_TreesAndForests.pdf}
-\includegraphics[page=29,width=\textwidth]{Vorlesungen/04_TreesAndForests.pdf}
-\includegraphics[page=30,width=\textwidth]{Vorlesungen/04_TreesAndForests.pdf}
-\includegraphics[page=31,width=\textwidth]{Vorlesungen/04_TreesAndForests.pdf}
+\includegraphics[page=24,width=.8\textwidth]{Vorlesungen/04_TreesAndForests.pdf}\\
+\includegraphics[page=25,width=.8\textwidth]{Vorlesungen/04_TreesAndForests.pdf}\\
+\includegraphics[page=26,width=.8\textwidth]{Vorlesungen/04_TreesAndForests.pdf}\\
+\includegraphics[page=27,width=.8\textwidth]{Vorlesungen/04_TreesAndForests.pdf}\\
+\includegraphics[page=28,width=.8\textwidth]{Vorlesungen/04_TreesAndForests.pdf}\\
+\includegraphics[page=29,width=.8\textwidth]{Vorlesungen/04_TreesAndForests.pdf}\\
+\includegraphics[page=30,width=.8\textwidth]{Vorlesungen/04_TreesAndForests.pdf}\\
+\includegraphics[page=31,width=.8\textwidth]{Vorlesungen/04_TreesAndForests.pdf}
 \section{Herleitung: Soft Max-Margin: Hinge Loss}%
 \label{sec:Herleitung: Soft Max-Margin: Hinge Loss}
-\includegraphics[page=21,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
+\includegraphics[page=21,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}
 \section{Anwendungsbeispiele: \glstopshortpl{SVM}}%
 \label{sec:Anwendungsbeispiele: SVMs}
-\includegraphics[page=34,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
-\includegraphics[page=35,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
-\includegraphics[page=36,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
-\includegraphics[page=37,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
-\includegraphics[page=38,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
-\includegraphics[page=39,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
-\includegraphics[page=40,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
+\includegraphics[page=34,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}\\
+\includegraphics[page=35,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}\\
+\includegraphics[page=36,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}\\
+\includegraphics[page=37,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}\\
+\includegraphics[page=38,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}\\
+\includegraphics[page=39,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}\\
+\includegraphics[page=40,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}
 \section{Herleitung: SVMs with Kernels}%
 \label{sec:Herleitung: SVMs with Kernels}
-\includegraphics[page=52,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
-\includegraphics[page=53,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
-\includegraphics[page=54,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
-\includegraphics[page=55,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
-\includegraphics[page=56,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
+\includegraphics[page=52,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}\\
+\includegraphics[page=53,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}\\
+\includegraphics[page=54,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}\\
+\includegraphics[page=55,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}\\
+\includegraphics[page=56,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}
 \section{Beispiele: SVM: Model Selection}%
 \label{sec:Beispiele: SVM: Model Selection}
-\includegraphics[page=57,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
-\includegraphics[page=58,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
-\includegraphics[page=59,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
-\includegraphics[page=60,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
-\includegraphics[page=62,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
-\includegraphics[page=63,width=\textwidth]{Vorlesungen/06_SVMs.pdf}
+\includegraphics[page=57,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}\\
+\includegraphics[page=58,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}\\
+\includegraphics[page=59,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}\\
+\includegraphics[page=60,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}\\
+\includegraphics[page=62,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}\\
+\includegraphics[page=63,width=.8\textwidth]{Vorlesungen/06_SVMs.pdf}
 \section{Anwendungsbeispiel: Bayesian Learning: Regression}%
 \label{sec:Anwendungsbeispiel: Bayesian Learning: Regression}
-\includegraphics[page=18,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
-\includegraphics[page=19,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
-\includegraphics[page=20,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
+\includegraphics[page=18,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}\\
+\includegraphics[page=19,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}\\
+\includegraphics[page=20,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
 \section{Beweis: Gaussian Processes ist eine kernelized Bayesian Linear Regression}%
 \label{sec:Beweis: Gaussian Processes ist eine kernelized Bayesian Linear Regression}
-\includegraphics[page=41,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
-\includegraphics[page=42,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
-\includegraphics[page=43,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
-\includegraphics[page=44,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
-\includegraphics[page=45,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
+\includegraphics[page=41,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}\\
+\includegraphics[page=42,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}\\
+\includegraphics[page=43,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}\\
+\includegraphics[page=44,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}\\
+\includegraphics[page=45,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
 \section{Herleitung: Gaussian Processes: Posterior}%
 \label{sec:Herleitung: Gaussian Processes: Posterior}
-\includegraphics[page=38,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
+\includegraphics[page=38,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
 \section{Herleitung: Gaussian Processes: \nomsym{mean} und \nomsym{variance}}%
 \label{sec:Herleitung: Gaussian Processes: mean and variance}
-\includegraphics[page=39,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
+\includegraphics[page=39,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
 \section{Beispiel: Neural Network: XOR}%
 \label{sec:Beispiel: Neural Network: XOR}
-\includegraphics[page=25,width=\textwidth]{Vorlesungen/08_NeuralNets.pdf}
-\includegraphics[page=26,width=\textwidth]{Vorlesungen/08_NeuralNets.pdf}
+\includegraphics[page=25,width=.8\textwidth]{Vorlesungen/08_NeuralNets.pdf}\\
+\includegraphics[page=26,width=.8\textwidth]{Vorlesungen/08_NeuralNets.pdf}
 \section{Beispiel: Neural Networks: Feature Learning}%
 \label{sec:Beispiel: Neural Networks: Feature Learning}
-\includegraphics[page=35,width=\textwidth]{Vorlesungen/08_NeuralNets.pdf}
+\includegraphics[page=35,width=.8\textwidth]{Vorlesungen/08_NeuralNets.pdf}
 \section{Herleitung: Backpropagation in Matrix-Form}%
 \label{sec:Herleitung: Backpropagation in Matrix-Form}
-\includegraphics[page=52,width=\textwidth]{Vorlesungen/08_NeuralNets.pdf}
-\includegraphics[page=53,width=\textwidth]{Vorlesungen/08_NeuralNets.pdf}
+\includegraphics[page=52,width=.8\textwidth]{Vorlesungen/08_NeuralNets.pdf}\\
+\includegraphics[page=53,width=.8\textwidth]{Vorlesungen/08_NeuralNets.pdf}
 \section{Zusätzliche Informationen: Second Order Optimization}%
 \label{sec:Zusaetzliche Informationen: Second Order Optimization}
-\includegraphics[page=74,width=\textwidth]{Vorlesungen/08_NeuralNets.pdf}
-\includegraphics[page=75,width=\textwidth]{Vorlesungen/08_NeuralNets.pdf}
-\includegraphics[page=76,width=\textwidth]{Vorlesungen/08_NeuralNets.pdf}
+\includegraphics[page=74,width=.8\textwidth]{Vorlesungen/08_NeuralNets.pdf}\\
+\includegraphics[page=75,width=.8\textwidth]{Vorlesungen/08_NeuralNets.pdf}\\
+\includegraphics[page=76,width=.8\textwidth]{Vorlesungen/08_NeuralNets.pdf}
 \section{Zusätzliche Informationen: MNIST Datensatz}%
 \label{sec:Zusaetzliche Informationen: MNIST Datensatz}
-\includegraphics[page=82,width=\textwidth]{Vorlesungen/08_NeuralNets.pdf}
+\includegraphics[page=82,width=.8\textwidth]{Vorlesungen/08_NeuralNets.pdf}
 \section{Anwendungsbeispiele für CNNs}%
 \label{sec:Anwendungsbeispiele fuer CNNs}
-\includegraphics[page=3,width=\textwidth]{Vorlesungen/09_CNNs+RNNs.pdf}
-\includegraphics[page=4,width=\textwidth]{Vorlesungen/09_CNNs+RNNs.pdf}
+\includegraphics[page=3,width=.8\textwidth]{Vorlesungen/09_CNNs+RNNs.pdf}\\
+\includegraphics[page=4,width=.8\textwidth]{Vorlesungen/09_CNNs+RNNs.pdf}
 \section{Beispiel: Convolutional Layer: Stride and Padding}%
 \label{sec:Beispiel: Convolutional Layer: Stride and Padding}
-\includegraphics[page=14,width=\textwidth]{Vorlesungen/09_CNNs+RNNs.pdf}
-\includegraphics[page=15,width=\textwidth]{Vorlesungen/09_CNNs+RNNs.pdf}
+\includegraphics[page=14,width=.8\textwidth]{Vorlesungen/09_CNNs+RNNs.pdf}\\
+\includegraphics[page=15,width=.8\textwidth]{Vorlesungen/09_CNNs+RNNs.pdf}
 \section{Herleitung: Dimensionality Reduction: Minimizing the Error}%
 \label{sec:Herleitung: Dimensionality Reduction: Minimizing the Error}
-\includegraphics[page=16,width=\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}
+\includegraphics[page=16,width=.8\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}
 \section{Herleitung: PCA: Maximierungsproblem in Matrix-Schreibweise}%
 \label{sec:Herleitung: PCA: Maximierungsproblem in Matrix-Schreibweise}
-\includegraphics[page=19,width=\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}
+\includegraphics[page=19,width=.8\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}
 \section{Anwendungsbeispiele: PCA}%
 \label{sec:Anwendungsbeispiele: PCA}
-\includegraphics[page=27,width=\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}
-\includegraphics[page=28,width=\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}
-\includegraphics[page=29,width=\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}
-\includegraphics[page=30,width=\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}
-\includegraphics[page=31,width=\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}
-\includegraphics[page=32,width=\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}
-\includegraphics[page=33,width=\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}
+\includegraphics[page=27,width=.8\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}\\
+\includegraphics[page=28,width=.8\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}\\
+\includegraphics[page=29,width=.8\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}\\
+\includegraphics[page=30,width=.8\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}\\
+\includegraphics[page=31,width=.8\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}\\
+\includegraphics[page=32,width=.8\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}\\
+\includegraphics[page=33,width=.8\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}
 \section{Beweis: K-Means Konvergenz}%
 \label{sec:Beweis: K-Means Konvergenz}
-\includegraphics[page=49,width=\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}
+\includegraphics[page=49,width=.8\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}
 \section{Formale Definition: Histograms}%
 \label{sec:Formale Definition: Histrograms}
-\includegraphics[page=64,width=\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}
+\includegraphics[page=64,width=.8\textwidth]{Vorlesungen/10_DimensionalityReductionClustering.pdf}
 \section{Herleitung: Differenzierung des \glstopshortpl{GMM}}%
 \label{sec:Herleitung: Differenzierung des GMMs}
-\includegraphics[page=7,width=\textwidth]{Vorlesungen/11 - ExpectationMaximization.pdf}
+\includegraphics[page=7,width=.8\textwidth]{Vorlesungen/11 - ExpectationMaximization.pdf}
 \section{Herleitung: \glstopshort{EM}-Decomposition}%
 \label{sec:Herleitung: EM-Decomposition}
-\includegraphics[page=24,width=\textwidth]{Vorlesungen/11 - ExpectationMaximization.pdf}
+\includegraphics[page=24,width=.8\textwidth]{Vorlesungen/11 - ExpectationMaximization.pdf}
 \section{Herleitung: EM for GMMs: Maximization"~Step}%
 \label{sec:Herleitung: EM for GMMs: Maximization-Step}
-\includegraphics[page=16,width=\textwidth]{Vorlesungen/11 - ExpectationMaximization.pdf}
+\includegraphics[page=16,width=.8\textwidth]{Vorlesungen/11 - ExpectationMaximization.pdf}
 \section{Herleitung: EM for Dimensionality Reduction: Maximization"~Step}%
 \label{sec:Herleitung: EM for Dimensionality Reduction: Maximization-Step}
-\includegraphics[page=38,width=\textwidth]{Vorlesungen/11 - ExpectationMaximization.pdf}
+\includegraphics[page=38,width=.8\textwidth]{Vorlesungen/11 - ExpectationMaximization.pdf}
 \section{Herleitung: EM for Dimensionality Reduction: Maximization"~Step: Monte-Carlo Estimation}%
 \label{sec:Herleitung: EM for Dimensionality Reduction: Maximization-Step: Monte-Carlo Esitmation}
-\includegraphics[page=40,width=\textwidth]{Vorlesungen/11 - ExpectationMaximization.pdf}
+\includegraphics[page=40,width=.8\textwidth]{Vorlesungen/11 - ExpectationMaximization.pdf}
 \section{Herleitung: Variational Bayes: Maximierung des Marginal Log"~Likelihood}%
 \label{sec:Herleitung: Variational Bayes: Maximierung des Marginal Log-Likelihood}
-\includegraphics[page=13,width=\textwidth]{Vorlesungen/12 - VaraitionalAutoEncoders.pdf}
-\includegraphics[page=14,width=\textwidth]{Vorlesungen/12 - VaraitionalAutoEncoders.pdf}
+\includegraphics[page=13,width=.8\textwidth]{Vorlesungen/12 - VaraitionalAutoEncoders.pdf}\\
+\includegraphics[page=14,width=.8\textwidth]{Vorlesungen/12 - VaraitionalAutoEncoders.pdf}
 \section{Reparameterization Trick}%
 \label{sec:Reparameterization Trick}
-\includegraphics[page=19,width=\textwidth]{Vorlesungen/12 - VaraitionalAutoEncoders.pdf}
-\includegraphics[page=20,width=\textwidth]{Vorlesungen/12 - VaraitionalAutoEncoders.pdf}
+\includegraphics[page=19,width=.8\textwidth]{Vorlesungen/12 - VaraitionalAutoEncoders.pdf}\\
+\includegraphics[page=20,width=.8\textwidth]{Vorlesungen/12 - VaraitionalAutoEncoders.pdf}
 \section{Zusätzliche Informationen: Optimization over the variational distribution}%
 \label{sec:Zusaetzliche Informationen: Optimization over the variational distribution}
-\includegraphics[page=21,width=\textwidth]{Vorlesungen/12 - VaraitionalAutoEncoders.pdf}
+\includegraphics[page=21,width=.8\textwidth]{Vorlesungen/12 - VaraitionalAutoEncoders.pdf}
 \section{Zusätzliche Informationen: MLE: conditional log-likelihood}%
 \label{sec:Zusaetzliche Informationen: MLE: conditional log-likelihood}
-\includegraphics[page=21,width=\textwidth]{Vorlesungen/02_LinearClassification.pdf}
-\includegraphics[page=22,width=\textwidth]{Vorlesungen/02_LinearClassification.pdf}
+\includegraphics[page=21,width=.8\textwidth]{Vorlesungen/02_LinearClassification.pdf}\\
+\includegraphics[page=22,width=.8\textwidth]{Vorlesungen/02_LinearClassification.pdf}
 \section{Beweis für die positive Definitheit des Gaussian Kernels}%
 \label{sec:Beweis fuer die positive Definitheit des Gaussian Kernels}
-\includegraphics[page=14,width=\textwidth]{Vorlesungen/05_KernelMethods.pdf}
-\includegraphics[page=15,width=\textwidth]{Vorlesungen/05_KernelMethods.pdf}
+\includegraphics[page=14,width=.8\textwidth]{Vorlesungen/05_KernelMethods.pdf}\\
+\includegraphics[page=15,width=.8\textwidth]{Vorlesungen/05_KernelMethods.pdf}
 \section{Beispiele für die Optimierung von Hyper-Parametern eines Gaussian Kernels}%
 \label{sec:Beispiele fuer die Optimierung von Hyper-Parametern eines Gaussian Kernels}
-\includegraphics[page=53,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
-\includegraphics[page=54,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
-\includegraphics[page=55,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
-\includegraphics[page=56,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
-\includegraphics[page=57,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
+\includegraphics[page=53,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}\\
+\includegraphics[page=54,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}\\
+\includegraphics[page=55,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}\\
+\includegraphics[page=56,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}\\
+\includegraphics[page=57,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
 \section{Herleitung: Gaussian Bayes Rules}%
 \label{sec:Herleitung: Gaussian Bayes Rules}
-\includegraphics[page=26,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
-\includegraphics[page=27,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
+\includegraphics[page=26,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}\\
+\includegraphics[page=27,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
 \section{Herleitung: Gaussian Propagation}%
 \label{sec:Herleitung: Gaussian Propagation}
-\includegraphics[page=29,width=\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
+\includegraphics[page=29,width=.8\textwidth]{Vorlesungen/07_BayesianLearning.pdf}
diff --git a/ML_Zusammenfassung.tex b/ML_Zusammenfassung.tex
index d886037..95ba831 100644
--- a/ML_Zusammenfassung.tex
+++ b/ML_Zusammenfassung.tex
@@ -128,5 +128,7 @@
 \bibliographystyle{IEEEtran-de}
 \bibliography{Bibliography.bib}
+ \pagenumbering{arabic}
+ \renewcommand*{\thepage}{A-\arabic{page}}
 \include{Appendix.tex}
 \end{document}
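The two lines added to ML_Zusammenfassung.tex restart the page counter and restyle \thepage, so the appendix is numbered A-1, A-2, and so on, separately from the main part. A minimal standalone sketch of the same mechanism (document class and filler text are assumptions for illustration, not part of this repository):

\documentclass{article}
\begin{document}
Main matter, numbered 1, 2, 3, \ldots

% \pagenumbering resets the counter to 1 and redefines \thepage,
% which is why the \renewcommand has to come second
\pagenumbering{arabic}
% render the counter as A-1, A-2, ... from here on
\renewcommand*{\thepage}{A-\arabic{page}}

Appendix matter, numbered A-1, A-2, \ldots
\end{document}

Since Style.tex places \thepage in the footer via \rfoot{\thepage}, the prefixed numbers show up there automatically.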
diff --git a/Readme.md b/Readme.md
index b7df512..7546b12 100644
--- a/Readme.md
+++ b/Readme.md
@@ -5,8 +5,8 @@
 - [x] für alle \nameref prüfen, ob eine richtige Referenz nachfolgen sollte.
 - [x] Folien aus der Vorlesung, auf die in der Zusammenfassung verwiesen wird, einfach in den Anhang packen
 - [x] babel shortcuts fixen
-- [ ] Teilnummer in Anhang entfernen
-- [ ] Seitenumbrüche optimieren
+- [x] Teilnummer in Anhang entfernen
+- [x] Seitenumbrüche optimieren
 
 ## Notice
 Requires you to enable [--shell escape](https://tex.stackexchange.com/questions/516604/how-to-enable-shell-escape-or-write18-visual-studio-code-latex-workshop)
diff --git a/Style.tex b/Style.tex
index 16a51fc..43133a9 100644
--- a/Style.tex
+++ b/Style.tex
@@ -10,7 +10,7 @@
 \pagestyle{fancy}
 \fancyhf{}
 \chead{\textbf{Zusammenfassung \MODULE \\\vspace{1mm}}}
-\lhead{\partname~\thepart}
+\lhead{}
 \rhead{\leftmark}
 \lfoot{\AUTHOR~|~\DATE}
 \rfoot{\thepage}
diff --git a/chapters/Classical_Supervised_Learning/Linear_Classification.tex b/chapters/Classical_Supervised_Learning/Linear_Classification.tex
index 417972e..15e05f2 100644
--- a/chapters/Classical_Supervised_Learning/Linear_Classification.tex
+++ b/chapters/Classical_Supervised_Learning/Linear_Classification.tex
@@ -212,13 +212,13 @@ Dies stellt eine Approximation des tatsächlich erwarteten Verlustes nach dem Pr
 \subsection{\texorpdfstring{\glsxtrfull{SDG}}{\glsfmtfull{SDG}}}%
 \label{sub:SDG}
-\begin{wrapfigure}{r}{.5\textwidth}
+\begin{wrapfigure}{r}{.4\textwidth}
  \vspace*{-15mm}
  \centering
  \includegraphics[width=0.8\linewidth]{batch_vs_stochastic_gradient_descent.png}
  \caption{Batch vs. Stochastic Gradient Descent}
  \label{fig:batch_vs_stochastic_gradient_descent}
- \vspace*{-20mm}
+ \vspace*{-10mm}
 \end{wrapfigure}
 Um die Loss Function nicht für alle Datenpunkte evaluieren zu müssen, wird beim \gls{SDG} lediglich der Verlust an einem einzelnen, zufällig gewählten Punkt ermittelt
 \begin{equation} \label{eq:stochastic_gradient_descent}
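The hunk's context window ends at \begin{equation}, so the body of eq:stochastic_gradient_descent is not part of this patch. For orientation, a hedged sketch of the single-sample update the surrounding sentence describes, in its common textbook form (the repository's actual equation may differ):

% Assumed textbook form of the SGD step, not copied from the repository:
% a uniformly drawn index i replaces the sum over all N data points, so
% each step only needs the gradient of a single point's loss
\begin{equation}
    \bm w_{t+1} = \bm w_t - \eta\,\nabla_{\bm w}\mathcal L_i(\bm w_t),
    \qquad i \sim \mathcal U\{1,\dots,N\}
\end{equation}

Evaluating only one random point makes each step cheap but noisy, which is the trade-off pictured in fig:batch_vs_stochastic_gradient_descent.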
diff --git a/chapters/Classical_Supervised_Learning/Trees_and_Forests.tex b/chapters/Classical_Supervised_Learning/Trees_and_Forests.tex
index ccfa2eb..8cd7fe0 100644
--- a/chapters/Classical_Supervised_Learning/Trees_and_Forests.tex
+++ b/chapters/Classical_Supervised_Learning/Trees_and_Forests.tex
@@ -82,6 +82,7 @@ welchen Anteil die Klasse $k$ auf der linken Seite des Splits hat.
 \label{ssub:Classification Tree}
 \includegraphics[width=.6\textwidth]{classification_tree.png}\\
 (Herleitung: \cref{sec:Herleitung: CART: Classification Tree})
+\clearpage
 
 \subsubsection{Regression Tree}%
 \label{ssub:Regression Tree}
diff --git a/chapters/Mathematische_Grundlagen/Constraint_Optimization.tex b/chapters/Mathematische_Grundlagen/Constraint_Optimization.tex
index 74ce145..84dc2f8 100644
--- a/chapters/Mathematische_Grundlagen/Constraint_Optimization.tex
+++ b/chapters/Mathematische_Grundlagen/Constraint_Optimization.tex
@@ -33,6 +33,8 @@ Man spricht hierbei dann von einem Dual Optimization Problem
  \bm\lambda^*=\argmax_{\bm\lambda} g(\bm\lambda), g(\bm\lambda)= \min_{\bm x}L(\bm x,\bm\lambda)
 \end{equation}
 Hieraus ergibt sich der folgende Ablauf für die Lagrangian Optimization
+\pagebreak
+
 \begin{mybox}
  \textbf{\large Lagrangian Optimization}\\
  \begin{enumerate}
diff --git a/chapters/Mathematische_Grundlagen/Gaussian_Identities.tex b/chapters/Mathematische_Grundlagen/Gaussian_Identities.tex
index 54de320..4800692 100644
--- a/chapters/Mathematische_Grundlagen/Gaussian_Identities.tex
+++ b/chapters/Mathematische_Grundlagen/Gaussian_Identities.tex
@@ -73,7 +73,7 @@ Gegeben: Marginal (\cref{eq:marginal_gaussian_distribution}) und Conditional (\c
 \section{Gaussian Propagation}%
 \label{sec:Gaussian Propagation}
 Mit den Marginal und Conditional aus \cref{eq:marginal_gaussian_distribution} und \cref{eq:conditional_gaussian_distribution} ist es möglich, den Marginal $p(\bm y)$ zu ermitteln:\\
-({\color{red}Herleitung Vorlesung 07 Folie 31})
+(Herleitung: \cref{sec:Herleitung: Gaussian Propagation})
 \begin{itemize}
  \item Mean: \tabto{2.2cm}$\bm\mu_{\bm y} = \bm F\bm\mu_{\bm x}$
  \item Covariance:\tabto{2.2cm} $\nomeq{covariance}_{\bm y} = \sigma_{\bm y}^2\nomeq{identity_matrix} + \bm F\nomeq{covariance}_{\bm x}\bm F^T$
diff --git a/chapters/Mathematische_Grundlagen/Kernel_Basics.tex b/chapters/Mathematische_Grundlagen/Kernel_Basics.tex
index fa93fb0..a8c655c 100644
--- a/chapters/Mathematische_Grundlagen/Kernel_Basics.tex
+++ b/chapters/Mathematische_Grundlagen/Kernel_Basics.tex
@@ -75,7 +75,7 @@ und ist die am häufigsten genutzte Kernel Methode
 \begin{equation} \label{eq:gaussian_kernel}
  \nomeq{kernel_function}(\bm x,\bm y) = \exp\left(-\frac{\|\bm x - \bm y\|^2}{2\nomeq{variance}}\right)
 \end{equation}
-{\color{red}Beweis für die positive Definitheit in Vorlesung 04 Seite 14 f.}
+(Beweis für die positive Definitheit in \cref{sec:Beweis fuer die positive Definitheit des Gaussian Kernels})
 
 \section{Kernel Trick}%
 \label{sec:Kernel Trick}
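Both red placeholder notes are resolved into \cref references by this patch. As a quick numerical illustration of eq:gaussian_kernel and of the positive definiteness that the referenced appendix section proves (the points and the variance are example values chosen here, not taken from the repository):

% Example values (assumptions): x = (1,0), y = (0,0), sigma^2 = 1/2
\begin{align*}
    k(\bm x,\bm y) &= \exp\left(-\frac{\|\bm x-\bm y\|^2}{2\sigma^2}\right)
                    = \exp(-1) \approx 0.37\\
    % Gram matrix of the two points; k(x,x) = exp(0) = 1 on the diagonal
    \bm K &= \begin{pmatrix}1 & e^{-1}\\ e^{-1} & 1\end{pmatrix},
    \qquad \lambda_{1,2} = 1 \pm e^{-1} > 0
\end{align*}

That every such Gram matrix has only non-negative eigenvalues is exactly what positive definiteness of a kernel means, and it is the statement proven on the referenced slides.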
diff --git a/chapters/Neural_Networks/Basics.tex b/chapters/Neural_Networks/Basics.tex
index 1839bca..580646a 100644
--- a/chapters/Neural_Networks/Basics.tex
+++ b/chapters/Neural_Networks/Basics.tex
@@ -57,7 +57,6 @@ ergibt sich durch:
  \bm y = \nomeq{activation_function}(\bm W\bm x + \bm b)
 \end{equation}
 \begin{wrapfigure}{r}{.3\textwidth}
- \vspace*{-8mm}
  \centering
  \includegraphics[width=0.8\linewidth]{feedforward_neural_network_composition.png}
  \caption{Feedforward Neural Network mit Funktionen}
@@ -92,15 +91,15 @@ ab welchem Schwellwert das Produkt aus Eingangswerten und Gewichten zu relevante
 In den meisten Fällen wird die \glsxtrshort{ReLU} \noms{activation_function} verwendet, wobei es sich auch lohnt, die Leaky \glsxtrshort{ReLU} oder \glsxtrshort{ELU} auszuprobieren.
 Die Sigmoid Funktion (\cref{ssub:Logistic sigmoid function}) sollte ausschließlich als \noms{activation_function} in Klassifikationsproblemen verwendet werden.\\
-\includegraphics[scale=.7]{sigmoid_activation_function.png}\\
+\includegraphics[scale=.6]{sigmoid_activation_function.png}\\
 \hrule{\textwidth,1mm}
-\includegraphics[scale=.7]{tanh_activation_function.png}\\
+\includegraphics[scale=.6]{tanh_activation_function.png}\\
 \hrule{\textwidth,1mm}
-\includegraphics[scale=.7]{ReLU_activation_function.png}\\
+\includegraphics[scale=.6]{ReLU_activation_function.png}\\
 \hrule{\textwidth,1mm}
-\includegraphics[scale=.7]{Leaky_ReLU_activation_function.png}\\
+\includegraphics[scale=.6]{Leaky_ReLU_activation_function.png}\\
 \hrule{\textwidth,1mm}
-\includegraphics[scale=.7]{exponential_linear_units_activation_function.png}\\
+\includegraphics[scale=.6]{exponential_linear_units_activation_function.png}\\
 
 \section{Optimization}%
diff --git a/chapters/Neural_Networks/Gradient_Descent.tex b/chapters/Neural_Networks/Gradient_Descent.tex
index 102c06f..8581951 100644
--- a/chapters/Neural_Networks/Gradient_Descent.tex
+++ b/chapters/Neural_Networks/Gradient_Descent.tex
@@ -39,6 +39,7 @@ Hier berechnet sich der Loss durch
  \mathcal L &= \frac{1}{2}(y-t)^2
 \end{align}
 Für dieses Neural Network ist die Backpropagation dann
+
 \begin{alignat}{5} \label{eq:backward_pass}
  \frac{\partial \mathcal L}{\partial y} &= y - t && &&=\overline{y}\\
@@ -163,17 +164,17 @@ bei denen die Lernrate abhängig von der Anzahl der Durchläufe des \nameref{cha
 \begin{tabularx}{\textwidth}{X|Y|Y}
  \bfseries\centering Verfahren & \bfseries Learning Rate & \bfseries Training Loss\\
  \hline
- \textbf{Step:} Lernrate verändert sich nach einer bestimmten Anzahl von Algorithmus-Durchläufen & & \includegraphics[width=\linewidth,align=c]{learning_rate_decay_step.png}\\
+ \textbf{Step:} Lernrate verändert sich nach einer bestimmten Anzahl von Algorithmus-Durchläufen & & \includegraphics[width=.8\linewidth,align=c]{learning_rate_decay_step.png}\\
  \hline
  \textbf{Cosine:}$\alpha_t = \frac{1}{2}\alpha_0(1+\cos(\frac{t\pi}{T}))$ &
- \includegraphics[width=\linewidth,align=c]{learning_rate_decay_cosine_learning_rate.png} &
- \includegraphics[width=\linewidth,align=c]{learning_rate_decay_cosine_training_loss.png} \\
+ \includegraphics[width=.8\linewidth,align=c]{learning_rate_decay_cosine_learning_rate.png} &
+ \includegraphics[width=.8\linewidth,align=c]{learning_rate_decay_cosine_training_loss.png} \\
  \hline
  \textbf{Linear:}$\alpha_t = \alpha_0(1-\frac{t}{T})$ &
- \includegraphics[width=\linewidth,align=c]{learning_rate_decay_linear_learning_rate.png} & \\
+ \includegraphics[width=.8\linewidth,align=c]{learning_rate_decay_linear_learning_rate.png} & \\
  \hline
  \textbf{Inverse sqrt:}$\alpha_t = \frac{\alpha_0}{\sqrt{t}}$ &
- \includegraphics[width=\linewidth,align=c]{learning_rate_decay_inverse_sqrt.png} & \\
+ \includegraphics[width=.8\linewidth,align=c]{learning_rate_decay_inverse_sqrt.png} & \\
 \end{tabularx}
 ($\alpha_0$: initial learning rate, $\alpha_t$: learning rate at epoch $t$, $T$: total number of epochs)
 \end{table}
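The cosine row of this table is easy to sanity-check by evaluating its formula at the start, midpoint, and end of training. A worked example (the values $\alpha_0 = 0.1$ and $T = 100$ are assumptions for illustration, not from the text):

% Cosine decay evaluated at t = 0, T/2 and T, assuming alpha_0 = 0.1, T = 100:
\begin{align*}
    \alpha_0     &= \tfrac{1}{2}\cdot 0.1\,(1+\cos 0)             &&= 0.1\\
    \alpha_{T/2} &= \tfrac{1}{2}\cdot 0.1\,(1+\cos\tfrac{\pi}{2}) &&= 0.05\\
    \alpha_T     &= \tfrac{1}{2}\cdot 0.1\,(1+\cos\pi)            &&= 0
\end{align*}

The schedule therefore anneals smoothly from $\alpha_0$ to exactly zero at epoch $T$, while the inverse-sqrt rule decays more slowly and never reaches zero.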