\documentclass{beamer}
\usepackage{verbatim}
\usepackage{amsmath, array, amsthm, amsmath,mathrsfs}
% \usepackage{ulem}
\newcommand{\R}{\mathbb{R}}
\newcommand{\To}{\Rightarrow}
\newcommand{\del}{\nabla}
\newcommand{\ds}{\displaystyle}
\newcommand{\CC}{\mathbb{C}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\K}{\mathbb{K}}
\newcommand{\B}{\mathcal{B}}
\newcommand{\U}{\mathcal{U}}
\newcommand{\xnk}{x_{n_k}}
\newcommand{\C}{\mathcal{C}}
\renewcommand{\L}{\mathcal{L}}
\newcommand{\BigL}{\mathscr{L}}
\newcommand{\ben}{\begin{enumerate}}
\newcommand{\een}{\end{enumerate}}
\newcommand{\eps}{\varepsilon}
\renewcommand{\a}{\alpha}
\newcommand{\fa}{\forall}
\renewcommand{\l}{\ell}
\newcommand{\ex}{\exists}
\newcommand{\p}{\partial}
\newcommand{\<}{\langle}
\newcommand{\st}{\textrm{ s.t. }}
\renewcommand{\>}{\rangle}
\renewcommand{\subset}{\subseteq}
\newcommand{\wto}{\rightharpoonup}
\renewcommand{\phi}{\varphi}
\newcommand{\cont}{\Rightarrow \Leftarrow}
\newcommand{\back}{\backslash}
\newcommand{\norm}[1]{\left\|{#1}\right\|}
\newcommand{\abs}[1]{\left|{#1} \right|}
\newcommand{\ip}[1]{\langle{#1}\rangle}
\usepackage{hyperref}
\hypersetup{colorlinks,% 
citecolor=black,% 
filecolor=black,% 
linkcolor=red,% 
urlcolor=blue,% 
hyperindex=true,%
pdftex}
\usepackage[numbered,framed]{mcode}

% \usepackage{beamerthemesplit} // Activate for custom appearance

\title{Sensitivity Analysis, Automatic Differentiation, and Subset Selection}
\author{Adam Attarian}
\date{\today}

\begin{document}

\frame{\titlepage}

%\section[Outline]{}
%\frame{\tableofcontents}

%\section{Introduction}
%\subsection{Overview of the Beamer Class}

\begin{frame}[fragile]{Sensitivity Analysis}
Given a parameter dependent ODE,
$$
\dot x = f(t,x(t,q),q), \quad x \in \R^n, q \in \R^m
$$
the sensitivities are given by how the state changes wrt the parameters. Differentiating wrt $q$ we obtain
$$
\frac{d}{dt}\frac{\p x}{\p q_j} = \frac{\p f}{\p x}\frac{\p x}{\p q_j} + \frac{\p f}{\p q_j},
$$

which are the sensitivity equations. Computing the sensitivities requires solving $mn+n$ differential equations: $n$ from the original states and $mn$ sensitivities. 
\end{frame}

\begin{frame}[fragile]{Sensitivity Analysis}

\begin{itemize}
\item Sensitivity analysis is a local concept -- the model is evaluated at a specific, fixed parameter value.
\item You see how the model changes sensitivity to parameters over time -- this can be important.
\item The derivatives $\frac{\p x}{\p q_j}$ are solved for by the integrator, the other derivatives must be provided by some other method.
\item Could use finite difference, analytic solutions if you got'em, or automatic differentiation.
\end{itemize}
\end{frame}

\begin{frame}{Sensitivity Analysis}
Once the sensitivities are computed, we compute the \emph{relative ranking}, given by

$$
\norm{\frac{\p x_i}{\p q_j}}_2 = \bigg[ \frac{1}{t_f-t_0} \int_{t_0}^{t_f}\bigg(\frac{\p x_i}{\p q_j} \frac{q_j}{x_i}\bigg)^2 dt \bigg]^{1/2}.
$$
\begin{itemize}
\item Multiplying by $\ds{\frac{q_j}{x_i}}$ essentially non-dimensionalizes the results allowing comparison.
\item This allows a determination to be made as to which are they most sensitive parameters in a model. \item To numerically compute this, use a trapezoidal method. In Matlab, it's \texttt{trapz(X,Y)}. Do {\textcolor{red} NOT} use \texttt{trapz(Y)}. This assumes unit spacing and is \emph{bad}.
\end{itemize}
\end{frame}

\begin{frame}{Spring Example}
Consider the spring mass model that has been normalized with respect to mass,
$$
\ddot x + C \dot x + K x = 0.
$$
A change of variables gives us the system
\begin{align*}
\dot x_1 &= x_2 \\
\dot x_2 &= -Cx_2 -Kx_1
\end{align*}
Now we differentiate the system with respect to the parameters\dots
\end{frame}

\begin{frame}{Spring Sensitivities}
And then manually, by hand taking the partial derivatives we have
\begin{align*}
\frac{d}{dt} \frac{\p x_1}{\p C} &= \frac{\p x_2}{\p C} \\
\frac{d}{dt} \frac{\p x_1}{\p K} &= \frac{\p x_2}{\p K} \\
\frac{d}{dt} \frac{\p x_2}{\p C} &= -x_2 -C \frac{\p x_2}{\p C}- K \frac{\p x_1}{\p C} \\
\frac{d}{dt} \frac{\p x_2}{\p K} &= -C \frac{\p x_2}{\p K} - x_1 - K\frac{\p x_1}{\p K}.
\end{align*}
So you can see how for large compartment models with many states and parameters this is a long, tedious, error prone task. Better way: Automatic Differentiation.
\end{frame}

\begin{frame}{Automatic Differentiation (AD)}
\begin{itemize}
\item AD is essentially the application of the chain rule on an expression.
\item AD uses the product rule, quotient rule, it knows the derivative of $\sin$ is $\cos$, etc. A big table lookup.
\item AD breaks down a function into its basic constituent operations, and then differentiates. 
\item Not a numerical approximation to the derivative, but rather machine-precision exact.
\item Free AD for Matlab: myAD by Martin Fink. Available on the Matlab File Exchange \href{http://www.mathworks.com/matlabcentral/fileexchange/15235}{here.} 
\end{itemize}
\end{frame}

\begin{frame}{AD Example}
Suppose we want to take the derivative of a vector valued function, $f: \R^2 \to \R^2$ at the point $(1,1)$, where
$$
f(x_1,x_2)= \left[\begin{array}{c}2x_1 + x_2^2 \\x_1^3+x_1x_2\end{array}\right].
$$

The derivative is given by the Jacobian matrix:
$$
f'(x_1,x_2) = \left[\begin{array}{cc}\frac{\p f_1}{\p x_1} & \frac{\p f_1}{\p x_2} \\ \frac{\p f_2}{\p x_1} & \frac{\p f_2}{\p x_2}\end{array}\right] = \left[\begin{array}{cc}2 & 2x_2 \\3x_1^2+x_2 & x_1\end{array}\right] \bigg|_{(1,1)} = \left[\begin{array}{cc}2 & 2 \\4 & 1\end{array}\right].
$$

And to code this up in AD\dots
\end{frame}

\begin{frame}[fragile]{AD Example}
\begin{lstlisting}
function adex

x=[1;1];

bigX=myAD(x); % convert to a myAD variable
out=fun(bigX); % evaluate with the myAD variable

dfdx=getderivs(adOut); % get the jacobian

function f=fun(x)
x=[2*x(1)+x(2)^2;
	x(1)^3+x(1)*x(2)];

\end{lstlisting}

Running the code returns the result that we expect.
\end{frame}

\begin{frame}[fragile]{AD \& Sensitivities}
The book keeping on computing sensitivities on many states and parameters becomes a problem, so we wrote a code \texttt{tssolve.m} to do the hard parts. We'll apply \texttt{tssolve} to the spring model in a moment, but we first need to modify the spring ODE as follows.

\begin{lstlisting}
function dx=springode(t,x,q)

K=q(2);
C=q(1);

dx=[(q(1)./q(1)).*x(2);
    -K.*x(1)-C.*x(2)];
\end{lstlisting}

$\To$ each state needs to have a parameter in it, and myAD only operates on scalar operations (!)
\end{frame}

\begin{frame}[fragile]{AD \& Sensitivities}
\texttt{tssolve.m} is available from \href{http://www4.ncsu.edu/~arattari/tssolve.m}{here}, as is the \href{http://www4.ncsu.edu/~arattari/tssolveman.pdf}{manual} (read the manual or read the help!). To compute just relative sensitivities, here is how to use the code.

\begin{lstlisting}
q0=[4;5];
x0=[1 1];
t=linspace(0,5,100);
sens=tssolve(@springode,x0,q0,t,1);
\end{lstlisting}

\texttt{sens} is a Matlab structure, so it has lots of things in it. The important ones are
\begin{itemize}
\item \verb+sens.t+: the time vector
\item \verb+sens.relsens+: a 3D array containing the relative sensitivities
\end{itemize} 
\end{frame}

\begin{frame}[fragile]{AD \& Sensitivities}
RS.relsens is ordered in 3D arrays as (time, state, parameter), so the sensitivity of state one with parameter 5 would be relsens(:,1,5).

In the spring example, $q=[q_1,q_2]=[C,K]$, so we have
\begin{table}[htdp]
\begin{center}\begin{tabular}{cc}\vspace{2mm}
$\ds{\frac{\p x_1}{\p C}}$ & \texttt{sens.relsens(:,1,1)} \\\vspace{2mm}
$\ds{\frac{\p x_1}{\p K}}$ & \texttt{sens.relsens(:,1,2)} \\\vspace{2mm}
$\ds{\frac{\p x_2}{\p C}}$ & \texttt{sens.relsens(:,2,1)} \\\vspace{2mm}
$\ds{\frac{\p x_2}{\p K}}$ & \texttt{sens.relsens(:,2,2)} \\\vspace{2mm}
\end{tabular}
\end{center}
\end{table}
Read the \texttt{tssolve.m} documentation for more info. Computes lots of other things if you want.
\end{frame}

\begin{frame}{Subset Selection}
\begin{itemize}\item Subset selection allows you to determine a set of parameters that are most identifiable. \item Many times it is preferable to optimize over a smaller set of parameters rather than a larger set. 
\item The ones that are deemed less identifiable can be fixed to some nominal value.
\item Use a QR decomposition with column pivoting applied to a sensitivity matrix.
\end{itemize}
\end{frame}
\begin{frame}{Subset Selection}
We'll define the sensitivity matrix to be all of the sensitivities of the model output. On the spring model, if we only observe velocity ($N$ observations) then
$$
S=\left[\begin{array}{cc}\frac{\p x_1}{\p C} & \frac{\p x_1}{\p K}\end{array}\right]_{N \times 2}.
$$
Then the parameters $C,K$ are identifiable iff rank$(S)=m$, the number of unknown parameters. Equivalently, 
$$
\det(S^\top S) \ne 0.
$$
So the ``more linearly independent" columns of $S^\top S$ are more identifiable. How do determine which is more linearly independent?
\end{frame}

\begin{frame}[fragile]{Subset Selection}
We use a QR factorization with column pivoting. We factor $S^\top S$ as
$$
S^\top S \Pi = QR,
$$
where $\Pi$ is a permutation matrix. QR with pivoting puts the most linearly independent columns of $S^\top S$ to the left. 
\begin{itemize}
\item The permutation matrix encodes which parameters were moved.
\item You can essentially read it off from the matrix which parameters are most identifiable.
\item In Matlab: \verb+[Q,R,P]=qr(A)+
\end{itemize}
\end{frame}

\begin{frame}{Subset Selection}
Suppose you're analyzing three parameters, and after doing QR with pivoting you obtain a permutation matrix that looks like
$$
\Pi = \left[\begin{array}{ccc}0 & 1 & 0 \\0 & 0 & 1 \\1 & 0 & 0\end{array}\right].
$$
This says, essentially, the third parameter is the most identifiable, followed by the first, and lastly the second. For a more quantitative take on things, take a look at the eigenvalues of $S^\top S$. 
\end{frame}
\end{document}