# Max Dama
""" 
Position Sizing Module 

Example:

# Load some data from yahoo
import matplotlib.finance as fin
import datetime as dt
start = dt.datetime(2006,1,1)
end = dt.datetime(2007,1,1)
d = fin.quotes_historical_yahoo('SPY', start, end)
import numpy as np
close = np.array([bar[4] for bar in d])
close = close[range(0,len(close),5)] # make weekly
returns = np.diff(close)/close[:-1]

# Empirical Kelly
kelly(returns)
# Continuous/Gaussian Analytic Kelly
np.mean(returns)/np.var(returns)

# Why is the empirically optimal leverage less than the analytic?
import pylab as pl
pl.hist(returns)
pl.show()
# Good: heavy left tail caused empirical Kelly to be less than continuous/Gaussian Kelly

"""

import numpy as np
import scipy.optimize

def kelly(hist_returns, binned_optimization=False, num_bins=100, stop_loss=-np.inf):
	"""Compute the optimal multiplier to leverage historical returns
	
	Parameters
	----------
	hist_returns : ndarray
		arithmetic 1-pd returns
	binned_optimization : boolean
		see empirical distribution. improves runtime
	num_bins : int
		see empirical distribution. fewer bins improves runtime
	stop_loss : double
		experimental. simulate the effect of a stop loss at stop_loss percent return

	Returns
	-------
	f : float
		the optimal leverage factor."""
	
	if stop_loss > -np.inf:
		stopped_out = hist_returns < stop_loss
		hist_returns[stopped_out] = stop_loss
	
	probabilities, returns = empirical_distribution(hist_returns, binned_optimization, num_bins)
	
	expected_log_return = lambda(f): expectation(probabilities, np.log(1+f*returns))
	objective = lambda(f): -expected_log_return(f)
	derivative = lambda(f): -expectation(probabilities, returns/(1.+f*returns))
	return scipy.optimize.fmin_cg(f=objective, x0=1.0, fprime=derivative, disp=1, full_output=1, maxiter=5000, callback=mycall)

def empirical_distribution(hist_returns, binned_optimization=True, num_bins=100):
	"""Aggregate observations and generate an empirical probability distribution
	
	Parameters
	----------
	hist_returns : ndarray
		observations, assumed uniform probability ie point masses
	binned_optimization : boolean
		whether to aggregate point masses to speed computations using the distribution
	num_bins : int
		number of bins for histogram. fewer bins improves runtime but hides granular details
	
	Returns
	-------
	probabilites : ndarray
		probabilities of respective events
	returns : ndarray
		events/aggregated observations."""
	if binned_optimization:
		frequencies, return_bins = np.histogram(hist_returns, bins=num_bins) 
		probabilities = np.double(frequencies)/len(hist_returns)
		returns = (return_bins[:-1]+return_bins[1:])/2
	else:
		# uniform point masses at each return observation
		probabilities = np.double(np.ones_like(hist_returns))/len(hist_returns)
		returns = hist_returns
	return probabilities, returns

def expectation(probabilities, returns):
	"""Compute the expected value of a discrete set of events given their probabilities."""
	return sum(probabilities * returns) 

def mycall(xk):
	print xk


