Neuroevolution
%pylab inline
Populating the interactive namespace from numpy and matplotlib
def objective(x):
    """Euclidean distance of the 2-D point x from the origin (to be minimized)."""
    dx, dy = x[0], x[1]
    return sqrt(dx * dx + dy * dy)
def plot_objective():
    """Filled contour plot of `objective` over roughly [-1.5, 1.5]^2."""
    x, y = meshgrid(arange(-1.6, 1.6, 0.1), arange(-1.6, 1.6, 0.1))
    # Fixed index order: contourf expects z[i, j] to correspond to
    # x[i, j] / y[i, j].  The original built the transpose, which was
    # invisible only because both the grid and the objective are symmetric.
    z = [[objective([x[i, j], y[i, j]]) for j in range(x.shape[1])]
         for i in range(x.shape[0])]
    contourf(x, y, z, cmap=cm.Blues)
    setp(gca(), xticks=(), yticks=(), xlim=(-1.5, 1.5), ylim=(-1.5, 1.5))
# Show the objective landscape once on its own.
figure(figsize=(5, 5))
plot_objective()
def equiprobable_ellipse(C, factor=1.0):
    """Return (angle, width, height) of the equiprobability ellipse of a
    2x2 covariance matrix C.

    `angle` is in radians (orientation of the principal axis); `width` and
    `height` span 2*factor standard deviations along the principal axes.

    Source: http://stackoverflow.com/questions/12301071/multidimensional-confidence-intervals
    """
    def eigsorted(C):
        # Eigen-decomposition of the symmetric matrix, eigenvalues descending.
        vals, vecs = linalg.eigh(C)
        order = vals.argsort()[::-1]
        return vals[order], vecs[:, order]
    vals, vecs = eigsorted(C)
    # Orientation of the eigenvector belonging to the largest eigenvalue.
    angle = arctan2(*vecs[:, 0][::-1])
    # Consistent bare pylab-style namespace (was `np.sqrt` amid bare names).
    width, height = 2 * factor * sqrt(vals)
    return angle, width, height
def plot_ellipse(C, mean=None):
    """Draw the equiprobability ellipse of covariance C, centered at `mean`
    (defaults to the origin), on the current axes."""
    from matplotlib.patches import Ellipse
    # None-sentinel instead of a mutable default argument (`mean=zeros(2)`
    # created one shared array at definition time).
    if mean is None:
        mean = zeros(2)
    angle, width, height = equiprobable_ellipse(C)
    e = Ellipse(xy=mean, width=width, height=height,
                angle=degrees(angle), ec="orange",
                fc="none", lw=3, ls="dashed")
    gca().add_artist(e)
__import__("sys").path.append("05_neuroevolution")
from barecmaes2 import Cmaes
# CMA-ES optimizer: start at (1, 1), initial step size 0.1, stop when the
# objective reaches 1e-10, with 50 samples per generation.
opt = Cmaes(xstart=[1.0, 1.0], sigma=0.1, ftarget=1e-10, popsize=50)
def plot_generations(n_generations):
    """Run `n_generations` CMA-ES iterations on the global `opt`, plotting
    the objective, the search distribution, and the weighted samples for
    each iteration side by side.  Mutates `opt` (ask/tell)."""
    figure(figsize=(n_generations*4, 4))
    for it in range(n_generations):
        X = asarray(opt.ask())          # sample a new population
        F = [objective(x) for x in X]   # evaluate all samples
        subplot(1, n_generations, it+1)
        plot_objective()
        # Search distribution: covariance matrix scaled by the step size sigma.
        plot_ellipse(C=opt.sigma*asarray(opt.C), mean=opt.xmean)
        weights = zeros_like(F)
        # Only the best len(opt.weights) samples get nonzero recombination weights.
        weights[argsort(F)[:len(opt.weights)]] = opt.weights
        scatter(X[:, 0], X[:, 1], c=weights, cmap=cm.gray)
        opt.tell(X, F)                  # update the optimizer state
# Each call continues from the optimizer's current state, so the search
# distribution keeps shrinking toward the optimum across the three figures.
plot_generations(n_generations=3)
plot_generations(n_generations=3)
plot_generations(n_generations=3)
# Embed a demo video (Python 2 print statement; notebook cell).
from IPython.display import YouTubeVideo
YouTubeVideo("5y3UTEgFH2Q")
print YouTubeVideo("5y3UTEgFH2Q").src
http://www.youtube.com/embed/5y3UTEgFH2Q
# Second demo video.
YouTubeVideo("AxeeHif0euY")
print YouTubeVideo("AxeeHif0euY").src
http://www.youtube.com/embed/AxeeHif0euY
# Third demo video.
YouTubeVideo("3JiNC6vw8zE")
print YouTubeVideo("3JiNC6vw8zE").src
http://www.youtube.com/embed/3JiNC6vw8zE
This problem has also been solved using visual input.
Markov property:
\[P(s_{t+1}|a_t,s_t,\ldots,a_0,s_0) = P(s_{t+1}|a_t,s_t)\]
But we cannot observe \(s_t\) completely!
Can you give examples for partially observable Markov decision processes?
How can we deal with partial observability?
class ABF(object):
    """Alpha-beta filter: tracks a signal estimate and its derivative
    from noisy observations.

    Parameters
    ----------
    g : float
        Tuning parameter from which the critically damped gains
        alpha (self.a) and beta (self.b) are derived.
    delta : float
        Time step between successive observations.
    """
    def __init__(self, g, delta):
        r = (4 + g - sqrt(8*g + g**2)) / 4
        self.a, self.b = 1 - r**2, 2*(1 - r)**2
        self.delta = delta
        # State estimate and its derivative; initialized lazily on first step.
        self.s, self.sd = None, None

    def step(self, s):
        """Fuse one observation `s`; return (state estimate, derivative estimate)."""
        # Fixed: `is None` instead of `== None` -- once self.s holds a numpy
        # array, `== None` compares elementwise and breaks the `if`.
        if self.s is None:
            self.s, self.sd = s, zeros_like(s)
        se = self.s + self.delta * self.sd     # predicted state
        sde = self.sd                          # predicted derivative
        r = s - se                             # innovation (residual)
        self.s = se + self.a * r               # corrected state
        self.sd = sde + self.b / self.delta * r  # corrected derivative
        return self.s, self.sd
# Demo signal: a sine wave with additive Gaussian observation noise.
random.seed(0)  # reproducible noise
steps = 100
X = linspace(0, numpy.pi*4, steps)
S = sin(X)
Sn = S + random.randn(*S.shape)*0.2  # noisy observations, sigma = 0.2
figure(figsize=(12, 5))
plot(Sn, label="Observation")
plot(S, label="Signal")
r = legend()  # bind the Legend object to suppress the notebook echo
# Run the alpha-beta filter over all noisy observations, collecting the
# state estimates (SE) and derivative estimates (SDE).
abf = ABF(g=0.05, delta=1.0)
SE, SDE = [], []
for t in range(steps):
    estimate, derivative = abf.step(Sn[t])
    SE.append(estimate)
    SDE.append(derivative)
# Compare the filtered estimate against signal and observations.
figure(figsize=(12, 5))
plot(Sn, label="Observation")
plot(S, label="Signal")
plot(SE, label="Estimation")  # alpha-beta filtered state estimate
r = legend()  # bind to suppress the notebook echo
General function approximators
Nonlinear dynamical systems
Via points and splines