# IPython magic: populates the interactive namespace from numpy and matplotlib,
# which is where the bare names used below (linspace, plot, random, ...) come from.
%pylab inline
from IPython.display import display, HTML, Image
import sys
# Add the "02_meta_learning" directory to the module search path so that
# modules stored there can be imported by later cells.
sys.path.append("02_meta_learning")
Populating the interactive namespace from numpy and matplotlib
Data Flow
|       | Harry Potter | Avatar | LOTR | Gladiator | Titanic | Glitter |
| :---- | ------------:| ------:| ----:| ---------:| -------:| -------:|
| Alice | ? | 5 | 3 | ? | 5 | ? |
| Bob   | 4 | 5 | 5 | 4 | ? | ? |
| Carol | 3 | ? | ? | 2 | 5 | 3 |
| David | 3 | ? | 4 | 5 | 1 | 1 |
| Eric  | 4 | ? | 2 | ? | ? | 3 |
| Fred  | 1 | 1 | 5 | ? | ? | 1 |
How can we win the competition? -- Ensemble methods
"The lesson here is that having lots of models is useful for the incremental results needed to win competitions, but practically, excellent systems can be built with just a few well-selected models."
We vary the training set and (hypothetically) observe the true error.
with xkcd():
    figure(figsize=(6, 6))

    def plot_target():
        """Draw three concentric rings and a black dot marking the centre."""
        angle = linspace(0, 2*pi, 100)
        for radius in (1.0, 0.67, 0.33):
            plot(radius*cos(angle), radius*sin(angle))
        scatter(0, 0, color="black")

    # One row per panel of the 2x2 grid:
    # (subplot index, spread of the hits, offset from the centre, title, y-label).
    # A larger spread illustrates variance, a non-zero offset illustrates bias.
    panels = (
        (1, 0.2, 0.0, "Low Variance", "Low Bias"),
        (2, 0.8, 0.0, "High Variance", None),
        (3, 0.2, 0.5, None, "High Bias"),
        (4, 0.8, 0.5, None, None),
    )
    for index, spread, offset, heading, side_label in panels:
        setp(subplot(2, 2, index), xticks=(), yticks=())
        plot_target()
        if heading is not None:
            title(heading)
        if side_label is not None:
            ylabel(side_label)
        # Ten uniformly scattered hits; x is drawn before y in every panel.
        hits_x = (random.rand(10)-0.5)*spread + offset
        hits_y = (random.rand(10)-0.5)*spread + offset
        scatter(hits_x, hits_y)
with xkcd():
    # Schematic plot only: hide the tick marks and just name the axes.
    setp(gca(), xticks=(), yticks=(), xlabel="Complexity", ylabel="Error")

    resolution = 100
    complexity = linspace(0, 4, resolution)

    # Bias decays with complexity, variance grows towards 1 at the maximum
    # complexity, and the noise term is a constant 0.1.
    bias_error = exp(-complexity)
    variance_error = exp(complexity - complexity.max())
    noise_error = 0.1 * ones(resolution)

    curves = (
        ("Bias", bias_error),
        ("Variance", variance_error),
        ("Noise", noise_error),
        ("Total", bias_error + variance_error + noise_error),
    )
    for curve_label, curve_values in curves:
        plot(complexity, curve_values, label=curve_label)
    legend(loc="best")
Example for Bias
Example for Variance
To run this example you have to install the library OpenANN.
from openann import *
class NeuralNetwork(object):
    """Regression model with one hidden layer, backed by the OpenANN library.

    The object exposes the ``fit(X, y)`` / ``predict(X)`` pair that the
    ensemble code in this file calls on its base learners.
    """

    def __init__(self, n_nodes):
        # Size of the hidden layer; the network itself is only built in fit().
        self.n_nodes = n_nodes

    def fit(self, X, y):
        """Build a fresh network for ``X`` and optimize it on ``(X, y)``.

        ``y`` is indexed as a 1-D array and turned into a single target
        column; the number of network inputs is taken from ``X.shape[1]``.
        """
        targets = y[:, newaxis]

        # Architecture: inputs -> n_nodes TANH units -> one LINEAR output.
        n_inputs = X.shape[1]
        layers = Net().input_layer(n_inputs)
        layers = layers.fully_connected_layer(self.n_nodes, Activation.TANH)
        self.net = layers.output_layer(1, Activation.LINEAR)

        dataset = DataSet(X, targets)
        stop_criteria = {"maximal_iterations" : 50}
        optimizer = LMA(stop_criteria)
        optimizer.optimize(self.net, dataset)

    def predict(self, X):
        """Return the first (only) output column of the network for ``X``."""
        outputs = self.net.predict(X)
        return outputs[:, 0]
In this example we will average the predictions over all base learners!
class Bagging(object):
    """Bagging ensemble that averages the predictions of its base learners.

    Parameters
    ----------
    models : sequence
        Base learners; each must provide ``fit(X, y)`` and ``predict(X)``.
    bag_size : float
        Fraction of the training set drawn (with replacement) for every
        base learner. Must lie strictly between 0 and 1.

    Raises
    ------
    AssertionError
        If ``bag_size`` is not strictly between 0 and 1.
    """

    def __init__(self, models, bag_size):
        # Raised explicitly instead of through an `assert` statement so the
        # check is not stripped when Python runs with -O. The exception type
        # is kept so existing callers see the same error as before.
        if not 0.0 < bag_size < 1.0:
            raise AssertionError(
                "bag_size must lie strictly between 0 and 1, got %r"
                % (bag_size,))
        self.models = models
        self.bag_size = bag_size

    def fit(self, X, y):
        """Fit every base learner on its own random bag drawn from (X, y)."""
        N = X.shape[0]
        # int() truncates, so a small training set could yield an empty bag;
        # always hand at least one sample to each learner.
        n_drawn = max(1, int(N*self.bag_size))
        for model in self.models:
            # `random` is numpy.random here (provided by %pylab); indices are
            # drawn with replacement from [0, N).
            bag_indices = random.randint(0, N, n_drawn)
            model.fit(X[bag_indices], y[bag_indices])

    def predict(self, X):
        """Return the element-wise mean of all base-learner predictions."""
        return mean([m.predict(X) for m in self.models], axis=0)
Sine function with normally distributed noise.
# Reproducible samples of one sine period with Gaussian noise (std 0.3).
random.seed(0)
N = 100
X = linspace(0, 2*pi, N).reshape(N, 1)  # inputs as a single feature column
y = sin(X[:, 0]) + 0.3 * random.randn(N)
plot(X, y, "o")
# Assigning the return value keeps the notebook from echoing it.
r = xlim(0, 2*pi)
def eval_bagging(X, y, n_models=50, bag_size=0.2, n_nodes=10):
    """Train a bagged ensemble of neural networks and measure its errors.

    The ensemble is fitted and evaluated on the same data ``(X, y)``.

    Parameters
    ----------
    X, y : training inputs and targets, passed on to ``Bagging.fit``.
    n_models : number of ``NeuralNetwork`` base learners.
    bag_size : fraction of the data drawn for each base learner.
    n_nodes : hidden-layer size of every base learner.

    Returns
    -------
    p : list with the prediction of every base learner on ``X``.
    h : prediction of the whole ensemble on ``X``.
    p_err : list with the absolute error of every base learner.
    h_err : absolute error of the ensemble prediction.
    """
    # `range` instead of `xrange`: the latter exists only on Python 2, while
    # `range` yields the same iteration on both Python 2 and Python 3.
    models = [NeuralNetwork(n_nodes) for _ in range(n_models)]
    bagging = Bagging(models, bag_size)
    bagging.fit(X, y)

    h = bagging.predict(X)
    p = [m.predict(X) for m in models]
    p_err = [abs(pn - y) for pn in p]
    h_err = abs(h - y)
    return p, h, p_err, h_err
# Seed numpy's generator and RandomNumberGenerator (presumably OpenANN's own
# generator - confirm) so the run is repeatable.
random.seed(0)
RandomNumberGenerator().seed(0)
p, h, p_err, h_err = eval_bagging(X, y)

figure(figsize=(10, 5))

# Left panel: the data, every base learner (thin red) and the ensemble (thick).
fit_axes = subplot(1, 2, 1)
setp(fit_axes, xlabel="x", ylabel="y", xlim=(0, 2*pi), ylim=(-3, 3))
plot(X, y, "o")
for pn in p:
    plot(X, pn, "r-")
plot(X, h, "-", linewidth=5)

# Right panel: ensemble error versus the mean error of the base learners.
error_axes = subplot(1, 2, 2)
setp(error_axes, xlabel="x", ylabel="Error", xlim=(0, 2*pi))
plot(X, h_err, "g", label="Bagging Error")
mean_base_error = mean(p_err, axis=0)
plot(X, mean_base_error, "b", label="Average Error of Base Learners")
l = legend(loc="best")
(There is another interesting type of boosting: Human Boosting)
# Two-class toy problem: 2-D Gaussian points, labelled 1.0 when they lie
# outside the unit circle and 0.0 otherwise.
random.seed(0)
n_samples = 500
X = random.randn(n_samples, 2)
outside_unit_circle = [linalg.norm(point) > 1.0 for point in X]
y = array(outside_unit_circle, dtype=float64)
T = y.reshape(n_samples, 1)  # the same labels as a single target column

figure(figsize=(5, 5))
# Assigning the return value keeps the notebook from echoing it.
r = scatter(X[:, 0], X[:, 1], c=y)
To run this example OpenANN is required.
n_models = 5
from openann import *
from util import plot_classifier

# Train ensemble
RandomNumberGenerator().seed(0)
adaboost = AdaBoost()
# Every base learner is a tiny network: 2 inputs -> 2 LOGISTIC units -> 1
# LOGISTIC output. `range` replaces the Python-2-only `xrange`; it iterates
# identically on Python 2 and Python 3.
nets = [Net().input_layer(2)
             .fully_connected_layer(2, Activation.LOGISTIC)
             .output_layer(1, Activation.LOGISTIC)
        for _ in range(n_models)]
for net in nets:
    adaboost.add_learner(net)
opt = LMA(stop={"maximal_iterations" : 10})
adaboost.set_optimizer(opt)
dataset = DataSet(X, T)
adaboost.train(dataset)
# One weight per base learner; shown in the subplot titles further down.
weights = adaboost.get_weights()
figure(figsize=(9, 6))
# One panel per base learner plus a final panel for the whole ensemble.
# The row count is derived from the number of panels so the grid still fits
# when n_models changes (n_models = 5 gives the former 2 x 3 layout).
n_cols = 3
n_panels = n_models + 1
n_rows = (n_panels + n_cols - 1) // n_cols  # integer ceiling division
# `range` replaces the Python-2-only `xrange`.
for m in range(n_models):
    subplot(n_rows, n_cols, 1+m)
    plot_classifier(X, y, nets[m], "Net #%d, weight: %.2f" % (m+1, weights[m]), threshold=0.5)
subplot(n_rows, n_cols, n_panels)
plot_classifier(X, y, adaboost, "AdaBoost", threshold=0.5)